From 20849ca3e1a90fae0373123af05af7c9ed89517c Mon Sep 17 00:00:00 2001 From: tarun Date: Sat, 14 Feb 2026 18:26:47 +0530 Subject: [PATCH] Fix non-equi join error with equi + 2 inequality conditions (#7641) When rightcols contains duplicates (same x column in multiple non-equi conditions), chmatchdup remaps them into the expanded result namespace. This overwrote rightcols, causing downstream code (e.g. .shallow(x, rightcols) in .join_result_key) to reference columns beyond ncol(x). Introduce ansrightcols for the remapped indices and keep rightcols as original column indices into x. Only icolsAns needs the remapped values; all other uses (xcols, names_x[rightcols], .join_result_key) need the original x-relative indices. Closes #7641 --- NEWS.md | 2 ++ R/data.table.R | 9 ++++++--- inst/tests/tests.Rraw | 5 +++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index fd0ee8bf1..77f32bb4b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -38,6 +38,8 @@ 4. `rowwiseDT()` now provides a helpful error message when a complex object that is not a list (e.g., a function) is provided as a cell value, instructing the user to wrap it in `list()`, [#7219](https://github.com/Rdatatable/data.table/issues/7219). Thanks @kylebutts for the report and @venom1204 for the fix. +5. Non-equi joins combining an equality condition with two inequality conditions on the same column (e.g., `on = .(id == id, val >= lo, val <= hi)`) no longer error, [#7641](https://github.com/Rdatatable/data.table/issues/7641). The internal `chmatchdup` remapping of duplicate `rightcols` was overwriting the original column indices, causing downstream code to reference non-existent columns. Thanks @aitap for the diagnosis. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/R/data.table.R b/R/data.table.R index 85d623d39..a989538b1 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1075,12 +1075,15 @@ replace_dot_alias = function(e) { if (length(tt)) jisvars[tt] = paste0("i.",jisvars[tt]) if (length(duprightcols <- rightcols[duplicated(rightcols)])) { nx = c(names_x, names_x[duprightcols]) - rightcols = chmatchdup(names_x[rightcols], nx) + ansrightcols = chmatchdup(names_x[rightcols], nx) # indices into result namespace nx, #7641 nx = make.unique(nx) - } else nx = names_x + } else { + nx = names_x + ansrightcols = rightcols + } ansvars = make.unique(c(nx, jisvars)) icols = c(leftcols, seq_along(i)[-leftcols]) - icolsAns = c(rightcols, seq.int(length(nx)+1L, length.out=ncol(i)-length(unique(leftcols)))) + icolsAns = c(ansrightcols, seq.int(length(nx)+1L, length.out=ncol(i)-length(unique(leftcols)))) xcols = xcolsAns = seq_along(x)[-rightcols] } ansvals = chmatch(ansvars, nx) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f30467dae..508bf6aa0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6954,6 +6954,11 @@ DT1 <- data.table(a=1L, key='a') DT2 <- data.table(a=2.0, key='a') test(1483.83, DT1[DT2, roll='nearest'], data.table(a=2L, key='a')) +# non-equi join with equi + 2 inequality conditions should not error, #7641 +dt_x = data.table(id = c("A","A","B","B"), val = c(1,5,2,4), key = c("id","val")) +dt_i = data.table(id = c("A","B"), lo = c(2,1), hi = c(6,3)) +test(1483.91, nrow(dt_x[dt_i, on = .(id == id, val >= lo, val <= hi)]), 2L) + # NULL items should be removed when making data.table from list, #842 # Original fix for #842 added a branch in as.data.table.list() using point() # Then PR#3471 moved logic from data.table() into as.data.table.list() and now removes NULL items up front, so longer need for the branch