diff --git a/R/data.table.R b/R/data.table.R index 801482147e..15e270753c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -527,12 +527,15 @@ replace_dot_alias = function(e) { # Really, `anyDuplicated` in base is AWESOME! # allow.cartesian shouldn't error if a) not-join, b) 'i' has no duplicates if (verbose) {last.started.at=proc.time();catf("Constructing irows for '!byjoin || nqbyjoin' ... ");flush.console()} - irows = if (allLen1) f__ else vecseq(f__,len__, - if (allow.cartesian || - notjoin || # #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x). - !anyDuplicated(f__, incomparables = c(0L, NA_integer_))) { - NULL # #742. If 'i' has no duplicates, ignore - } else as.double(nrow(x)+nrow(i))) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)). + if (allLen1) + irows = f__ + else { + # notjoin condition for #698. When notjoin=TRUE, ignore allow.cartesian, because rows in answer will never be > nrow(x). + # anyDuplicated condition for #742. If 'i' has no duplicates, no clamp is needed: the else-less `if` below then leaves clamp as NULL. + # clamp value: rows in i might not match to x, so the old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present, so the reason for nrow(x)+nrow(i) now is just to nail it down and be bigger than max(nrow(x),nrow(i)). + clamp = if (!allow.cartesian && !notjoin && anyDuplicated(f__, incomparables = c(0L, NA_integer_))) as.double(nrow(x) + nrow(i)) + irows = vecseq(f__, len__, clamp) + } if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # Fix for #1092 and #1074 # TODO: implement better version of "any"/"all"/"which" to avoid