You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
} else as.double(nrow(x)+nrow(i))) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)).
545
+
if (allLen1) {
546
+
irows=f__
547
+
} else {
548
+
join.many= isTRUE(getOption("datatable.join.many", TRUE)) # #914, default TRUE for backward compatibility
549
+
anyDups=!notjoin&&
550
+
(
551
+
# #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x).
552
+
(join.many&&!allow.cartesian) ||
553
+
# special case of scalar i match to const duplicated x, not handled by anyDuplicate: data.table(x=c(1L,1L))[data.table(x=1L), on="x"]
limit=if (anyDups) { # #742. If 'i' has no duplicates, ignore
558
+
if (!join.many) stopf("Joining resulted in many-to-many join. Perform quality check on your data, use mult!='all', or set 'datatable.join.many' option to TRUE to allow rows explosion.")
559
+
if (allow.cartesian) internal_error("checking allow.cartesian and join.many, unexpected else branch reached") # nocov
560
+
as.double(nrow(x)+nrow(i)) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)).
561
+
}
562
+
irows= vecseq(f__, len__, limit)
563
+
}
551
564
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
552
565
# Fix for #1092 and #1074
553
566
# TODO: implement better version of "any"/"all"/"which" to avoid
# extra layer over bmerge to provide ready to use row indices (or NULL for 1:nrow)
83
+
# NULL to avoid extra copies in downstream code, it turned out that avoiding copies precisely is costly and enormously complicates code, need #4409 and/or handle 1:nrow in subsetDT
84
+
dtmerge=function(x, i, on, how, mult, join.many, void=FALSE, verbose) {
85
+
nomatch=switch(how,
86
+
inner=, semi=, anti=, cross=0L,
87
+
left=, right=, full=NA_integer_)
88
+
nomatch0= identical(nomatch, 0L)
89
+
if (is.null(mult))
90
+
mult=switch(how,
91
+
semi=, anti="last",
92
+
cross="all",
93
+
inner=, left=, right=, full="error")
94
+
if (void&&mult!="error")
95
+
internal_error("'void' must be used with mult='error'") # nocov
96
+
if (how=="cross") { ## short-circuit bmerge results only for cross join
97
+
if (length(on) ||mult!="all"||!join.many)
98
+
stopf("cross join must be used with zero-length on, mult='all', join.many=TRUE")
99
+
if (void)
100
+
internal_error("cross join must be used with void=FALSE") # nocov
if (void) { ## void=T is only for the case when we want raise error for mult='error', and that would happen in above line
111
+
return(invisible(NULL))
112
+
} elseif (how=="semi"||how=="anti") { ## semi and anti short-circuit
113
+
## we will subset i rather than x, thus assign to irows, not to xrows
114
+
if (how=="semi")
115
+
irows= which(ans$lens!=0L)
116
+
else
117
+
irows= which(ans$lens==0L)
118
+
if (length(irows) == length(ans$lens)) irows=NULL
119
+
return(list(ans=ans, irows=irows))
120
+
} elseif (mult=="all"&&!ans$allLen1&&!join.many&&## join.many, like allow.cartesian, check
121
+
!(length(ans$starts) ==1L&&ans$lens== nrow(x)) &&## special case of scalar i match to const duplicated x, not handled by anyDuplicate: data.table(x=c(1L,1L))[data.table(x=1L), on="x"]
stopf("Joining resulted in many-to-many join. Perform quality check on your data, use mult!='all', or set 'datatable.join.many' option to TRUE to allow rows explosion.")
\code{allow.cartesian} parameter; see \code{\link{data.table}}. If the value of this parameter is FALSE, an error is raised as a safeguard against an explosive Cartesian join.}
0 commit comments