From b14534d80a766d638b3ff66f10b3e7b859b03dc7 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 09:17:14 +0530
Subject: [PATCH 01/23] hsdhP

---
 R/bmerge.R | 17 ++++++++++++++++-
 R/merge.R  | 49 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index dffca5e44f..9ff5e9d9e3 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -84,7 +84,22 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           next
         }
       }
-      stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
+      raw_msg <- sprintf("Incompatible join types for bmerge: '%s' (%s) and '%s' (%s). Factor columns must join to factor or character columns.",
+                       xname, x_merge_type, # Details for C-bmerge 'x' argument
+                       iname, i_merge_type) # Details for C-bmerge 'i' argument
+
+      condition <- structure(
+        list(
+          message = raw_msg, # Generic message from bmerge's perspective
+          call = NULL,       # Will be populated by stop() or handler
+          c_bmerge_x_arg_col_specifier = xname,        # e.g., "x.a" or "a"
+          c_bmerge_x_arg_type          = x_merge_type,  # e.g., "integer"
+          c_bmerge_i_arg_col_specifier = iname,        # e.g., "i.a" or "a"
+          c_bmerge_i_arg_type          = i_merge_type   # e.g., "factor"
+        ),
+        class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
+      )
+      stop(condition)
     }
     # we check factors first to cater for the case when trying to do rolling joins on factors
     if (x_merge_type == i_merge_type) {
diff --git a/R/merge.R b/R/merge.R
index 4d7245983e..d406e40521 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -46,7 +46,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   } else {
     if (is.null(by))
       by = intersect(key(x), key(y))
-    if (!length(by))   # was is.null() before PR#5183  changed to !length()
+    if (!length(by))
       by = key(x)
     if (!length(by))
       by = intersect(nm_x, nm_y)
@@ -62,7 +62,6 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     by.x = by.y = by
   }
 
-  # warn about unused arguments #2587
   if (length(list(...))) {
     ell = as.list(substitute(list(...)))[-1L]
     for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
@@ -70,10 +69,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (unnamed_n)
       warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
   }
-  # with i. prefix in v1.9.3, this goes away. Left here for now ...
-  ## sidestep the auto-increment column number feature-leading-to-bug by
-  ## ensuring no names end in ".1", see unit test
-  ## "merge and auto-increment columns in y[x]" in test-data.frame.like.R
+
   start = setdiff(nm_x, by.x)
   end = setdiff(nm_y, by.y)
   dupnames = intersect(start, end)
@@ -81,47 +77,62 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
     end[chmatch(dupnames, end, 0L)] = paste0(dupnames, suffixes[2L])
   }
-  # If no.dups = TRUE we also need to added the suffix to columns in y
-  # that share a name with by.x
+
   dupkeyx = intersect(by.x, end)
   if (no.dups && length(dupkeyx)) {
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
 
-  # implement incomparables argument #2587
   if (!is.null(incomparables)) {
-    # %fin% to be replaced when #5232 is implemented/closed
     "%fin%" = function(x, table) if (is.character(x) && is.character(table)) x %chin% table else x %in% table
     xind = rowSums(x[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.x]) == length(by)
     yind = rowSums(y[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.y]) == length(by)
-    # subset both so later steps still work
     x = x[xind]
     y = y[yind]
   }
-  dt = y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
 
-  if (all.y && nrow(y)) {  # If y does not have any rows, no need to proceed
-    # Perhaps not very commonly used, so not a huge deal that the join is redone here.
+  # Enhanced error handling during join
+  user_x_name = deparse1(substitute(x))
+  user_y_name = deparse1(substitute(y))
+  dt = tryCatch({
+    y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
+  }, bmerge_incompatible_type_error = function(e) {
+    col_from_user_x <- e$bmerge_i_arg_details$col_name
+    type_from_user_x <- e$bmerge_i_arg_details$type_str
+
+    col_from_user_y <- e$bmerge_x_arg_details$col_name
+    type_from_user_y <- e$bmerge_x_arg_details$type_str
+
+    by_col_name <- col_from_user_x
+
+    new_msg <- sprintf(
+      "When merging data.table '%s' (argument 'x') and data.table '%s' (argument 'y') on column '%s':\n  Incompatible join types. Factor columns must join to factor or character columns.\n  Type in '%s' (column '%s'%s): %s\n  Type in '%s' (column '%s'%s): %s",
+      user_x_name,
+      user_y_name,
+      by_col_name,
+      user_x_name, by_col_name, if (suffixes[1] != "") paste0(" becoming '", by_col_name, suffixes[1], "'") else "", type_from_user_x,
+      user_y_name, by_col_name, if (suffixes[2] != "") paste0(" becoming '", by_col_name, suffixes[2], "'") else "", type_from_user_y
+    )
+
+    stop(errorCondition(message = new_msg, class = c("datatable_merge_error", "error", "condition"), call = NULL))
+  })
+
+  if (all.y && nrow(y)) {
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }
-  # X[Y] syntax puts JIS i columns at the end, merge likes them alongside i.
   newend = setdiff(nm_y, by.y)
-  # fix for #1290, make sure by.y order is set properly before naming
   setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
   setnames(dt, c(by.x, start, end))
   if (nrow(dt) > 0L) {
     setkeyv(dt, if (sort) by.x else NULL)
   }
 
-  # Throw warning if there are duplicate column names in 'dt' (i.e. if
-  # `suffixes=c("","")`, to match behaviour in base:::merge.data.frame)
   resultdupnames = names(dt)[duplicated(names(dt))]
   if (length(resultdupnames)) {
     warningf("column names %s are duplicated in the result", brackify(resultdupnames))
   }
 
-  # retain custom classes of first argument that resulted in dispatch to this method, #1378
   setattr(dt, "class", class_x)
   dt
 }

From 54a7286473cc35e562c579056cac2c41af568888 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 09:23:37 +0530
Subject: [PATCH 02/23] jb

---
 R/merge.R | 59 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/R/merge.R b/R/merge.R
index d406e40521..edcb9fc3ff 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -27,7 +27,6 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   nm_x = names(x)
   nm_y = names(y)
 
-  ## set up 'by'/'by.x'/'by.y'
   if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y))
     stopf("`by.x` and `by.y` must be of same length.")
   if (!missing(by) && !missing(by.x))
@@ -77,7 +76,6 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
     end[chmatch(dupnames, end, 0L)] = paste0(dupnames, suffixes[2L])
   }
-
   dupkeyx = intersect(by.x, end)
   if (no.dups && length(dupkeyx)) {
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
@@ -91,36 +89,47 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     y = y[yind]
   }
 
-  # Enhanced error handling during join
-  user_x_name = deparse1(substitute(x))
-  user_y_name = deparse1(substitute(y))
-  dt = tryCatch({
-    y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
-  }, bmerge_incompatible_type_error = function(e) {
-    col_from_user_x <- e$bmerge_i_arg_details$col_name
-    type_from_user_x <- e$bmerge_i_arg_details$type_str
-
-    col_from_user_y <- e$bmerge_x_arg_details$col_name
-    type_from_user_y <- e$bmerge_x_arg_details$type_str
-
-    by_col_name <- col_from_user_x
+  # ----- BEGIN MODIFICATION -----
+  user_x_name <- deparse1(substitute(x))
+  user_y_name <- deparse1(substitute(y))
+  by_for_join <- by
+  dt <- tryCatch({
+      y[x, nomatch = if (all.x) NA else NULL, on = by_for_join, allow.cartesian = allow.cartesian]
+    },
+    bmerge_incompatible_type_error = function(e) {
+      safe_attr_get <- function(obj, attr_name, placeholder) {
+        val <- obj[[attr_name]]
+        if (is.null(val) || length(val) != 1L || is.na(val) || !nzchar(as.character(val)[1L])) {
+          warning(paste0("Problem retrieving attribute '", attr_name, "' from bmerge_incompatible_type_error object. Was: ", paste(capture.output(dput(val)), collapse = " ")))
+          return(placeholder)
+        }
+        return(as.character(val)[1L])
+      }
+      col_from_user_x_arg <- safe_attr_get(e, "c_bmerge_i_arg_bare_col_name", "[unknown column from x]")
+      type_from_user_x_arg <- safe_attr_get(e, "c_bmerge_i_arg_type", "[unknown type from x]")
+      col_from_user_y_arg <- safe_attr_get(e, "c_bmerge_x_arg_bare_col_name", "[unknown column from y]")
+      type_from_user_y_arg <- safe_attr_get(e, "c_bmerge_x_arg_type", "[unknown type from y]")
+      final_user_x_name <- if (is.null(user_x_name) || length(user_x_name) != 1L || !nzchar(user_x_name[1L])) "x" else user_x_name[1L]
+      final_user_y_name <- if (is.null(user_y_name) || length(user_y_name) != 1L || !nzchar(user_y_name[1L])) "y" else user_y_name[1L]
 
-    new_msg <- sprintf(
-      "When merging data.table '%s' (argument 'x') and data.table '%s' (argument 'y') on column '%s':\n  Incompatible join types. Factor columns must join to factor or character columns.\n  Type in '%s' (column '%s'%s): %s\n  Type in '%s' (column '%s'%s): %s",
-      user_x_name,
-      user_y_name,
-      by_col_name,
-      user_x_name, by_col_name, if (suffixes[1] != "") paste0(" becoming '", by_col_name, suffixes[1], "'") else "", type_from_user_x,
-      user_y_name, by_col_name, if (suffixes[2] != "") paste0(" becoming '", by_col_name, suffixes[2], "'") else "", type_from_user_y
-    )
+      msg <- sprintf(
+        "When merging data.table '%s' (argument 'x') and data.table '%s' (argument 'y'):\n  Incompatible join types. Factor columns must join to factor or character columns.\n  Column '%s' from '%s' (arg 'x') has type: %s.\n  Column '%s' from '%s' (arg 'y') has type: %s.",
+        final_user_x_name,
+        final_user_y_name,
+        col_from_user_x_arg, final_user_x_name, type_from_user_x_arg,
+        col_from_user_y_arg, final_user_y_name, type_from_user_y_arg
+      )
 
-    stop(errorCondition(message = new_msg, class = c("datatable_merge_error", "error", "condition"), call = NULL))
-  })
+      stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition"), call = NULL))
+    }
+  )
+  # ----- END MODIFICATION -----
 
   if (all.y && nrow(y)) {
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }
+
   newend = setdiff(nm_y, by.y)
   setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
   setnames(dt, c(by.x, start, end))

From d4bce9f3283faba9cbe35f5e34e3cc6e898427cd Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 09:26:04 +0530
Subject: [PATCH 03/23] jh

---
 R/bmerge.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 9ff5e9d9e3..31b9421a19 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -92,10 +92,10 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         list(
           message = raw_msg, # Generic message from bmerge's perspective
           call = NULL,       # Will be populated by stop() or handler
-          c_bmerge_x_arg_col_specifier = xname,        # e.g., "x.a" or "a"
-          c_bmerge_x_arg_type          = x_merge_type,  # e.g., "integer"
-          c_bmerge_i_arg_col_specifier = iname,        # e.g., "i.a" or "a"
-          c_bmerge_i_arg_type          = i_merge_type   # e.g., "factor"
+          c_bmerge_x_arg_bare_col_name = x_bare_colname_variable, # MUST be a valid string
+        c_bmerge_x_arg_type          = x_coltype_str_variable,  # MUST be a valid string
+        c_bmerge_i_arg_bare_col_name = i_bare_colname_variable, # MUST be a valid string
+        c_bmerge_i_arg_type          = i_coltype_str_variable   # MUST be a valid string
         ),
         class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
       )

From 2ede355d40a6d569cc5582949d96fa3db73df941 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 10:34:07 +0530
Subject: [PATCH 04/23] jkf

---
 R/bmerge.R | 57 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 31b9421a19..9293128fe3 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -1,5 +1,3 @@
-
-
 mergeType = function(x) {
   ans = typeof(x)
   if      (ans=="integer") { if (is.factor(x))             ans = "factor"    }
@@ -60,10 +58,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     xcol = xcols[a]
     x_merge_type = mergeType(x[[xcol]])
     i_merge_type = mergeType(i[[icol]])
-    xname = paste0("x.", names(x)[xcol])
-    iname = paste0("i.", names(i)[icol])
+    xname = paste0("x.", names(x)[xcol]) # Prefixed name for x's column (bmerge's perspective)
+    iname = paste0("i.", names(i)[icol]) # Prefixed name for i's column (bmerge's perspective)
     if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type)
     if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type)
+
     if (x_merge_type=="factor" || i_merge_type=="factor") {
       if (roll!=0.0 && a==length(icols))
         stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname)
@@ -71,12 +70,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
         set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]])  # nomatch=0L otherwise a level that is missing would match to NA values
         next
-      } else {
-        if (x_merge_type=="character") {
+      } else { # One is factor, the other is not factor
+        if (x_merge_type=="character") { # i is factor, x is character
           coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
           set(callersi, j=icol, value=i[[icol]])  # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
           next
-        } else if (i_merge_type=="character") {
+        } else if (i_merge_type=="character") { # i is character, x is factor
           if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
           newvalue = chmatch(i[[icol]], levels(x[[xcol]]), nomatch=0L)
           if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] = NA_integer_  # NA_character_ should match to NA in factor, #3809
@@ -84,29 +83,46 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           next
         }
       }
-      raw_msg <- sprintf("Incompatible join types for bmerge: '%s' (%s) and '%s' (%s). Factor columns must join to factor or character columns.",
-                       xname, x_merge_type, # Details for C-bmerge 'x' argument
-                       iname, i_merge_type) # Details for C-bmerge 'i' argument
+      # ----- BEGIN MODIFICATION -----
+      # If execution reaches here, it means:
+      # 1. One of (x_merge_type, i_merge_type) is "factor".
+      # 2. They are not BOTH "factor".
+      # 3. The non-factor column is NOT "character".
+      # This is the specific incompatibility: Factor joining with non-factor/non-character.
+
+      # Message for the condition object, from bmerge's perspective using its 'x' and 'i' arguments
+      # and their prefixed column names.
+      condition_message <- sprintf(
+        "Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.",
+        xname, x_merge_type, # xname is like "x.colA", x_merge_type is its type
+        iname, i_merge_type  # iname is like "i.colB", i_merge_type is its type
+      )
 
       condition <- structure(
         list(
-          message = raw_msg, # Generic message from bmerge's perspective
-          call = NULL,       # Will be populated by stop() or handler
-          c_bmerge_x_arg_bare_col_name = x_bare_colname_variable, # MUST be a valid string
-        c_bmerge_x_arg_type          = x_coltype_str_variable,  # MUST be a valid string
-        c_bmerge_i_arg_bare_col_name = i_bare_colname_variable, # MUST be a valid string
-        c_bmerge_i_arg_type          = i_coltype_str_variable   # MUST be a valid string
+          message = condition_message, # Raw message if not caught and rephrased
+          call = NULL,                 # Will be populated by stop() or handler
+
+          # Details for 'x' argument received by bmerge.R (e.g., DT2 in merge(DT1,DT2) or DT1 in DT1[DT2])
+          c_bmerge_x_arg_bare_col_name = names(x)[xcol], # Bare column name, e.g., "a"
+          c_bmerge_x_arg_type          = x_merge_type,  # Type string, e.g., "integer"
+
+          # Details for 'i' argument received by bmerge.R (e.g., DT1 in merge(DT1,DT2) or DT2 in DT1[DT2])
+          c_bmerge_i_arg_bare_col_name = names(i)[icol], # Bare column name, e.g., "a"
+          c_bmerge_i_arg_type          = i_merge_type   # Type string, e.g., "factor"
         ),
         class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
       )
       stop(condition)
-    }
+      # ----- END MODIFICATION -----
+    } # End of initial "if (x_merge_type=='factor' || i_merge_type=='factor')" block
+
     # we check factors first to cater for the case when trying to do rolling joins on factors
     if (x_merge_type == i_merge_type) {
       if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
       next
     }
-    cfl = c("character", "logical", "factor")
+    cfl = c("character", "logical", "factor") # Note: 'factor' in cfl is effectively dead code here due to earlier factor handling
     if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) {
       if (anyNA(i[[icol]]) && allNA(i[[icol]])) {
         coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, from_detail=gettext(" (all-NA)"), verbose=verbose)
@@ -116,6 +132,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
         next
       }
+      # This stopf is for other incompatible types not covered by the factor-specific error above
       stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
     }
     if (x_merge_type=="integer64" || i_merge_type=="integer64") {
@@ -168,7 +185,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         }
       }
     }
-  }
+  } # End of for loop iterating through join columns
 
   ## after all modifications of x, check if x has a proper key on all xcols.
   ## If not, calculate the order. Also for non-equi joins, the order must be calculated.
@@ -236,4 +253,4 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
 
   ans$xo = xo  # for further use by [.data.table
   ans
-}
+}
\ No newline at end of file

From dabaddce12ca2a926e9da9934dd09847dd778ee7 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 11:01:22 +0530
Subject: [PATCH 05/23] kjh

---
 R/bmerge.R |  36 ++++++------------
 R/merge.R  | 105 +++++++++++++++++++++++++++++++----------------------
 2 files changed, 72 insertions(+), 69 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 9293128fe3..72e560aa4c 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -70,8 +70,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
         set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]])  # nomatch=0L otherwise a level that is missing would match to NA values
         next
-      } else { # One is factor, the other is not factor
-        if (x_merge_type=="character") { # i is factor, x is character
+      } else {
+        if (x_merge_type=="character") { 
           coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
           set(callersi, j=icol, value=i[[icol]])  # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
           next
@@ -83,40 +83,27 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           next
         }
       }
-      # ----- BEGIN MODIFICATION -----
-      # If execution reaches here, it means:
-      # 1. One of (x_merge_type, i_merge_type) is "factor".
-      # 2. They are not BOTH "factor".
-      # 3. The non-factor column is NOT "character".
-      # This is the specific incompatibility: Factor joining with non-factor/non-character.
-
-      # Message for the condition object, from bmerge's perspective using its 'x' and 'i' arguments
-      # and their prefixed column names.
       condition_message <- sprintf(
-        "Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.",
+        "Incompatible join types (from bmerge perspective): %s (%s) and %s (%s). Factor columns must join to factor or character columns.",
         xname, x_merge_type, # xname is like "x.colA", x_merge_type is its type
         iname, i_merge_type  # iname is like "i.colB", i_merge_type is its type
       )
-
       condition <- structure(
         list(
-          message = condition_message, # Raw message if not caught and rephrased
+          message = condition_message, # Raw message from bmerge if not caught and rephrased
           call = NULL,                 # Will be populated by stop() or handler
 
-          # Details for 'x' argument received by bmerge.R (e.g., DT2 in merge(DT1,DT2) or DT1 in DT1[DT2])
-          c_bmerge_x_arg_bare_col_name = names(x)[xcol], # Bare column name, e.g., "a"
-          c_bmerge_x_arg_type          = x_merge_type,  # Type string, e.g., "integer"
+          # Details for 'x' argument bmerge.R received (e.g., table y from merge(x,y) or table x from x[i])
+          c_bmerge_x_arg_bare_col_name = names(x)[xcol], # Bare column name from x, e.g., "a"
+          c_bmerge_x_arg_type          = x_merge_type,  # Type string for x's column, e.g., "integer"
 
-          # Details for 'i' argument received by bmerge.R (e.g., DT1 in merge(DT1,DT2) or DT2 in DT1[DT2])
-          c_bmerge_i_arg_bare_col_name = names(i)[icol], # Bare column name, e.g., "a"
-          c_bmerge_i_arg_type          = i_merge_type   # Type string, e.g., "factor"
+          # Details for 'i' argument bmerge.R received (e.g., table x from merge(x,y) or table i from x[i])
+          c_bmerge_i_arg_bare_col_name = names(i)[icol], # Bare column name from i, e.g., "a"
+          c_bmerge_i_arg_type          = i_merge_type   # Type string for i's column, e.g., "factor"
         ),
         class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
       )
       stop(condition)
-      # ----- END MODIFICATION -----
-    } # End of initial "if (x_merge_type=='factor' || i_merge_type=='factor')" block
-
     # we check factors first to cater for the case when trying to do rolling joins on factors
     if (x_merge_type == i_merge_type) {
       if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
@@ -185,8 +172,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         }
       }
     }
-  } # End of for loop iterating through join columns
-
+  }
   ## after all modifications of x, check if x has a proper key on all xcols.
   ## If not, calculate the order. Also for non-equi joins, the order must be calculated.
   non_equi = which.first(ops != 1L) # 1 is "==" operator
diff --git a/R/merge.R b/R/merge.R
index edcb9fc3ff..996ac50209 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -1,5 +1,10 @@
 merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE, all.x = all,
                all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE, allow.cartesian=getOption("datatable.allow.cartesian"), incomparables=NULL, ...) {
+
+  
+  user_x_name <- deparse1(substitute(x))
+  user_y_name <- deparse1(substitute(y))
+
   if (!sort %in% c(TRUE, FALSE))
     stopf("Argument 'sort' should be logical TRUE/FALSE")
   if (!no.dups %in% c(TRUE, FALSE))
@@ -17,9 +22,9 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (x0 && y0)
       warningf("Neither of the input data.tables to join have columns.")
     else if (x0)
-      warningf("Input data.table '%s' has no columns.", "x")
+      warningf("Input data.table '%s' (argument 'x') has no columns.", user_x_name)
     else
-      warningf("Input data.table '%s' has no columns.", "y")
+      warningf("Input data.table '%s' (argument 'y') has no columns.", user_y_name)
   }
   check_duplicate_names(x)
   check_duplicate_names(y)
@@ -27,40 +32,44 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   nm_x = names(x)
   nm_y = names(y)
 
+  ## set up 'by'/'by.x'/'by.y'
+  # The 'by' variable created here is crucial as it becomes the 'on' argument for y[x, on=by, ...]
   if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y))
     stopf("`by.x` and `by.y` must be of same length.")
   if (!missing(by) && !missing(by.x))
     warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
-  if (!is.null(by.x)) {
+  if (!is.null(by.x)) { # by.x and by.y are provided
     if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
       stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
+    # ----- MODIFIED: Use specific table names in stopf -----
     if (!all(idx <- by.x %chin% nm_x)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
+      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'x'): %s", "by.x", user_x_name, brackify(by.x[!idx]))
     }
     if (!all(idx <- by.y %chin% nm_y)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx]))
+      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'y'): %s", "by.y", user_y_name, brackify(by.y[!idx]))
     }
-    by = by.x
-    names(by) = by.y
-  } else {
+    # ----- END MODIFICATION -----
+    by = by.x             # 'by' takes values from by.x (cols of table 'x' arg)
+    names(by) = by.y      # names of 'by' are cols from by.y (cols of table 'y' arg)
+                          # So `by` for `on=by` will be c(col_from_y = "col_from_x", ...)
+  } else { # by is provided or inferred
     if (is.null(by))
       by = intersect(key(x), key(y))
-    if (!length(by))
+    if (!length(by))   # was is.null() before PR#5183  changed to !length()
       by = key(x)
     if (!length(by))
       by = intersect(nm_x, nm_y)
     if (length(by) == 0L || !is.character(by))
       stopf("A non-empty vector of column names for `by` is required.")
     if (!all(idx <- by %in% nm_x)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx]))
+      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'x'): %s", "by", user_x_name, brackify(by[!idx]))
     }
     if (!all(idx <- by %in% nm_y)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx]))
+      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'y'): %s", "by", user_y_name, brackify(by[!idx]))
     }
-    by = unname(by)
-    by.x = by.y = by
+    by = unname(by)       # 'by' becomes a character vector of common column names for `on=by`
+    by.x = by.y = by      # by.x and by.y are set for later use in renaming/ordering
   }
-
   if (length(list(...))) {
     ell = as.list(substitute(list(...)))[-1L]
     for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
@@ -68,9 +77,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (unnamed_n)
       warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
   }
-
-  start = setdiff(nm_x, by.x)
-  end = setdiff(nm_y, by.y)
+  start = setdiff(nm_x, by.x) # by.x here refers to key cols from original x's perspective or common names
+  end = setdiff(nm_y, by.y)   # by.y here refers to key cols from original y's perspective or common names
   dupnames = intersect(start, end)
   if (length(dupnames)) {
     start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
@@ -80,68 +88,77 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   if (no.dups && length(dupkeyx)) {
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
-
   if (!is.null(incomparables)) {
-    "%fin%" = function(x, table) if (is.character(x) && is.character(table)) x %chin% table else x %in% table
-    xind = rowSums(x[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.x]) == length(by)
-    yind = rowSums(y[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.y]) == length(by)
+    "%fin%" = function(x_val, table_val) if (is.character(x_val) && is.character(table_val)) x_val %chin% table_val else x_val %in% table_val
+    xind = rowSums(x[, lapply(.SD, function(col_val_x) !(col_val_x %fin% incomparables)), .SDcols=by.x]) == length(by.x) # Use by.x for columns from table x
+    yind = rowSums(y[, lapply(.SD, function(col_val_y) !(col_val_y %fin% incomparables)), .SDcols=by.y]) == length(by.y) # Use by.y for columns from table y
     x = x[xind]
     y = y[yind]
   }
-
-  # ----- BEGIN MODIFICATION -----
-  user_x_name <- deparse1(substitute(x))
-  user_y_name <- deparse1(substitute(y))
-  by_for_join <- by
   dt <- tryCatch({
-      y[x, nomatch = if (all.x) NA else NULL, on = by_for_join, allow.cartesian = allow.cartesian]
+      y[x, nomatch = if (all.x) NA else NULL, on = by, allow.cartesian = allow.cartesian]
     },
     bmerge_incompatible_type_error = function(e) {
-      safe_attr_get <- function(obj, attr_name, placeholder) {
-        val <- obj[[attr_name]]
+            safe_attr_get <- function(obj, attr_name, placeholder) {
+        val <- obj[[attr_name]] # Use [[ to get NULL if attribute doesn't exist
         if (is.null(val) || length(val) != 1L || is.na(val) || !nzchar(as.character(val)[1L])) {
-          warning(paste0("Problem retrieving attribute '", attr_name, "' from bmerge_incompatible_type_error object. Was: ", paste(capture.output(dput(val)), collapse = " ")))
+          # Provide a warning to console to aid debugging if attributes are not as expected
+          warning(paste0(
+            "Developer warning: Problem retrieving attribute '", attr_name,
+            "' from bmerge_incompatible_type_error object. Was: ",
+            paste(capture.output(dput(val)), collapse=" ")
+          ))
           return(placeholder)
         }
         return(as.character(val)[1L])
       }
-      col_from_user_x_arg <- safe_attr_get(e, "c_bmerge_i_arg_bare_col_name", "[unknown column from x]")
-      type_from_user_x_arg <- safe_attr_get(e, "c_bmerge_i_arg_type", "[unknown type from x]")
-      col_from_user_y_arg <- safe_attr_get(e, "c_bmerge_x_arg_bare_col_name", "[unknown column from y]")
-      type_from_user_y_arg <- safe_attr_get(e, "c_bmerge_x_arg_type", "[unknown type from y]")
+
+      # In merge(USER_X, USER_Y), the internal join is y[x, ...]:
+      # - USER_X (1st arg to merge) corresponds to 'i' argument in bmerge.R (source of error `e$c_bmerge_i_arg_*`).
+      # - USER_Y (2nd arg to merge) corresponds to 'x' argument in bmerge.R (source of error `e$c_bmerge_x_arg_*`).
+
+      col_from_user_x_table <- safe_attr_get(e, "c_bmerge_i_arg_bare_col_name", "[unknown column from 'x' arg]")
+      type_from_user_x_table <- safe_attr_get(e, "c_bmerge_i_arg_type", "[unknown type for 'x' arg]")
+
+      col_from_user_y_table <- safe_attr_get(e, "c_bmerge_x_arg_bare_col_name", "[unknown column from 'y' arg]")
+      type_from_user_y_table <- safe_attr_get(e, "c_bmerge_x_arg_type", "[unknown type for 'y' arg]")
+
+      # Ensure user_x_name and user_y_name are valid strings for sprintf
       final_user_x_name <- if (is.null(user_x_name) || length(user_x_name) != 1L || !nzchar(user_x_name[1L])) "x" else user_x_name[1L]
       final_user_y_name <- if (is.null(user_y_name) || length(user_y_name) != 1L || !nzchar(user_y_name[1L])) "y" else user_y_name[1L]
-
       msg <- sprintf(
         "When merging data.table '%s' (argument 'x') and data.table '%s' (argument 'y'):\n  Incompatible join types. Factor columns must join to factor or character columns.\n  Column '%s' from '%s' (arg 'x') has type: %s.\n  Column '%s' from '%s' (arg 'y') has type: %s.",
         final_user_x_name,
         final_user_y_name,
-        col_from_user_x_arg, final_user_x_name, type_from_user_x_arg,
-        col_from_user_y_arg, final_user_y_name, type_from_user_y_arg
+        col_from_user_x_table, final_user_x_name, type_from_user_x_table,
+        col_from_user_y_table, final_user_y_name, type_from_user_y_table
       )
-
       stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition"), call = NULL))
     }
   )
-  # ----- END MODIFICATION -----
 
-  if (all.y && nrow(y)) {
+
+  if (all.y && nrow(y)) {  
+    # are most likely to be caught in the primary join attempt.
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }
-
-  newend = setdiff(nm_y, by.y)
+    newend = setdiff(nm_y, by.y)
   setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
+
   setnames(dt, c(by.x, start, end))
   if (nrow(dt) > 0L) {
-    setkeyv(dt, if (sort) by.x else NULL)
+    setkeyv(dt, if (sort) by.x else NULL) # Uses final key column names
   }
 
+  # Throw warning if there are duplicate column names in 'dt' (i.e. if
+  # `suffixes=c("","")`, to match behaviour in base:::merge.data.frame)
   resultdupnames = names(dt)[duplicated(names(dt))]
   if (length(resultdupnames)) {
     warningf("column names %s are duplicated in the result", brackify(resultdupnames))
   }
 
+  # retain custom classes of first argument that resulted in dispatch to this method, #1378
   setattr(dt, "class", class_x)
   dt
-}
+}
\ No newline at end of file

From bfaad2c0f5ba2c99e456c93259985d88f3c082bd Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 11:17:42 +0530
Subject: [PATCH 06/23] iyg

---
 R/bmerge.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/bmerge.R b/R/bmerge.R
index 72e560aa4c..5528130c13 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -104,6 +104,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
       )
       stop(condition)
+    }
     # we check factors first to cater for the case when trying to do rolling joins on factors
     if (x_merge_type == i_merge_type) {
       if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)

From 8f97c836f11a42b90983c3aaeacef7c5d58de170 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 12:01:18 +0530
Subject: [PATCH 07/23] jsgdf

---
 R/merge.R | 130 +++++++++++++++++++++++++-----------------------------
 1 file changed, 60 insertions(+), 70 deletions(-)

diff --git a/R/merge.R b/R/merge.R
index 996ac50209..35872a6389 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -1,9 +1,7 @@
 merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE, all.x = all,
                all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE, allow.cartesian=getOption("datatable.allow.cartesian"), incomparables=NULL, ...) {
 
-  
-  user_x_name <- deparse1(substitute(x))
-  user_y_name <- deparse1(substitute(y))
+  # NO user_x_name / user_y_name here to maintain original variable environment for tests
 
   if (!sort %in% c(TRUE, FALSE))
     stopf("Argument 'sort' should be logical TRUE/FALSE")
@@ -13,7 +11,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   if (!is.data.table(y)) {
     y = as.data.table(y)
     if (missing(by) && missing(by.x)) {
-      by = key(x)
+      by = key(x) # Original logic
     }
   }
   x0 = length(x) == 0L
@@ -22,37 +20,33 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (x0 && y0)
       warningf("Neither of the input data.tables to join have columns.")
     else if (x0)
-      warningf("Input data.table '%s' (argument 'x') has no columns.", user_x_name)
+      warningf("Input data.table '%s' has no columns.", "x") # Original: literal "x"
     else
-      warningf("Input data.table '%s' (argument 'y') has no columns.", user_y_name)
+      warningf("Input data.table '%s' has no columns.", "y") # Original: literal "y"
   }
   check_duplicate_names(x)
   check_duplicate_names(y)
 
-  nm_x = names(x)
-  nm_y = names(y)
+  nm_x = names(x) # Original logic
+  nm_y = names(y) # Original logic
 
-  ## set up 'by'/'by.x'/'by.y'
-  # The 'by' variable created here is crucial as it becomes the 'on' argument for y[x, on=by, ...]
+  ## set up 'by'/'by.x'/'by.y' - RETAIN ORIGINAL LOGIC EXACTLY
   if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y))
     stopf("`by.x` and `by.y` must be of same length.")
   if (!missing(by) && !missing(by.x))
     warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
-  if (!is.null(by.x)) { # by.x and by.y are provided
+  if (!is.null(by.x)) {
     if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
       stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
-    # ----- MODIFIED: Use specific table names in stopf -----
     if (!all(idx <- by.x %chin% nm_x)) {
-      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'x'): %s", "by.x", user_x_name, brackify(by.x[!idx]))
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx])) # Original: literal "x"
     }
     if (!all(idx <- by.y %chin% nm_y)) {
-      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'y'): %s", "by.y", user_y_name, brackify(by.y[!idx]))
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx])) # Original: literal "y"
     }
-    # ----- END MODIFICATION -----
-    by = by.x             # 'by' takes values from by.x (cols of table 'x' arg)
-    names(by) = by.y      # names of 'by' are cols from by.y (cols of table 'y' arg)
-                          # So `by` for `on=by` will be c(col_from_y = "col_from_x", ...)
-  } else { # by is provided or inferred
+    by = by.x
+    names(by) = by.y
+  } else {
     if (is.null(by))
       by = intersect(key(x), key(y))
     if (!length(by))   # was is.null() before PR#5183  changed to !length()
@@ -62,14 +56,16 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (length(by) == 0L || !is.character(by))
       stopf("A non-empty vector of column names for `by` is required.")
     if (!all(idx <- by %in% nm_x)) {
-      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'x'): %s", "by", user_x_name, brackify(by[!idx]))
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx])) # Original: literal "x"
     }
     if (!all(idx <- by %in% nm_y)) {
-      stopf("The following columns listed in `%s` are missing from data.table '%s' (argument 'y'): %s", "by", user_y_name, brackify(by[!idx]))
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx])) # Original: literal "y"
     }
-    by = unname(by)       # 'by' becomes a character vector of common column names for `on=by`
-    by.x = by.y = by      # by.x and by.y are set for later use in renaming/ordering
+    by = unname(by)
+    by.x = by.y = by
   }
+
+  # warn about unused arguments #2587
   if (length(list(...))) {
     ell = as.list(substitute(list(...)))[-1L]
     for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
@@ -77,8 +73,9 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (unnamed_n)
       warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
   }
-  start = setdiff(nm_x, by.x) # by.x here refers to key cols from original x's perspective or common names
-  end = setdiff(nm_y, by.y)   # by.y here refers to key cols from original y's perspective or common names
+
+  start = setdiff(nm_x, by.x) # Original logic
+  end = setdiff(nm_y, by.y)   # Original logic
   dupnames = intersect(start, end)
   if (length(dupnames)) {
     start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
@@ -88,77 +85,70 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   if (no.dups && length(dupkeyx)) {
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
+
+  # implement incomparables argument #2587
   if (!is.null(incomparables)) {
+    # %fin% to be replaced when #5232 is implemented/closed
     "%fin%" = function(x_val, table_val) if (is.character(x_val) && is.character(table_val)) x_val %chin% table_val else x_val %in% table_val
-    xind = rowSums(x[, lapply(.SD, function(col_val_x) !(col_val_x %fin% incomparables)), .SDcols=by.x]) == length(by.x) # Use by.x for columns from table x
-    yind = rowSums(y[, lapply(.SD, function(col_val_y) !(col_val_y %fin% incomparables)), .SDcols=by.y]) == length(by.y) # Use by.y for columns from table y
-    x = x[xind]
-    y = y[yind]
+    xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x) # Original by.x usage
+    yind = rowSums(y[, lapply(.SD, function(y_col_val) !(y_col_val %fin% incomparables)), .SDcols=by.y]) == length(by.y) # Original by.y usage
+    # subset both so later steps still work
+    x = x[xind] # Original modification of x
+    y = y[yind] # Original modification of y
   }
+
+  # ----- MINIMAL MODIFICATION: tryCatch around the main join call -----
   dt <- tryCatch({
-      y[x, nomatch = if (all.x) NA else NULL, on = by, allow.cartesian = allow.cartesian]
+      # Original join call, using 'by' as constructed by original logic above
+      y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
     },
     bmerge_incompatible_type_error = function(e) {
-            safe_attr_get <- function(obj, attr_name, placeholder) {
-        val <- obj[[attr_name]] # Use [[ to get NULL if attribute doesn't exist
-        if (is.null(val) || length(val) != 1L || is.na(val) || !nzchar(as.character(val)[1L])) {
-          # Provide a warning to console to aid debugging if attributes are not as expected
-          warning(paste0(
-            "Developer warning: Problem retrieving attribute '", attr_name,
-            "' from bmerge_incompatible_type_error object. Was: ",
-            paste(capture.output(dput(val)), collapse=" ")
-          ))
-          return(placeholder)
-        }
-        return(as.character(val)[1L])
-      }
-
-      # In merge(USER_X, USER_Y), the internal join is y[x, ...]:
-      # - USER_X (1st arg to merge) corresponds to 'i' argument in bmerge.R (source of error `e$c_bmerge_i_arg_*`).
-      # - USER_Y (2nd arg to merge) corresponds to 'x' argument in bmerge.R (source of error `e$c_bmerge_x_arg_*`).
+      # Get table names *locally* only when this specific error occurs.
+      # These x_arg_name and y_arg_name refer to the 'x' and 'y' arguments of merge.data.table
+      x_arg_name <- deparse1(substitute(x, env = parent.frame(2))) # Go up to merge.data.table's frame
+      y_arg_name <- deparse1(substitute(y, env = parent.frame(2))) # Go up to merge.data.table's frame
 
-      col_from_user_x_table <- safe_attr_get(e, "c_bmerge_i_arg_bare_col_name", "[unknown column from 'x' arg]")
-      type_from_user_x_table <- safe_attr_get(e, "c_bmerge_i_arg_type", "[unknown type for 'x' arg]")
+      # Ensure arg names are single, non-empty strings
+      final_x_arg_name <- if (is.null(x_arg_name) || length(x_arg_name) != 1L || !nzchar(x_arg_name[1L])) "arg 'x'" else x_arg_name[1L]
+      final_y_arg_name <- if (is.null(y_arg_name) || length(y_arg_name) != 1L || !nzchar(y_arg_name[1L])) "arg 'y'" else y_arg_name[1L]
 
-      col_from_user_y_table <- safe_attr_get(e, "c_bmerge_x_arg_bare_col_name", "[unknown column from 'y' arg]")
-      type_from_user_y_table <- safe_attr_get(e, "c_bmerge_x_arg_type", "[unknown type for 'y' arg]")
+      # Extract column/type details from error object 'e'
+      # Assumes attributes on 'e' are valid single character strings from R/bmerge.R
+      # Mapping: merge's 'x' is bmerge's 'i'; merge's 'y' is bmerge's 'x'
+      col_from_x_arg <- e$c_bmerge_i_arg_bare_col_name
+      type_from_x_arg <- e$c_bmerge_i_arg_type
+      col_from_y_arg <- e$c_bmerge_x_arg_bare_col_name
+      type_from_y_arg <- e$c_bmerge_x_arg_type
 
-      # Ensure user_x_name and user_y_name are valid strings for sprintf
-      final_user_x_name <- if (is.null(user_x_name) || length(user_x_name) != 1L || !nzchar(user_x_name[1L])) "x" else user_x_name[1L]
-      final_user_y_name <- if (is.null(user_y_name) || length(user_y_name) != 1L || !nzchar(user_y_name[1L])) "y" else user_y_name[1L]
+      # Construct the concise error message
       msg <- sprintf(
-        "When merging data.table '%s' (argument 'x') and data.table '%s' (argument 'y'):\n  Incompatible join types. Factor columns must join to factor or character columns.\n  Column '%s' from '%s' (arg 'x') has type: %s.\n  Column '%s' from '%s' (arg 'y') has type: %s.",
-        final_user_x_name,
-        final_user_y_name,
-        col_from_user_x_table, final_user_x_name, type_from_user_x_table,
-        col_from_user_y_table, final_user_y_name, type_from_user_y_table
+        "Incompatible join types for merge: table '%s' column '%s' (%s) and table '%s' column '%s' (%s). Factor columns must join to factor or character columns.",
+        final_x_arg_name, col_from_x_arg, type_from_x_arg,
+        final_y_arg_name, col_from_y_arg, type_from_y_arg
       )
+      
       stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition"), call = NULL))
     }
   )
+  # ----- END MINIMAL MODIFICATION -----
 
-
-  if (all.y && nrow(y)) {  
-    # are most likely to be caught in the primary join attempt.
+  if (all.y && nrow(y)) {  # Original logic
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }
-    newend = setdiff(nm_y, by.y)
-  setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
-
-  setnames(dt, c(by.x, start, end))
+  
+  newend = setdiff(nm_y, by.y) # Original logic
+  setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend)) # Original logic
+  setnames(dt, c(by.x, start, end)) # Original logic
   if (nrow(dt) > 0L) {
-    setkeyv(dt, if (sort) by.x else NULL) # Uses final key column names
+    setkeyv(dt, if (sort) by.x else NULL) # Original logic
   }
 
-  # Throw warning if there are duplicate column names in 'dt' (i.e. if
-  # `suffixes=c("","")`, to match behaviour in base:::merge.data.frame)
   resultdupnames = names(dt)[duplicated(names(dt))]
   if (length(resultdupnames)) {
     warningf("column names %s are duplicated in the result", brackify(resultdupnames))
   }
 
-  # retain custom classes of first argument that resulted in dispatch to this method, #1378
   setattr(dt, "class", class_x)
   dt
 }
\ No newline at end of file

From 2b7edce4b23e4b9dafe13eede520d3e118489012 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 12:09:11 +0530
Subject: [PATCH 08/23] kjh

---
 R/bmerge.R | 95 ++++++++++++++++++------------------------------------
 1 file changed, 32 insertions(+), 63 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 5528130c13..1d6ff4f71a 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -2,14 +2,11 @@ mergeType = function(x) {
   ans = typeof(x)
   if      (ans=="integer") { if (is.factor(x))             ans = "factor"    }
   else if (ans=="double")  { if (inherits(x, "integer64")) ans = "integer64" }
-  # do not call fitsInInt*(x) yet because i) if both types are double we don't need to coerce even if one or both sides
-  # are int-as-double, and ii) to save calling it until we really need it
   ans
 }
 
 cast_with_attrs = function(x, cast_fun) {
   ans = cast_fun(x)
-  # do not copy attributes when coercing factor (to character)
   if (!is.factor(x) && !is.null(attributes(x))) attributes(ans) = attributes(x)
   ans
 }
@@ -27,18 +24,6 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
 {
   callersi = i
   i = shallow(i)
-  # Just before the call to bmerge() in [.data.table there is a shallow() copy of i to prevent coercions here
-  # by bmerge changing the type of the user's input object by reference. We now shallow copy i again. If we then
-  # coerce a column in i only, we are just changing the temporary coercion used for the merge operation. If we
-  # set callersi too then we are keeping that coerced i column in the merge result returned to user.
-  # The type of the i column is always returned (i.e. just i set not callersi too), other than:
-  #   i) to convert int-as-double to int, useful for ad hoc joins when the L postfix is often forgotten.
-  #  ii) to coerce i.factor to character when joining to x.character
-  # So those are the only two uses of callersi below.
-  # Careful to only use plonk syntax (full column) on i and x from now on, otherwise user's i and x would
-  # change. This is why shallow() is very importantly internal only, currently.
-
-  # Using .SD in j to join could fail due to being locked and set() being used here, #1926
   .Call(C_unlock, i)
   x = shallow(x)
   .Call(C_unlock, x)
@@ -50,16 +35,12 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
   supported = c(ORDERING_TYPES, "factor", "integer64")
 
   if (nrow(i)) for (a in seq_along(icols)) {
-    # - check that join columns have compatible types
-    # - do type coercions if necessary on just the shallow local copies for the purpose of join
-    # - handle factor columns appropriately
-    # Note that if i is keyed, if this coerces i's key gets dropped by set()
     icol = icols[a]
     xcol = xcols[a]
     x_merge_type = mergeType(x[[xcol]])
     i_merge_type = mergeType(i[[icol]])
-    xname = paste0("x.", names(x)[xcol]) # Prefixed name for x's column (bmerge's perspective)
-    iname = paste0("i.", names(i)[icol]) # Prefixed name for i's column (bmerge's perspective)
+    xname = paste0("x.", names(x)[xcol])
+    iname = paste0("i.", names(i)[icol])
     if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type)
     if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type)
 
@@ -68,49 +49,48 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname)
       if (x_merge_type=="factor" && i_merge_type=="factor") {
         if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
-        set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]])  # nomatch=0L otherwise a level that is missing would match to NA values
+        set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]])
         next
       } else {
-        if (x_merge_type=="character") { 
+        if (x_merge_type=="character") {
           coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
-          set(callersi, j=icol, value=i[[icol]])  # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
+          set(callersi, j=icol, value=i[[icol]])
           next
-        } else if (i_merge_type=="character") { # i is character, x is factor
+        } else if (i_merge_type=="character") {
           if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
           newvalue = chmatch(i[[icol]], levels(x[[xcol]]), nomatch=0L)
-          if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] = NA_integer_  # NA_character_ should match to NA in factor, #3809
+          if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] = NA_integer_
           set(i, j=icol, value=newvalue)
           next
         }
       }
+      # Incompatible factor join: Factor vs (Not Factor and Not Character)
+      # The 'message' attribute must match the *old* error for direct calls to bmerge (e.g., DT[otherDT])
       condition_message <- sprintf(
-        "Incompatible join types (from bmerge perspective): %s (%s) and %s (%s). Factor columns must join to factor or character columns.",
-        xname, x_merge_type, # xname is like "x.colA", x_merge_type is its type
-        iname, i_merge_type  # iname is like "i.colB", i_merge_type is its type
+        "Incompatible join types: %s (%s) and %s (%s)", # Exact match for tests like 2044.24
+        xname, x_merge_type,
+        iname, i_merge_type
       )
+
       condition <- structure(
         list(
-          message = condition_message, # Raw message from bmerge if not caught and rephrased
-          call = NULL,                 # Will be populated by stop() or handler
-
-          # Details for 'x' argument bmerge.R received (e.g., table y from merge(x,y) or table x from x[i])
-          c_bmerge_x_arg_bare_col_name = names(x)[xcol], # Bare column name from x, e.g., "a"
-          c_bmerge_x_arg_type          = x_merge_type,  # Type string for x's column, e.g., "integer"
-
-          # Details for 'i' argument bmerge.R received (e.g., table x from merge(x,y) or table i from x[i])
-          c_bmerge_i_arg_bare_col_name = names(i)[icol], # Bare column name from i, e.g., "a"
-          c_bmerge_i_arg_type          = i_merge_type   # Type string for i's column, e.g., "factor"
+          message = condition_message,
+          call = NULL,
+          c_bmerge_x_arg_bare_col_name = names(x)[xcol],
+          c_bmerge_x_arg_type          = x_merge_type,
+          c_bmerge_i_arg_bare_col_name = names(i)[icol],
+          c_bmerge_i_arg_type          = i_merge_type
         ),
         class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
       )
       stop(condition)
     }
-    # we check factors first to cater for the case when trying to do rolling joins on factors
+
     if (x_merge_type == i_merge_type) {
       if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
       next
     }
-    cfl = c("character", "logical", "factor") # Note: 'factor' in cfl is effectively dead code here due to earlier factor handling
+    cfl = c("character", "logical", "factor")
     if (x_merge_type %chin% cfl || i_merge_type %chin% cfl) {
       if (anyNA(i[[icol]]) && allNA(i[[icol]])) {
         coerce_col(i, icol, i_merge_type, x_merge_type, iname, xname, from_detail=gettext(" (all-NA)"), verbose=verbose)
@@ -120,25 +100,23 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
         next
       }
-      # This stopf is for other incompatible types not covered by the factor-specific error above
+      # This 'stopf' might have been the one originally hit by the failing tests.
+      # Our custom error above now preempts this for incompatible factor joins.
       stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
     }
     if (x_merge_type=="integer64" || i_merge_type=="integer64") {
       nm = c(iname, xname)
-      if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) }  # w is which to coerce
+      if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) }
       if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
         from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
         coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
       } else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
     } else {
-      # just integer and double left
-      ic_idx = which(icol == icols) # check if on is joined on multiple conditions, #6602
+      ic_idx = which(icol == icols)
       if (i_merge_type=="double") {
         coerce_x = FALSE
         if (fitsInInt32(i[[icol]])) {
           coerce_x = TRUE
-          # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys
-          # we've always coerced to int and returned int, for convenience.
           if (length(ic_idx)>1L) {
             xc_idx = xcols[ic_idx]
             for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
@@ -151,7 +129,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           if (coerce_x) {
             from_detail = gettext(" (which contains no fractions)")
             coerce_col(i, icol, "double", "integer", iname, xname, from_detail, verbose=verbose)
-            set(callersi, j=icol, value=i[[icol]])       # change the shallow copy of i up in [.data.table to reflect in the result, too.
+            set(callersi, j=icol, value=i[[icol]])
             if (length(ic_idx)>1L) {
               xc_idx = xcols[ic_idx]
               for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
@@ -174,11 +152,9 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
       }
     }
   }
-  ## after all modifications of x, check if x has a proper key on all xcols.
-  ## If not, calculate the order. Also for non-equi joins, the order must be calculated.
-  non_equi = which.first(ops != 1L) # 1 is "==" operator
+
+  non_equi = which.first(ops != 1L)
   if (is.na(non_equi)) {
-    # equi join. use existing key (#1825) or existing secondary index (#1439)
     if (identical(xcols, head(chmatch(key(x), names(x)), length(xcols)))) {
       xo = integer(0L)
       if (verbose) catf("on= matches existing key, using key\n")
@@ -192,27 +168,21 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         if (verbose) {last.started.at=proc.time(); flush.console()}
         xo = forderv(x, by = xcols)
         if (verbose) {catf("Calculated ad hoc index in %s\n", timetaken(last.started.at)); flush.console()}
-        # TODO: use setindex() instead, so it's cached for future reuse
       }
     }
-    ## these variables are only needed for non-equi joins. Set them to default.
     nqgrp = integer(0L)
     nqmaxgrp = 1L
   } else {
-    # non-equi operators present.. investigate groups..
     nqgrp = integer(0L)
     nqmaxgrp = 1L
     if (verbose) catf("Non-equi join operators detected ... \n")
     if (roll != FALSE) stopf("roll is not implemented for non-equi joins yet.")
     if (verbose) {last.started.at=proc.time();catf("  forder took ... ");flush.console()}
-    # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
     xo = forderv(x, xcols, retGrp=TRUE)
-    if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
+    if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
     xg = attr(xo, 'starts', exact=TRUE)
     resetcols = head(xcols, non_equi-1L)
     if (length(resetcols)) {
-      # TODO: can we get around having to reorder twice here?
-      # or at least reuse previous order?
       if (verbose) {last.started.at=proc.time();catf("  Generating group lengths ... ");flush.console()}
       resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts', exact=TRUE)
       resetlen = .Call(Cuniqlengths, resetlen, nrow(x))
@@ -221,8 +191,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     if (verbose) {last.started.at=proc.time();catf("  Generating non-equi group ids ... ");flush.console()}
     nqgrp = .Call(Cnestedid, x, xcols[non_equi:length(xcols)], xo, xg, resetlen, mult)
     if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()}
-    if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf.
-    if (nqmaxgrp > 1L) { # got some non-equi join work to do
+    if (length(nqgrp)) nqmaxgrp = max(nqgrp)
+    if (nqmaxgrp > 1L) {
       if ("_nqgrp_" %in% names(x)) stopf("Column name '_nqgrp_' is reserved for non-equi joins.")
       if (verbose) {last.started.at=proc.time();catf("  Recomputing forder with non-equi ids ... ");flush.console()}
       set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp)
@@ -236,8 +206,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
   if (verbose) {last.started.at=proc.time();catf("Starting bmerge ...\n");flush.console()}
   ans = .Call(Cbmerge, i, x, as.integer(icols), as.integer(xcols), xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp)
   if (verbose) {catf("bmerge done in %s\n",timetaken(last.started.at)); flush.console()}
-  # TO DO: xo could be moved inside Cbmerge
 
-  ans$xo = xo  # for further use by [.data.table
+  ans$xo = xo
   ans
 }
\ No newline at end of file

From af1f0a7b9a052641694ac31aa24c84a0fa175f4e Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 15:54:25 +0530
Subject: [PATCH 09/23] hb

---
 R/bmerge.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 1d6ff4f71a..6e44a40b59 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -67,7 +67,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
       # Incompatible factor join: Factor vs (Not Factor and Not Character)
       # The 'message' attribute must match the *old* error for direct calls to bmerge (e.g., DT[otherDT])
       condition_message <- sprintf(
-        "Incompatible join types: %s (%s) and %s (%s)", # Exact match for tests like 2044.24
+        "Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", # Exact match for tests like 2044.24
         xname, x_merge_type,
         iname, i_merge_type
       )

From 5d0b166ed867c3867003f7f04b302ce3622c9db7 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 17:38:23 +0530
Subject: [PATCH 10/23] hjg

---
 R/bmerge.R |  1 -
 R/merge.R  | 75 ++++++++++++++++++++++--------------------------------
 2 files changed, 30 insertions(+), 46 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 6e44a40b59..c3d5402db0 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -75,7 +75,6 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
       condition <- structure(
         list(
           message = condition_message,
-          call = NULL,
           c_bmerge_x_arg_bare_col_name = names(x)[xcol],
           c_bmerge_x_arg_type          = x_merge_type,
           c_bmerge_i_arg_bare_col_name = names(i)[icol],
diff --git a/R/merge.R b/R/merge.R
index 35872a6389..0afa5f17ac 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -1,7 +1,8 @@
 merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE, all.x = all,
                all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE, allow.cartesian=getOption("datatable.allow.cartesian"), incomparables=NULL, ...) {
 
-  # NO user_x_name / user_y_name here to maintain original variable environment for tests
+  # NO user_x_name / user_y_name at the top to maintain original variable environment for most of the function
+  # They will be fetched *only* within the error handler if needed.
 
   if (!sort %in% c(TRUE, FALSE))
     stopf("Argument 'sort' should be logical TRUE/FALSE")
@@ -39,33 +40,32 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
       stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
     if (!all(idx <- by.x %chin% nm_x)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx])) # Original: literal "x"
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
     }
     if (!all(idx <- by.y %chin% nm_y)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx])) # Original: literal "y"
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx]))
     }
     by = by.x
     names(by) = by.y
   } else {
     if (is.null(by))
       by = intersect(key(x), key(y))
-    if (!length(by))   # was is.null() before PR#5183  changed to !length()
+    if (!length(by))
       by = key(x)
     if (!length(by))
       by = intersect(nm_x, nm_y)
     if (length(by) == 0L || !is.character(by))
       stopf("A non-empty vector of column names for `by` is required.")
     if (!all(idx <- by %in% nm_x)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx])) # Original: literal "x"
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx]))
     }
     if (!all(idx <- by %in% nm_y)) {
-      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx])) # Original: literal "y"
+      stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx]))
     }
     by = unname(by)
     by.x = by.y = by
   }
 
-  # warn about unused arguments #2587
   if (length(list(...))) {
     ell = as.list(substitute(list(...)))[-1L]
     for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
@@ -74,8 +74,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
       warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
   }
 
-  start = setdiff(nm_x, by.x) # Original logic
-  end = setdiff(nm_y, by.y)   # Original logic
+  start = setdiff(nm_x, by.x)
+  end = setdiff(nm_y, by.y)
   dupnames = intersect(start, end)
   if (length(dupnames)) {
     start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
@@ -86,62 +86,47 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
 
-  # implement incomparables argument #2587
   if (!is.null(incomparables)) {
-    # %fin% to be replaced when #5232 is implemented/closed
     "%fin%" = function(x_val, table_val) if (is.character(x_val) && is.character(table_val)) x_val %chin% table_val else x_val %in% table_val
-    xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x) # Original by.x usage
-    yind = rowSums(y[, lapply(.SD, function(y_col_val) !(y_col_val %fin% incomparables)), .SDcols=by.y]) == length(by.y) # Original by.y usage
-    # subset both so later steps still work
-    x = x[xind] # Original modification of x
-    y = y[yind] # Original modification of y
+    xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x)
+    yind = rowSums(y[, lapply(.SD, function(y_col_val) !(y_col_val %fin% incomparables)), .SDcols=by.y]) == length(by.y)
+    x = x[xind]
+    y = y[yind]
   }
 
-  # ----- MINIMAL MODIFICATION: tryCatch around the main join call -----
   dt <- tryCatch({
-      # Original join call, using 'by' as constructed by original logic above
       y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
     },
     bmerge_incompatible_type_error = function(e) {
-      # Get table names *locally* only when this specific error occurs.
-      # These x_arg_name and y_arg_name refer to the 'x' and 'y' arguments of merge.data.table
-      x_arg_name <- deparse1(substitute(x, env = parent.frame(2))) # Go up to merge.data.table's frame
-      y_arg_name <- deparse1(substitute(y, env = parent.frame(2))) # Go up to merge.data.table's frame
-
-      # Ensure arg names are single, non-empty strings
-      final_x_arg_name <- if (is.null(x_arg_name) || length(x_arg_name) != 1L || !nzchar(x_arg_name[1L])) "arg 'x'" else x_arg_name[1L]
-      final_y_arg_name <- if (is.null(y_arg_name) || length(y_arg_name) != 1L || !nzchar(y_arg_name[1L])) "arg 'y'" else y_arg_name[1L]
-
-      # Extract column/type details from error object 'e'
-      # Assumes attributes on 'e' are valid single character strings from R/bmerge.R
-      # Mapping: merge's 'x' is bmerge's 'i'; merge's 'y' is bmerge's 'x'
-      col_from_x_arg <- e$c_bmerge_i_arg_bare_col_name
-      type_from_x_arg <- e$c_bmerge_i_arg_type
-      col_from_y_arg <- e$c_bmerge_x_arg_bare_col_name
-      type_from_y_arg <- e$c_bmerge_x_arg_type
+      # For merge(x=DT1, y=DT2), DT1 (user's 'x') is bmerge's 'i'
+      #                            DT2 (user's 'y') is bmerge's 'x'
+      x_part_col_name <- e$c_bmerge_i_arg_bare_col_name
+      x_part_type     <- e$c_bmerge_i_arg_type
+      y_part_col_name <- e$c_bmerge_x_arg_bare_col_name
+      y_part_type     <- e$c_bmerge_x_arg_type
 
-      # Construct the concise error message
+      # Use literal "x." and "y." prefixes referring to the arguments of merge()
       msg <- sprintf(
-        "Incompatible join types for merge: table '%s' column '%s' (%s) and table '%s' column '%s' (%s). Factor columns must join to factor or character columns.",
-        final_x_arg_name, col_from_x_arg, type_from_x_arg,
-        final_y_arg_name, col_from_y_arg, type_from_y_arg
+        "Incompatible join types: x.%s (%s) and y.%s (%s). Factor columns must join to factor or character columns.",
+        x_part_col_name, x_part_type,  # Corresponds to merge() argument 'x'
+        y_part_col_name, y_part_type   # Corresponds to merge() argument 'y'
       )
       
-      stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition"), call = NULL))
+      # Remove call = NULL to get "Error in merge.data.table(...): " prefix.
+      stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition")))
     }
   )
-  # ----- END MINIMAL MODIFICATION -----
 
-  if (all.y && nrow(y)) {  # Original logic
+  if (all.y && nrow(y)) {
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }
   
-  newend = setdiff(nm_y, by.y) # Original logic
-  setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend)) # Original logic
-  setnames(dt, c(by.x, start, end)) # Original logic
+  newend = setdiff(nm_y, by.y)
+  setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
+  setnames(dt, c(by.x, start, end))
   if (nrow(dt) > 0L) {
-    setkeyv(dt, if (sort) by.x else NULL) # Original logic
+    setkeyv(dt, if (sort) by.x else NULL)
   }
 
   resultdupnames = names(dt)[duplicated(names(dt))]

From 56d696d78121ca706e40d1d0d06f5293d5a753b3 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Tue, 3 Jun 2025 20:09:39 +0530
Subject: [PATCH 11/23] jhdf

---
 R/bmerge.R | 74 +++++++++++++++++++++++++++++++++++++-----------------
 R/merge.R  | 47 +++++++++++++++++++---------------
 2 files changed, 78 insertions(+), 43 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index c3d5402db0..255d7dbdfe 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -1,12 +1,17 @@
+
+
 mergeType = function(x) {
   ans = typeof(x)
   if      (ans=="integer") { if (is.factor(x))             ans = "factor"    }
   else if (ans=="double")  { if (inherits(x, "integer64")) ans = "integer64" }
+  # do not call fitsInInt*(x) yet because i) if both types are double we don't need to coerce even if one or both sides
+  # are int-as-double, and ii) to save calling it until we really need it
   ans
 }
 
 cast_with_attrs = function(x, cast_fun) {
   ans = cast_fun(x)
+  # do not copy attributes when coercing factor (to character)
   if (!is.factor(x) && !is.null(attributes(x))) attributes(ans) = attributes(x)
   ans
 }
@@ -24,6 +29,18 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
 {
   callersi = i
   i = shallow(i)
+  # Just before the call to bmerge() in [.data.table there is a shallow() copy of i to prevent coercions here
+  # by bmerge changing the type of the user's input object by reference. We now shallow copy i again. If we then
+  # coerce a column in i only, we are just changing the temporary coercion used for the merge operation. If we
+  # set callersi too then we are keeping that coerced i column in the merge result returned to user.
+  # The type of the i column is always returned (i.e. just i set not callersi too), other than:
+  #   i) to convert int-as-double to int, useful for ad hoc joins when the L postfix is often forgotten.
+  #  ii) to coerce i.factor to character when joining to x.character
+  # So those are the only two uses of callersi below.
+  # Careful to only use plonk syntax (full column) on i and x from now on, otherwise user's i and x would
+  # change. This is why shallow() is very importantly internal only, currently.
+
+  # Using .SD in j to join could fail due to being locked and set() being used here, #1926
   .Call(C_unlock, i)
   x = shallow(x)
   .Call(C_unlock, x)
@@ -35,6 +52,10 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
   supported = c(ORDERING_TYPES, "factor", "integer64")
 
   if (nrow(i)) for (a in seq_along(icols)) {
+    # - check that join columns have compatible types
+    # - do type coercions if necessary on just the shallow local copies for the purpose of join
+    # - handle factor columns appropriately
+    # Note that if i is keyed, if this coerces i's key gets dropped by set()
     icol = icols[a]
     xcol = xcols[a]
     x_merge_type = mergeType(x[[xcol]])
@@ -43,48 +64,44 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     iname = paste0("i.", names(i)[icol])
     if (!x_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", xname, x_merge_type)
     if (!i_merge_type %chin% supported) stopf("%s is type %s which is not supported by data.table join", iname, i_merge_type)
-
     if (x_merge_type=="factor" || i_merge_type=="factor") {
       if (roll!=0.0 && a==length(icols))
         stopf("Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined.", xname, iname)
       if (x_merge_type=="factor" && i_merge_type=="factor") {
         if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname)
-        set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]])
+        set(i, j=icol, value=chmatch(levels(i[[icol]]), levels(x[[xcol]]), nomatch=0L)[i[[icol]]])  # nomatch=0L otherwise a level that is missing would match to NA values
         next
       } else {
         if (x_merge_type=="character") {
           coerce_col(i, icol, "factor", "character", iname, xname, verbose=verbose)
-          set(callersi, j=icol, value=i[[icol]])
+          set(callersi, j=icol, value=i[[icol]])  # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
           next
         } else if (i_merge_type=="character") {
           if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname)
           newvalue = chmatch(i[[icol]], levels(x[[xcol]]), nomatch=0L)
-          if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] = NA_integer_
+          if (anyNA(i[[icol]])) newvalue[is.na(i[[icol]])] = NA_integer_  # NA_character_ should match to NA in factor, #3809
           set(i, j=icol, value=newvalue)
           next
         }
       }
-      # Incompatible factor join: Factor vs (Not Factor and Not Character)
-      # The 'message' attribute must match the *old* error for direct calls to bmerge (e.g., DT[otherDT])
       condition_message <- sprintf(
         "Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", # Exact match for tests like 2044.24
         xname, x_merge_type,
         iname, i_merge_type
       )
-
       condition <- structure(
         list(
           message = condition_message,
-          c_bmerge_x_arg_bare_col_name = names(x)[xcol],
-          c_bmerge_x_arg_type          = x_merge_type,
-          c_bmerge_i_arg_bare_col_name = names(i)[icol],
-          c_bmerge_i_arg_type          = i_merge_type
+          bmerge_x_arg_col_name      = names(x)[xcol],
+          bmerge_x_arg_type          = x_merge_type,
+          bmerge_i_arg_col_name      = names(i)[icol],
+          bmerge_i_arg_type          = i_merge_type
         ),
         class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
       )
       stop(condition)
     }
-
+    # we check factors first to cater for the case when trying to do rolling joins on factors
     if (x_merge_type == i_merge_type) {
       if (verbose) catf("%s has same type (%s) as %s. No coercion needed.\n", iname, x_merge_type, xname)
       next
@@ -99,23 +116,24 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         coerce_col(x, xcol, x_merge_type, i_merge_type, xname, iname, from_detail=gettext(" (all-NA)"), verbose=verbose)
         next
       }
-      # This 'stopf' might have been the one originally hit by the failing tests.
-      # Our custom error above now preempts this for incompatible factor joins.
       stopf("Incompatible join types: %s (%s) and %s (%s)", xname, x_merge_type, iname, i_merge_type)
     }
     if (x_merge_type=="integer64" || i_merge_type=="integer64") {
       nm = c(iname, xname)
-      if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) }
+      if (x_merge_type=="integer64") { w=i; wc=icol; wclass=i_merge_type; } else { w=x; wc=xcol; wclass=x_merge_type; nm=rev(nm) }  # w is which to coerce
       if (wclass=="integer" || (wclass=="double" && fitsInInt64(w[[wc]]))) {
         from_detail = if (wclass == "double") gettext(" (which has integer64 representation, e.g. no fractions)") else ""
         coerce_col(w, wc, wclass, "integer64", nm[1L], nm[2L], from_detail, verbose=verbose)
       } else stopf("Incompatible join types: %s is type integer64 but %s is type double and cannot be coerced to integer64 (e.g. has fractions)", nm[2L], nm[1L])
     } else {
-      ic_idx = which(icol == icols)
+      # just integer and double left
+      ic_idx = which(icol == icols) # check if on is joined on multiple conditions, #6602
       if (i_merge_type=="double") {
         coerce_x = FALSE
         if (fitsInInt32(i[[icol]])) {
           coerce_x = TRUE
+          # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys
+          # we've always coerced to int and returned int, for convenience.
           if (length(ic_idx)>1L) {
             xc_idx = xcols[ic_idx]
             for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
@@ -128,7 +146,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           if (coerce_x) {
             from_detail = gettext(" (which contains no fractions)")
             coerce_col(i, icol, "double", "integer", iname, xname, from_detail, verbose=verbose)
-            set(callersi, j=icol, value=i[[icol]])
+            set(callersi, j=icol, value=i[[icol]])       # change the shallow copy of i up in [.data.table to reflect in the result, too.
             if (length(ic_idx)>1L) {
               xc_idx = xcols[ic_idx]
               for (xb in xc_idx[which(vapply_1c(.shallow(x, xc_idx), mergeType) == "double")]) {
@@ -152,8 +170,11 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     }
   }
 
-  non_equi = which.first(ops != 1L)
+  ## after all modifications of x, check if x has a proper key on all xcols.
+  ## If not, calculate the order. Also for non-equi joins, the order must be calculated.
+  non_equi = which.first(ops != 1L) # 1 is "==" operator
   if (is.na(non_equi)) {
+    # equi join. use existing key (#1825) or existing secondary index (#1439)
     if (identical(xcols, head(chmatch(key(x), names(x)), length(xcols)))) {
       xo = integer(0L)
       if (verbose) catf("on= matches existing key, using key\n")
@@ -167,21 +188,27 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         if (verbose) {last.started.at=proc.time(); flush.console()}
         xo = forderv(x, by = xcols)
         if (verbose) {catf("Calculated ad hoc index in %s\n", timetaken(last.started.at)); flush.console()}
+        # TODO: use setindex() instead, so it's cached for future reuse
       }
     }
+    ## these variables are only needed for non-equi joins. Set them to default.
     nqgrp = integer(0L)
     nqmaxgrp = 1L
   } else {
+    # non-equi operators present.. investigate groups..
     nqgrp = integer(0L)
     nqmaxgrp = 1L
     if (verbose) catf("Non-equi join operators detected ... \n")
     if (roll != FALSE) stopf("roll is not implemented for non-equi joins yet.")
     if (verbose) {last.started.at=proc.time();catf("  forder took ... ");flush.console()}
+    # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
     xo = forderv(x, xcols, retGrp=TRUE)
-    if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
+    if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
     xg = attr(xo, 'starts', exact=TRUE)
     resetcols = head(xcols, non_equi-1L)
     if (length(resetcols)) {
+      # TODO: can we get around having to reorder twice here?
+      # or at least reuse previous order?
       if (verbose) {last.started.at=proc.time();catf("  Generating group lengths ... ");flush.console()}
       resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts', exact=TRUE)
       resetlen = .Call(Cuniqlengths, resetlen, nrow(x))
@@ -190,8 +217,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     if (verbose) {last.started.at=proc.time();catf("  Generating non-equi group ids ... ");flush.console()}
     nqgrp = .Call(Cnestedid, x, xcols[non_equi:length(xcols)], xo, xg, resetlen, mult)
     if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()}
-    if (length(nqgrp)) nqmaxgrp = max(nqgrp)
-    if (nqmaxgrp > 1L) {
+    if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf.
+    if (nqmaxgrp > 1L) { # got some non-equi join work to do
       if ("_nqgrp_" %in% names(x)) stopf("Column name '_nqgrp_' is reserved for non-equi joins.")
       if (verbose) {last.started.at=proc.time();catf("  Recomputing forder with non-equi ids ... ");flush.console()}
       set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp)
@@ -205,7 +232,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
   if (verbose) {last.started.at=proc.time();catf("Starting bmerge ...\n");flush.console()}
   ans = .Call(Cbmerge, i, x, as.integer(icols), as.integer(xcols), xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp)
   if (verbose) {catf("bmerge done in %s\n",timetaken(last.started.at)); flush.console()}
+  # TO DO: xo could be moved inside Cbmerge
 
-  ans$xo = xo
+  ans$xo = xo  # for further use by [.data.table
   ans
-}
\ No newline at end of file
+}
diff --git a/R/merge.R b/R/merge.R
index 0afa5f17ac..6cbb67dd62 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -1,9 +1,5 @@
 merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FALSE, all.x = all,
                all.y = all, sort = TRUE, suffixes = c(".x", ".y"), no.dups = TRUE, allow.cartesian=getOption("datatable.allow.cartesian"), incomparables=NULL, ...) {
-
-  # NO user_x_name / user_y_name at the top to maintain original variable environment for most of the function
-  # They will be fetched *only* within the error handler if needed.
-
   if (!sort %in% c(TRUE, FALSE))
     stopf("Argument 'sort' should be logical TRUE/FALSE")
   if (!no.dups %in% c(TRUE, FALSE))
@@ -12,7 +8,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   if (!is.data.table(y)) {
     y = as.data.table(y)
     if (missing(by) && missing(by.x)) {
-      by = key(x) # Original logic
+      by = key(x)
     }
   }
   x0 = length(x) == 0L
@@ -21,17 +17,17 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (x0 && y0)
       warningf("Neither of the input data.tables to join have columns.")
     else if (x0)
-      warningf("Input data.table '%s' has no columns.", "x") # Original: literal "x"
+      warningf("Input data.table '%s' has no columns.", "x")
     else
-      warningf("Input data.table '%s' has no columns.", "y") # Original: literal "y"
+      warningf("Input data.table '%s' has no columns.", "y")
   }
   check_duplicate_names(x)
   check_duplicate_names(y)
 
-  nm_x = names(x) # Original logic
-  nm_y = names(y) # Original logic
+  nm_x = names(x)
+  nm_y = names(y)
 
-  ## set up 'by'/'by.x'/'by.y' - RETAIN ORIGINAL LOGIC EXACTLY
+  ## set up 'by'/'by.x'/'by.y'
   if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y))
     stopf("`by.x` and `by.y` must be of same length.")
   if (!missing(by) && !missing(by.x))
@@ -50,7 +46,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   } else {
     if (is.null(by))
       by = intersect(key(x), key(y))
-    if (!length(by))
+    if (!length(by))   # was is.null() before PR#5183  changed to !length()
       by = key(x)
     if (!length(by))
       by = intersect(nm_x, nm_y)
@@ -66,6 +62,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     by.x = by.y = by
   }
 
+  # warn about unused arguments #2587
   if (length(list(...))) {
     ell = as.list(substitute(list(...)))[-1L]
     for (n in setdiff(names(ell), "")) warningf("Unknown argument '%s' has been passed.", n)
@@ -73,7 +70,10 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     if (unnamed_n)
       warningf("Passed %d unknown and unnamed arguments.", unnamed_n)
   }
-
+  # with i. prefix in v1.9.3, this goes away. Left here for now ...
+  ## sidestep the auto-increment column number feature-leading-to-bug by
+  ## ensuring no names end in ".1", see unit test
+  ## "merge and auto-increment columns in y[x]" in test-data.frame.like.R
   start = setdiff(nm_x, by.x)
   end = setdiff(nm_y, by.y)
   dupnames = intersect(start, end)
@@ -81,11 +81,14 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     start[chmatch(dupnames, start, 0L)] = paste0(dupnames, suffixes[1L])
     end[chmatch(dupnames, end, 0L)] = paste0(dupnames, suffixes[2L])
   }
+  # If no.dups = TRUE we also need to added the suffix to columns in y
+  # that share a name with by.x
   dupkeyx = intersect(by.x, end)
   if (no.dups && length(dupkeyx)) {
     end[chmatch(dupkeyx, end, 0L)] = paste0(dupkeyx, suffixes[2L])
   }
 
+  # implement incomparables argument #2587
   if (!is.null(incomparables)) {
     "%fin%" = function(x_val, table_val) if (is.character(x_val) && is.character(table_val)) x_val %chin% table_val else x_val %in% table_val
     xind = rowSums(x[, lapply(.SD, function(x_col_val) !(x_col_val %fin% incomparables)), .SDcols=by.x]) == length(by.x)
@@ -100,16 +103,16 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     bmerge_incompatible_type_error = function(e) {
       # For merge(x=DT1, y=DT2), DT1 (user's 'x') is bmerge's 'i'
       #                            DT2 (user's 'y') is bmerge's 'x'
-      x_part_col_name <- e$c_bmerge_i_arg_bare_col_name
-      x_part_type     <- e$c_bmerge_i_arg_type
-      y_part_col_name <- e$c_bmerge_x_arg_bare_col_name
-      y_part_type     <- e$c_bmerge_x_arg_type
+      x_part_col_name <- e$bmerge_i_arg_col_name
+      x_part_type     <- e$bmerge_i_arg_type
+      y_part_col_name <- e$bmerge_x_arg_col_name
+      y_part_type     <- e$bmerge_x_arg_type
 
       # Use literal "x." and "y." prefixes referring to the arguments of merge()
       msg <- sprintf(
-        "Incompatible join types: x.%s (%s) and y.%s (%s). Factor columns must join to factor or character columns.",
-        x_part_col_name, x_part_type,  # Corresponds to merge() argument 'x'
-        y_part_col_name, y_part_type   # Corresponds to merge() argument 'y'
+        "Incompatible join types: x.%s (%s) and i.%s (%s). Factor columns must join to factor or character columns.",
+        x_part_col_name, x_part_type, 
+        y_part_col_name, y_part_type   
       )
       
       # Remove call = NULL to get "Error in merge.data.table(...): " prefix.
@@ -121,19 +124,23 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }
-  
+  # X[Y] syntax puts JIS i columns at the end, merge likes them alongside i.
   newend = setdiff(nm_y, by.y)
+  # fix for #1290, make sure by.y order is set properly before naming
   setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
   setnames(dt, c(by.x, start, end))
   if (nrow(dt) > 0L) {
     setkeyv(dt, if (sort) by.x else NULL)
   }
 
+  # Throw warning if there are duplicate column names in 'dt' (i.e. if
+  # `suffixes=c("","")`, to match behaviour in base:::merge.data.frame)
   resultdupnames = names(dt)[duplicated(names(dt))]
   if (length(resultdupnames)) {
     warningf("column names %s are duplicated in the result", brackify(resultdupnames))
   }
 
+  # retain custom classes of first argument that resulted in dispatch to this method, #1378
   setattr(dt, "class", class_x)
   dt
 }
\ No newline at end of file

From bb0d6e6c72ca421e82454e39528f4aad778e9f38 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Thu, 5 Jun 2025 13:03:50 +0530
Subject: [PATCH 12/23] modification in bmerge and merge

---
 R/bmerge.R | 13 ++++++++++++-
 R/merge.R  | 18 +++++++++++++++---
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index dffca5e44f..8b6074f456 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -84,7 +84,18 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           next
         }
       }
-      stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
+      condition_message = sprintf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
+      condition <- structure(
+        list(
+          message = condition_message,
+          bmerge_x_arg_col_name      = names(x)[xcol],
+          bmerge_x_arg_type          = x_merge_type,
+          bmerge_i_arg_col_name      = names(i)[icol],
+          bmerge_i_arg_type          = i_merge_type
+        ),
+        class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
+      )
+      stop(condition)
     }
     # we check factors first to cater for the case when trying to do rolling joins on factors
     if (x_merge_type == i_merge_type) {
diff --git a/R/merge.R b/R/merge.R
index 4d7245983e..f6394255b5 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -92,13 +92,25 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   if (!is.null(incomparables)) {
     # %fin% to be replaced when #5232 is implemented/closed
     "%fin%" = function(x, table) if (is.character(x) && is.character(table)) x %chin% table else x %in% table
-    xind = rowSums(x[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.x]) == length(by)
-    yind = rowSums(y[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.y]) == length(by)
+    xind = rowSums(x[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.x]) == length(by.x)
+    yind = rowSums(y[, lapply(.SD, function(x) !(x %fin% incomparables)), .SDcols=by.y]) == length(by.y)
     # subset both so later steps still work
     x = x[xind]
     y = y[yind]
   }
-  dt = y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]   # includes JIS columns (with a i. prefix if conflict with x names)
+  dt = tryCatch({
+      y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
+    },
+    bmerge_incompatible_type_error = function(e) {
+      x_part_col_name = e$bmerge_i_arg_col_name
+      x_part_type     = e$bmerge_i_arg_type
+      y_part_col_name = e$bmerge_x_arg_col_name
+      y_part_type     = e$bmerge_x_arg_type
+
+      msg = sprintf("Incompatible join types: x.%s (%s) and i.%s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type)
+      stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition")))
+    }
+  )
 
   if (all.y && nrow(y)) {  # If y does not have any rows, no need to proceed
     # Perhaps not very commonly used, so not a huge deal that the join is redone here.

From ccd481f57ef21dd458be134f5f2fa7ac8921e377 Mon Sep 17 00:00:00 2001
From: Mukul Kumar <mukulrao6480@gmail.com>
Date: Fri, 6 Jun 2025 20:05:10 +0530
Subject: [PATCH 13/23] add test and appropriate changes

---
 R/bmerge.R            | 11 ++++-------
 R/merge.R             | 24 +++++++-----------------
 inst/tests/tests.Rraw |  5 +++++
 3 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 255d7dbdfe..13c34cee78 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -84,20 +84,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
           next
         }
       }
-      condition_message <- sprintf(
-        "Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", # Exact match for tests like 2044.24
-        xname, x_merge_type,
-        iname, i_merge_type
-      )
-      condition <- structure(
+      condition_message = gettextf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
+      condition = structure(
         list(
           message = condition_message,
+          call = sys.call(sys.nframe()-1L),
           bmerge_x_arg_col_name      = names(x)[xcol],
           bmerge_x_arg_type          = x_merge_type,
           bmerge_i_arg_col_name      = names(i)[icol],
           bmerge_i_arg_type          = i_merge_type
         ),
-        class = c("bmerge_incompatible_type_error", "data.table_error", "error", "condition")
+        class = c("dt_bmerge_incompatible_type_error", "error", "condition")
       )
       stop(condition)
     }
diff --git a/R/merge.R b/R/merge.R
index 6cbb67dd62..63ce50aa7d 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -97,26 +97,16 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     y = y[yind]
   }
 
-  dt <- tryCatch({
+  dt = tryCatch({
       y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
     },
-    bmerge_incompatible_type_error = function(e) {
-      # For merge(x=DT1, y=DT2), DT1 (user's 'x') is bmerge's 'i'
-      #                            DT2 (user's 'y') is bmerge's 'x'
-      x_part_col_name <- e$bmerge_i_arg_col_name
-      x_part_type     <- e$bmerge_i_arg_type
-      y_part_col_name <- e$bmerge_x_arg_col_name
-      y_part_type     <- e$bmerge_x_arg_type
+    dt_bmerge_incompatible_type_error = function(e) {
+      x_part_col_name = e$bmerge_i_arg_col_name
+      x_part_type     = e$bmerge_i_arg_type
+      y_part_col_name = e$bmerge_x_arg_col_name
+      y_part_type     = e$bmerge_x_arg_type
 
-      # Use literal "x." and "y." prefixes referring to the arguments of merge()
-      msg <- sprintf(
-        "Incompatible join types: x.%s (%s) and i.%s (%s). Factor columns must join to factor or character columns.",
-        x_part_col_name, x_part_type, 
-        y_part_col_name, y_part_type   
-      )
-      
-      # Remove call = NULL to get "Error in merge.data.table(...): " prefix.
-      stop(errorCondition(message = msg, class = c("datatable_merge_type_error", "data.table_error", "error", "condition")))
+      stopf("Incompatible join types: x.%s (%s) and i.%s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type, class = "dt_merge_incompatible_type_error")
     }
   )
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c9cfe6244b..722209d32c 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21157,3 +21157,8 @@ test(2317.6, DT1[DF1, on='a', .(d = x.a + i.d)]$d, 5)
 test(2317.7, DT1[DF2, on='a', e := i.e]$e, 5)
 test(2317.8, DT1[DF2, on='a', e2 := x.a + i.e]$e2, 6)
 test(2317.9, DT1[DF2, on='a', .(e = x.a + i.e)]$e, 6)
+
+# Test for incompatible factor joins rephrased by merge.data.table(#7048)
+DT1=data.table(a=factor('a'))
+DT2=data.table(a=1L)
+test(2318, merge(DT1, DT2, by = "a"), error = "Incompatible join types: x.a (factor) and i.a (integer). Factor columns must join to factor or character columns.")

From 495a06d02b37d7cced1920011e01b3084d54e3d0 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Mon, 9 Jun 2025 13:21:40 -0700
Subject: [PATCH 14/23] terminal newline

---
 R/merge.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/merge.R b/R/merge.R
index 63ce50aa7d..c85cde7145 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -133,4 +133,4 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   # retain custom classes of first argument that resulted in dispatch to this method, #1378
   setattr(dt, "class", class_x)
   dt
-}
\ No newline at end of file
+}

From 62b7c8a75ec4299b36f55732af26beb192516de0 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Mon, 9 Jun 2025 13:25:28 -0700
Subject: [PATCH 15/23] avoid structure()

---
 R/bmerge.R | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 13c34cee78..0331624200 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -85,17 +85,15 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
         }
       }
       condition_message = gettextf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", xname, x_merge_type, iname, i_merge_type)
-      condition = structure(
-        list(
-          message = condition_message,
-          call = sys.call(sys.nframe()-1L),
-          bmerge_x_arg_col_name      = names(x)[xcol],
-          bmerge_x_arg_type          = x_merge_type,
-          bmerge_i_arg_col_name      = names(i)[icol],
-          bmerge_i_arg_type          = i_merge_type
-        ),
-        class = c("dt_bmerge_incompatible_type_error", "error", "condition")
+      condition = list(
+        message = condition_message,
+        call = sys.call(sys.nframe() - 1L),
+        bmerge_x_arg_col_name      = names(x)[xcol],
+        bmerge_x_arg_type          = x_merge_type,
+        bmerge_i_arg_col_name      = names(i)[icol],
+        bmerge_i_arg_type          = i_merge_type
       )
+      class(condition) = c("dt_bmerge_incompatible_type_error", "error", "condition")
       stop(condition)
     }
     # we check factors first to cater for the case when trying to do rolling joins on factors

From 1c7d2501715e2356b41da895b1287e0b065cde20 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:28:14 -0700
Subject: [PATCH 16/23] disable irrelevant linter

---
 .ci/.lintr.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.ci/.lintr.R b/.ci/.lintr.R
index f081763b41..c4f6d0c5e1 100644
--- a/.ci/.lintr.R
+++ b/.ci/.lintr.R
@@ -40,6 +40,9 @@ linters = c(dt_linters, all_linters(
   # TODO(lintr#2442): Use this once x[ , j, by] is supported.
   commas_linter = NULL,
   commented_code_linter = NULL,
+  # mostly, we just use stopf() & friends, but ignore this for the
+  #   rare cases we need plain stop (e.g. #7048)
+  condition_call_linter = NULL,
   # TODO(linter->3.2.0): Activate this.
   consecutive_assertion_linter = NULL,
   cyclocomp_linter = NULL,

From 3e3004663398a6a1a9c8750aa411cfecc0ee3cbe Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:29:56 -0700
Subject: [PATCH 17/23] correct prefix :)

---
 R/merge.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/merge.R b/R/merge.R
index c85cde7145..92162cb6cf 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -106,7 +106,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
       y_part_col_name = e$bmerge_x_arg_col_name
       y_part_type     = e$bmerge_x_arg_type
 
-      stopf("Incompatible join types: x.%s (%s) and i.%s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type, class = "dt_merge_incompatible_type_error")
+      stopf("Incompatible join types: x.%s (%s) and y.%s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type, class = "dt_merge_incompatible_type_error")
     }
   )
 

From 672bdbb4f81aa673b3551262ee325ee68ecbe47c Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:30:13 -0700
Subject: [PATCH 18/23] correct prefix in tests

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 722209d32c..c8bb42ae0e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21161,4 +21161,4 @@ test(2317.9, DT1[DF2, on='a', .(e = x.a + i.e)]$e, 6)
 # Test for incompatible factor joins rephrased by merge.data.table(#7048)
 DT1=data.table(a=factor('a'))
 DT2=data.table(a=1L)
-test(2318, merge(DT1, DT2, by = "a"), error = "Incompatible join types: x.a (factor) and i.a (integer). Factor columns must join to factor or character columns.")
+test(2318, merge(DT1, DT2, by = "a"), error = "Incompatible join types: x.a (factor) and y.a (integer). Factor columns must join to factor or character columns.")

From fe23e002256362bb8fff9151d3c9187f6f7bcdf9 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:33:55 -0700
Subject: [PATCH 19/23] style: avoid {} if not needed

---
 R/merge.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/merge.R b/R/merge.R
index 92162cb6cf..5e3d533a80 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -97,9 +97,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     y = y[yind]
   }
 
-  dt = tryCatch({
-      y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian]
-    },
+  dt = tryCatch(
+      y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian],
     dt_bmerge_incompatible_type_error = function(e) {
       x_part_col_name = e$bmerge_i_arg_col_name
       x_part_type     = e$bmerge_i_arg_type

From fc67d724981fa2a6cd9547a13b286c0962dc2d03 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:34:19 -0700
Subject: [PATCH 20/23] ws

---
 R/merge.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/merge.R b/R/merge.R
index 5e3d533a80..2af03a3b49 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -98,7 +98,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   }
 
   dt = tryCatch(
-      y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian],
+    y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian],
     dt_bmerge_incompatible_type_error = function(e) {
       x_part_col_name = e$bmerge_i_arg_col_name
       x_part_type     = e$bmerge_i_arg_type

From 18a0662d477c7b59b0058a44005047312a74ad28 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:37:41 -0700
Subject: [PATCH 21/23] Apply prefix outside translation to get identical
 messages in gettext

---
 R/merge.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/merge.R b/R/merge.R
index 2af03a3b49..a4d41b5031 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -100,12 +100,12 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
   dt = tryCatch(
     y[x, nomatch=if (all.x) NA else NULL, on=by, allow.cartesian=allow.cartesian],
     dt_bmerge_incompatible_type_error = function(e) {
-      x_part_col_name = e$bmerge_i_arg_col_name
+      x_part_col_name = paste0("x.", e$bmerge_i_arg_col_name)
       x_part_type     = e$bmerge_i_arg_type
-      y_part_col_name = e$bmerge_x_arg_col_name
+      y_part_col_name = paste0("y.", e$bmerge_x_arg_col_name)
       y_part_type     = e$bmerge_x_arg_type
 
-      stopf("Incompatible join types: x.%s (%s) and y.%s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type, class = "dt_merge_incompatible_type_error")
+      stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type, class = "dt_merge_incompatible_type_error")
     }
   )
 

From ae572ce596c38e2c909281da42ceab47d1547b90 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:42:19 -0700
Subject: [PATCH 22/23] restore comment

---
 R/merge.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/merge.R b/R/merge.R
index a4d41b5031..37c1bd6641 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -109,7 +109,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     }
   )
 
-  if (all.y && nrow(y)) {
+  if (all.y && nrow(y)) {  # If y does not have any rows, no need to proceed
+    # Perhaps not very commonly used, so not a huge deal that the join is redone here.
     missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
     if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
   }

From 4cbe8b9d11f7445c4be48a21319850a0e08f4d46 Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 9 Jun 2025 13:42:59 -0700
Subject: [PATCH 23/23] drop custom class in merge() error for now

---
 R/merge.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/merge.R b/R/merge.R
index 37c1bd6641..cc15b830bf 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -105,7 +105,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
       y_part_col_name = paste0("y.", e$bmerge_x_arg_col_name)
       y_part_type     = e$bmerge_x_arg_type
 
-      stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type, class = "dt_merge_incompatible_type_error")
+      stopf("Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns.", x_part_col_name, x_part_type, y_part_col_name, y_part_type)
     }
   )