@@ -792,59 +792,62 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
792792 # ln, rn: lhs names, rhs names, symmult: symmetric mult
793793 mult_all = function(tbl, cols, ...) sprintf(
794794 "(\n SELECT %s FROM %s \n) %s",
795- paste(setdiff(cols,"row_id"), collapse=", "), tbl, tbl
795+ paste(setdiff(cols, "row_id"), collapse=", "), tbl, tbl
796796 )
797797 mult_one = function(tbl, cols, on, mult) sprintf(
798798 "(SELECT %s FROM (\n SELECT *, ROW_NUMBER() OVER (PARTITION BY %s ORDER BY row_id %s) AS rownum FROM %s \n) %s WHERE rownum=1) %s",
799- paste(setdiff(cols,c("row_id","rownum")), collapse=", "),
799+ paste(setdiff(cols, c("row_id", "rownum")), collapse=", "),
800800 paste(on, collapse=", "),
801801 if (mult=="first") "ASC" else "DESC",
802802 tbl, tbl, tbl
803803 )
804804 sql = function(how, on, mult, ln, rn, symmult=FALSE, notjoin=FALSE) {
805- stopifnot(length(on)== 1L)
805+ stopifnot(length(on) == 1L)
806806 # building sql query
807- if (how== "full") {
807+ if (how == "full") {
808808 return(sprintf(
809809 "%s \n UNION ALL \n %s",
810- sql("left", on, mult, ln, rn, symmult=mult%in%c("first","last")),
811- sql("right", on, mult, ln, rn, symmult=mult%in%c("first","last"), notjoin=TRUE)
810+ sql("left", on, mult, ln, rn, symmult=mult %in% c("first", "last")),
811+ sql("right", on, mult, ln, rn, symmult=mult %in% c("first", "last"), notjoin=TRUE)
812812 ))
813813 }
814814 nm = list()
815815 nm[["lhs"]] = ln; nm[["rhs"]] = rn
816816 using = sprintf("USING (%s)", paste(on, collapse=", "))
817817 lhs = "lhs"; rhs = "rhs"
818- join = if (how== "inner") {
819- if (mult== "all") sprintf("%s \n INNER JOIN \n %s \n %s", mult_all(lhs, nm[[lhs]]), mult_all(rhs, nm[[rhs]]), using)
820- else sprintf("%s \n INNER JOIN \n %s \n %s", mult_one(lhs, nm[[lhs]], on, mult), mult_one(rhs, nm[[rhs]], on, mult), using)
821- } else if (how== "left") {
822- if (mult== "all") sprintf("%s \n LEFT JOIN \n %s \n %s", mult_all(lhs, nm[[lhs]]), mult_all(rhs, nm[[rhs]]), using)
823- else sprintf("%s \n LEFT JOIN \n %s \n %s", (if (symmult) mult_one else mult_all)(lhs, nm[[lhs]], on, mult), mult_one(rhs, nm[[rhs]], on, mult), using)
824- } else if (how== "right") { ## lhs-rhs swap happens here, mult_one is applied on new rhs
825- if (mult== "all") sprintf("%s \n LEFT JOIN \n %s \n %s", mult_all(rhs, nm[[rhs]]), mult_all(lhs, nm[[lhs]]), using)
826- else sprintf("%s \n LEFT JOIN \n %s \n %s", (if (symmult) mult_one else mult_all)(rhs, nm[[rhs]], on, mult), mult_one(lhs, nm[[lhs]], on, mult), using)
818+ join = if (how == "inner") {
819+ if (mult == "all") sprintf("%s \n INNER JOIN \n %s \n %s", mult_all(lhs, nm[[lhs]]), mult_all(rhs, nm[[rhs]]), using)
820+ else sprintf("%s \n INNER JOIN \n %s \n %s", mult_one(lhs, nm[[lhs]], on, mult), mult_one(rhs, nm[[rhs]], on, mult), using)
821+ } else if (how == "left") {
822+ if (mult == "all") sprintf("%s \n LEFT JOIN \n %s \n %s", mult_all(lhs, nm[[lhs]]), mult_all(rhs, nm[[rhs]]), using)
823+ else sprintf("%s \n LEFT JOIN \n %s \n %s", (if (symmult) mult_one else mult_all)(lhs, nm[[lhs]], on, mult), mult_one(rhs, nm[[rhs]], on, mult), using)
824+ } else if (how == "right") { ## lhs-rhs swap happens here, mult_one is applied on new rhs
825+ if (mult == "all") sprintf("%s \n LEFT JOIN \n %s \n %s", mult_all(rhs, nm[[rhs]]), mult_all(lhs, nm[[lhs]]), using)
826+ else sprintf("%s \n LEFT JOIN \n %s \n %s", (if (symmult) mult_one else mult_all)(rhs, nm[[rhs]], on, mult), mult_one(lhs, nm[[lhs]], on, mult), using)
827827 }
828- if (how== "right") {lhs = "rhs"; rhs = "lhs"} ## this name swap is for notjoin and select below
828+ if (how == "right") {lhs = "rhs"; rhs = "lhs"} ## this name swap is for notjoin and select below
829829 where = if (!notjoin) "" else sprintf("\nWHERE %s IS NULL", paste(rhs, on, sep="."))
830- select = sprintf("%s, %s, %s", paste(lhs, on, sep="."),
831- paste("lhs", setdiff(nm[["lhs"]], c("row_id",on)),sep=".",collapse=", "),
832- paste("rhs", setdiff(nm[["rhs"]], c("row_id",on)),sep=".",collapse=", "))
830+ select = sprintf("%s, %s, %s",
831+ paste(lhs, on, sep="."),
832+ paste("lhs", setdiff(nm[["lhs"]], c("row_id", on)), sep=".", collapse=", "),
833+ paste("rhs", setdiff(nm[["rhs"]], c("row_id", on)), sep=".", collapse=", "))
833834 sprintf("SELECT %s FROM \n %s%s", select, join, where)
834835 }
835836
837+ dbSend <- function(..., silent=FALSE) try(suppressWarnings(DBI::dbSendQuery(...)), silent=silent)
838+
836839 # .conn SQLite connection, if provided it will use it instead of creating temporary one
837840 # .drop logical TRUE (default) will drop db tables before and after and populate new, when FALSE it expects tables to be populated
838841 join.sql.equal = function(l, on, how="inner", mult="all", allow.cartesian=TRUE, .conn, .drop=TRUE, .debug=interactive(), ans, err=FALSE) {
839842 nm = names(l)
840- stopifnot(is.null(nm) || identical(nm, c("x","i")) || identical(nm, c("lhs","rhs")))
841- names(l) = c("lhs","rhs")
843+ stopifnot(is.null(nm) || identical(nm, c("x", "i")) || identical(nm, c("lhs", "rhs")))
844+ names(l) = c("lhs", "rhs")
842845 lhs = l[["lhs"]]; rhs = l[["rhs"]]
843846 stopifnot(is.data.table(lhs), is.data.table(rhs),
844847 is.character(how), is.character(mult), length(mult)==1L,
845848 is.character(on),
846849 is.logical(allow.cartesian), is.logical(.drop))
847- if (err && mult== "error") {
850+ if (err && mult == "error") {
848851 dt = try(silent=TRUE, mergelist(list(lhs, rhs), on=on, how=how, mult=mult))
849852 if (!inherits(dt, "try-error")) {
850853 if (.debug) browser()
@@ -863,14 +866,14 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
863866 # preparing sql environment
864867 conn = if (new.conn <- missing(.conn)) DBI::dbConnect(RSQLite::SQLite()) else .conn
865868 if (.drop) {
866- try(suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE lhs;")) , silent=TRUE)
867- try(suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE rhs;")) , silent=TRUE)
869+ dbSend( conn, "DROP TABLE lhs;", silent=TRUE)
870+ dbSend( conn, "DROP TABLE rhs;", silent=TRUE)
868871 DBI::dbWriteTable(conn, name="lhs", value=lhs)
869872 DBI::dbWriteTable(conn, name="rhs", value=rhs)
870873 }
871874 # building sql query
872875 s = sql(how, on, mult, names(lhs), names(rhs))
873- s = paste0(s,";\n")
876+ s = paste0(s, ";\n")
874877 # run data.table and SQLite
875878 dt = mergelist(list(lhs[,!"row_id"], rhs[,!"row_id"]), on=on, how=how, mult=mult)
876879 sq = try(silent=TRUE, as.data.table(DBI::dbGetQuery(conn, s)))
@@ -882,8 +885,8 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
882885 if (.debug) {message("dt and sq must be data.table already"); browser()}
883886 stop("dt and sq must be data.table already")
884887 }
885- if (how %in% c("inner","full")) {
886- dt2 = mergelist(list(rhs[,!"row_id"], lhs[,!"row_id"]), on=on, how=how, mult=mult)
888+ if (how %in% c("inner", "full")) {
889+ dt2 = mergelist(list(rhs[, !"row_id"], lhs[, !"row_id"]), on=on, how=how, mult=mult)
887890 setcolorder(dt2, neworder=names(dt))
888891 setattr(dt, "index", integer())
889892 setattr(dt2, "index", integer())
@@ -907,8 +910,8 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
907910 }
908911 }
909912 if (.drop) {
910- suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE lhs;") )
911- suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE rhs;") )
913+ dbSend( conn, "DROP TABLE lhs;")
914+ dbSend( conn, "DROP TABLE rhs;")
912915 }
913916 if (new.conn) suppressWarnings(DBI::dbDisconnect(conn))
914917 if (isTRUE(b) && !isTRUE(a)) {
@@ -917,37 +920,37 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
917920 }
918921 if (!isTRUE(a)) {
919922 if (.debug) browser()
920- cat(sep="\n",c(
923+ cat(sep="\n", c(
921924 sprintf("# dtq:\nmergelist(l, on='%s', how='%s', mult='%s')", paste(on, collapse=", "), how, mult),
922925 sprintf("# sql:\n%s", s),
923926 a, "\n"))
924927 }
925928 isTRUE(a)
926929 }
927930
928- batch.join.sql.equal = function(cases, on, hows=c("inner","left","right","full"), mults=c("all","first","last"), .debug=FALSE) {
931+ batch.join.sql.equal = function(cases, on, hows=c("inner", "left", "right", "full"), mults=c("all", "first", "last"), .debug=FALSE) {
929932 if ("error" %in% mults) stop("mult=error is not supported")
930933 p = proc.time()[[3L]]
931934 conn = DBI::dbConnect(RSQLite::SQLite())
932935 ans = list()
933936 dup_n = 0L
934937 for (case in cases) {
935- l = data (case)
936- stopifnot(c("lhs","rhs") %in% names(l))
938+ l = case_data (case)
939+ stopifnot(c("lhs", "rhs") %in% names(l))
937940 case = as.character(case)
938941 lhs = l$lhs; rhs = l$rhs
939942 ans[[case]] = list()
940943 # reuse tables, to test if affects sqlite efficiency
941- try(suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE lhs;")) , silent = TRUE)
942- try(suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE rhs;")) , silent = TRUE)
944+ dbSend( conn, "DROP TABLE lhs;", silent = TRUE)
945+ dbSend( conn, "DROP TABLE rhs;", silent = TRUE)
943946 # row_id column required as SQL is not ordered, creating on R side
944947 if (!"row_id" %in% names(lhs)) lhs = copy(lhs)[, "row_id" := seq_len(.N)]
945948 if (!"row_id" %in% names(rhs)) rhs = copy(rhs)[, "row_id" := seq_len(.N)]
946949 DBI::dbWriteTable(conn, name="lhs", value=lhs)
947950 DBI::dbWriteTable(conn, name="rhs", value=rhs)
948951 len = prod(length(cases), length(hows), length(mults))
949952 if (len > (len.warn <- getOption("tests.length.warning", 1e3)))
950- warning(sprintf( "You are about to run %s number of tests. To suppress this warning use 'tests.length.warning' option, set to numeric threshold or Inf.", len.warn) )
953+ warning("You are about to run ", len.warn, " tests. To suppress this warning use the 'tests.length.warning' option." )
951954 for (how in hows) {
952955 ans[[case]][[how]] = list()
953956 for (mult in mults) {
@@ -958,64 +961,64 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
958961 ans[[case]][[how]][[mult]] = join.sql.equal(list(lhs=lhs, rhs=rhs), on=on, how=how, mult=mult, .conn=conn, .drop=FALSE, .debug=.debug)
959962 }
960963 }
961- suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE lhs;") )
962- suppressWarnings(DBI::dbSendQuery( conn, "DROP TABLE rhs;") )
964+ dbSend( conn, "DROP TABLE lhs;")
965+ dbSend( conn, "DROP TABLE rhs;")
963966 }
964967 suppressWarnings(DBI::dbDisconnect(conn))
965968 cat(sprintf("batch.join.sql.equal: %s%s tests completed in %.1fs\n",
966969 len, if (dup_n) sprintf(" (%s duplicated)", dup_n) else "", proc.time()[[3L]] - p))
967970 ans
968971 }
969- data = function(case) {
972+ case_data = function(case) {
970973 set.seed(108)
971974 if (case == 1L) { # 2 match
972- lhs = data.table(id = c(1L,5L,3L,7L), v1=1:4)
973- rhs = data.table(id = c(2L,4L,3L,5L), v2=1:4)
975+ lhs = data.table(id = c(1L, 5L, 3L, 7L), v1=1:4)
976+ rhs = data.table(id = c(2L, 4L, 3L, 5L), v2=1:4)
974977 } else if (case == 2L) { # 4 match
975- lhs = data.table(id = c(1L,5L,3L,7L), v1=1:4)
976- rhs = data.table(id = c(7L,5L,3L,1L), v2=1:4)
978+ lhs = data.table(id = c(1L, 5L, 3L, 7L), v1=1:4)
979+ rhs = data.table(id = c(7L, 5L, 3L, 1L), v2=1:4)
977980 } else if (case == 3L) { # 1 match
978- lhs = data.table(id =c(1L,5L,3L,7L), v1=1:4)
979- rhs = data.table(id =c(1L,2L,4L,6L), v2=1:4)
981+ lhs = data.table(id =c(1L, 5L, 3L, 7L), v1=1:4)
982+ rhs = data.table(id =c(1L, 2L, 4L, 6L), v2=1:4)
980983 } else if (case == 4L) { # 0 match
981- lhs = data.table(id =c(1L,5L,3L,7L), v1=1:4)
982- rhs = data.table(id =c(0L,2L,4L,6L), v2=1:4)
984+ lhs = data.table(id =c(1L, 5L, 3L, 7L), v1=1:4)
985+ rhs = data.table(id =c(0L, 2L, 4L, 6L), v2=1:4)
983986 } else if (case == 5L) { # 0 match dup
984- lhs = data.table(id =c(1L,5L,3L,7L), v1=1:4)
985- rhs = data.table(id =c(0L,2L,2L,6L), v2=1:4)
987+ lhs = data.table(id =c(1L, 5L, 3L, 7L), v1=1:4)
988+ rhs = data.table(id =c(0L, 2L, 2L, 6L), v2=1:4)
986989 } else if (case == 6L) { # 1 match dup
987- lhs = data.table(id =c(1L,5L,3L,7L), v1=1:4)
988- rhs = data.table(id =c(1L,2L,2L,6L), v2=1:4)
990+ lhs = data.table(id =c(1L, 5L, 3L, 7L), v1=1:4)
991+ rhs = data.table(id =c(1L, 2L, 2L, 6L), v2=1:4)
989992 } else if (case == 7L) { # 1 match dup match
990- lhs = data.table(id =c(1L,5L,3L,7L), v1=1:4)
991- rhs = data.table(id =c(3L,3L,4L,6L), v2=1:4)
993+ lhs = data.table(id =c(1L, 5L, 3L, 7L), v1=1:4)
994+ rhs = data.table(id =c(3L, 3L, 4L, 6L), v2=1:4)
992995 } else if (case == 8L) { # 2 match 2 dup match
993- lhs = data.table(id =c(1L,5L,3L,7L), v1=1:4)
994- rhs = data.table(id = c(3L,3L,7L,7L), v2=1:4)
996+ lhs = data.table(id =c(1L, 5L, 3L, 7L), v1=1:4)
997+ rhs = data.table(id = c(3L, 3L, 7L, 7L), v2=1:4)
995998 } else if (case == 9L) { # 2 dup 2 dup
996- lhs = data.table(id = c(1L,5L,1L,5L), v1=1:4)
997- rhs = data.table(id = c(5L,5L,1L,1L), v2=1:4)
999+ lhs = data.table(id = c(1L, 5L, 1L, 5L), v1=1:4)
1000+ rhs = data.table(id = c(5L, 5L, 1L, 1L), v2=1:4)
9981001 } else if (case == 10L) { # 4 dup 4 dup match
999- lhs = data.table(id = c(1L,1L,1L,1L), v1=1:4)
1000- rhs = data.table(id = c(1L,1L,1L,1L), v2=1:4)
1002+ lhs = data.table(id = c(1L, 1L, 1L, 1L), v1=1:4)
1003+ rhs = data.table(id = c(1L, 1L, 1L, 1L), v2=1:4)
10011004 } else if (case == 11L) { # 4 dup 4 dup nomatch
1002- lhs = data.table(id = c(1L,1L,1L,1L), v1=1:4)
1003- rhs = data.table(id = c(2L,2L,2L,2L), v2=1:4)
1005+ lhs = data.table(id = c(1L, 1L, 1L, 1L), v1=1:4)
1006+ rhs = data.table(id = c(2L, 2L, 2L, 2L), v2=1:4)
10041007 } else if (case == 12L) { # no match, no overlap
10051008 lhs = data.table(id = c(1:4), v1=1:4)
10061009 rhs = data.table(id = c(6:9), v2=1:4)
10071010 } else if (case == 13L) { # all i matches
1008- lhs = data.table(id = c(1L,5L,3L,7L,9L), v1=1:5)
1009- rhs = data.table(id = c(7L,5L,3L,1L), v2=1:4)
1011+ lhs = data.table(id = c(1L, 5L, 3L, 7L, 9L), v1=1:5)
1012+ rhs = data.table(id = c(7L, 5L, 3L, 1L), v2=1:4)
10101013 } else if (case == 14L) { # dup match and 1 non-match
10111014 ## inner join short circuit test
10121015 ## what if some row is excluded but another is duplicated? nrow(i) match
1013- lhs = data.table(id = c(1L,5L,3L,7L,3L), v1=1:5)
1014- rhs = data.table(id = c(7L,5L,3L,2L), v2=1:4)
1016+ lhs = data.table(id = c(1L, 5L, 3L, 7L, 3L), v1=1:5)
1017+ rhs = data.table(id = c(7L, 5L, 3L, 2L), v2=1:4)
10151018 } else if (case == 15L) {
10161019 # does not raise error on mult="error" because dups '13' does not have matching rows!
1017- lhs = data.table(id = as.integer(c( 17,14,11,10,5,1, 19,7, 16,15) ), v1=1:10)
1018- rhs = data.table(id = as.integer(c(6, 20,13,1,8, 13,3, 10,17,9) ), v2=1:10)
1020+ lhs = data.table(id = INT( 17, 14, 11, 10, 5, 1, 19, 7, 16, 15 ), v1=1:10)
1021+ rhs = data.table(id = INT(6, 20, 13, 1, 8, 13, 3, 10, 17, 9 ), v2=1:10)
10191022 } else if (case == 16L) {
10201023 lhs = data.table(id = sample(10L, 10L, TRUE), v1=1:10)
10211024 rhs = data.table(id = sample(10L, 10L, TRUE), v2=1:10)
@@ -1038,28 +1041,28 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
10381041 lhs = data.table(id=1:2, v1=1:2)
10391042 rhs = data.table(id=1:2, v2=1:2)
10401043 } else if (case==23L) { ## cross join
1041- lhs = data.table(id=c(1L,1L), v1=1:2)
1042- rhs = data.table(id=c(1L,1L), v2=1:2)
1044+ lhs = data.table(id=c(1L, 1L), v1=1:2)
1045+ rhs = data.table(id=c(1L, 1L), v2=1:2)
10431046 } else if (case==24L) { ## cartesian match, dups on both sides of match
1044- lhs = data.table(id=c(1L,1:2), v1=1:3)
1045- rhs = data.table(id=c(1L,1L,3L), v2=1:3)
1047+ lhs = data.table(id=c(1L, 1:2), v1=1:3)
1048+ rhs = data.table(id=c(1L, 1L, 3L), v2=1:3)
10461049 } else if (case==25L) { ## duplicates in RHS
10471050 lhs = data.table(id=1:2, v1=1:2)
1048- rhs = data.table(id=c(2L,2:3), v2=1:3)
1051+ rhs = data.table(id=c(2L, 2:3), v2=1:3)
10491052 } else if (case==26L) { ## duplicates in RHS and LHS, some RHS dups does not have matches in LHS (merge.data.table+mult fails)
1050- lhs = data.table(id=c(1:3,3L), v1=1:4)
1051- rhs = data.table(id=c(1L,1L,3:4,4L), v2=1:5)
1053+ lhs = data.table(id=c(1:3, 3L), v1=1:4)
1054+ rhs = data.table(id=c(1L, 1L, 3:4, 4L), v2=1:5)
10521055 } else if (case==27L) { ## duplicates in RHS and LHS, some LHS dups does not have matches in RHS
1053- lhs = data.table(id=c(1L,1L,3:4,4L), v1=1:5)
1054- rhs = data.table(id=c(1:3,3L), v2=1:4)
1056+ lhs = data.table(id=c(1L, 1L, 3:4, 4L), v1=1:5)
1057+ rhs = data.table(id=c(1:3, 3L), v2=1:4)
10551058 } else if (case==28L) { ## duplicates in RHS and LHS
1056- lhs = data.table(id=c(1:3,3L), v1=1:4)
1057- rhs = data.table(id=c(1L,1L,3:4), v2=1:4)
1059+ lhs = data.table(id=c(1:3, 3L), v1=1:4)
1060+ rhs = data.table(id=c(1L, 1L, 3:4), v2=1:4)
10581061 } else if (case==29L) { ## duplicates in RHS
10591062 lhs = data.table(id=1:2, v1=1:2)
1060- rhs = data.table(id=c(2L,2:3), v2=1:3)
1063+ rhs = data.table(id=c(2L, 2:3), v2=1:3)
10611064 } else if (case==30L) { ## duplicates in LHS
1062- lhs = data.table(id=c(1:2,2L), v1=1:3)
1065+ lhs = data.table(id=c(1:2, 2L), v1=1:3)
10631066 rhs = data.table(id=2:3, v2=1:2)
10641067 } else if (case==31L) {
10651068 lhs = data.table(id=integer(), v1=integer())
@@ -1070,7 +1073,7 @@ if (loaded[["DBI"]] && loaded[["RSQLite"]]) {
10701073
10711074 # tests ----
10721075
1073- y = batch.join.sql.equal(cases=1:31, on="id", hows=c("inner","left","right","full"), mults=c("all","first","last"), .debug=interactive())
1076+ y = batch.join.sql.equal(cases=1:31, on="id", hows=c("inner", "left", "right", "full"), mults=c("all", "first", "last"), .debug=interactive())
10741077 y = rapply(y, isTRUE)
10751078 if (!all(y))
10761079 stop(sprintf("join tests failed for %s cases:\n%s", sum(!y), paste(" ", names(y)[!y], collapse="\n")))
0 commit comments