Skip to content

merge could utilize i index #4380

@jangorecki

Description

@jangorecki
# Data generation (run from the shell): writes d1.csv and d2.csv, each with
# N = 1e8 rows. `id` is sample(N), i.e. a random permutation of 1..N, so the
# two files hold the same ids in different random orders; v1/v2 are constant
# integer columns. set.seed(108) fixes the random draws.
Rscript -e 'library(data.table); set.seed(108); N = 1e8; fwrite(data.table(id = sample(N), v1=1L), "d1.csv"); fwrite(data.table(id = sample(N), v2=2L), "d2.csv")'

# Join timings. Each case below runs in its own R session (R -q ... q("no"))
# and times the same join, d1[d2, on="id"], where d1 is `x` and d2 is `i`.
# The only difference between cases is how d2 is read.
#
# Case 1 (baseline): d2 is read with neither a key nor an index.
R -q
library(data.table)
d1 = fread("d1.csv")
d2 = fread("d2.csv")
system.time(d1[d2, on="id"])
#   user  system elapsed 
# 51.929   9.861  24.909 
q("no")

# Case 2: d2 is read with key="id". Recorded elapsed time is about 13.5s
# versus about 24.9s for the baseline.
R -q
library(data.table)
d1 = fread("d1.csv")
d2 = fread("d2.csv", key="id")
system.time(d1[d2, on="id"])
#   user  system elapsed 
# 29.645   6.587  13.537 
q("no")

# Case 3: d2 is read with index="id" instead of a key. Recorded elapsed time
# (about 23.9s) is close to the baseline rather than to the keyed case.
# NOTE(review): this suggests the join does not take advantage of the index
# on `i`, which is what the issue title asks for -- confirm against the
# data.table join implementation.
R -q
library(data.table)
d1 = fread("d1.csv")
d2 = fread("d2.csv", index="id")
system.time(d1[d2, on="id"])
#   user  system elapsed 
# 51.679   9.918  23.874 
q("no")

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions