@@ -1510,79 +1510,77 @@ end
15101510 @test m1[! , :a ] == m2[! , :a ]
15111511end
15121512
1513- if Sys. WORD_SIZE == 64
1514- @testset " threaded correctness" begin
1515- df1 = DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ])
1516- df1. left_row = axes (df1, 1 )
1517- df2 = DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ])
1518- df2. right_row = axes (df2, 1 )
1519-
1520- @test innerjoin (df1, df2, on= :id ) ≅
1521- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 , right_row= 1 : 10 ^ 6 )
1522- @test leftjoin (df1, df2, on= :id ) ≅
1523- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 6 + 2 ,
1524- right_row= [1 : 10 ^ 6 ; missing ; missing ])
1525- @test rightjoin (df1, df2, on= :id ) ≅
1526- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1527- left_row= [1 : 10 ^ 6 ; fill (missing , 4 )],
1528- right_row= 1 : 10 ^ 6 + 4 )
1529- @test outerjoin (df1, df2, on= :id ) ≅
1530- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1531- left_row= [1 : 10 ^ 6 + 2 ; fill (missing , 4 )],
1532- right_row= [1 : 10 ^ 6 ; missing ; missing ; 10 ^ 6 + 1 : 10 ^ 6 + 4 ])
1533- @test semijoin (df1, df2, on= :id ) ≅
1534- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 )
1535- @test antijoin (df1, df2, on= :id ) ≅
1536- DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 6 + 1 : 10 ^ 6 + 2 )
1537-
1538- Random. seed! (1234 )
1539- for i in 1 : 4
1540- df1 = df1[shuffle (axes (df1, 1 )), :]
1541- df2 = df2[shuffle (axes (df2, 1 )), :]
1542-
1543- @test sort! (innerjoin (df1, df2, on= :id )) ≅
1544- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 , right_row= 1 : 10 ^ 6 )
1545- @test sort! (leftjoin (df1, df2, on= :id )) ≅
1546- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 6 + 2 ,
1547- right_row= [1 : 10 ^ 6 ; missing ; missing ])
1548- @test sort! (rightjoin (df1, df2, on= :id )) ≅
1549- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1550- left_row= [1 : 10 ^ 6 ; fill (missing , 4 )],
1551- right_row= 1 : 10 ^ 6 + 4 )
1552- @test sort! (outerjoin (df1, df2, on= :id )) ≅
1553- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1554- left_row= [1 : 10 ^ 6 + 2 ; fill (missing , 4 )],
1555- right_row= [1 : 10 ^ 6 ; missing ; missing ; 10 ^ 6 + 1 : 10 ^ 6 + 4 ])
1556- @test sort! (semijoin (df1, df2, on= :id )) ≅
1557- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 )
1558- @test sort! (antijoin (df1, df2, on= :id )) ≅
1559- DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 6 + 1 : 10 ^ 6 + 2 )
1560- end
1513+ @testset " threaded correctness" begin
1514+ df1 = DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ])
1515+ df1. left_row = axes (df1, 1 )
1516+ df2 = DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ])
1517+ df2. right_row = axes (df2, 1 )
1518+
1519+ @test innerjoin (df1, df2, on= :id ) ≅
1520+ DataFrame (id= 1 : 10 ^ 5 , left_row= 1 : 10 ^ 5 , right_row= 1 : 10 ^ 5 )
1521+ @test leftjoin (df1, df2, on= :id ) ≅
1522+ DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 5 + 2 ,
1523+ right_row= [1 : 10 ^ 5 ; missing ; missing ])
1524+ @test rightjoin (df1, df2, on= :id ) ≅
1525+ DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1526+ left_row= [1 : 10 ^ 5 ; fill (missing , 4 )],
1527+ right_row= 1 : 10 ^ 5 + 4 )
1528+ @test outerjoin (df1, df2, on= :id ) ≅
1529+ DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1530+ left_row= [1 : 10 ^ 5 + 2 ; fill (missing , 4 )],
1531+ right_row= [1 : 10 ^ 5 ; missing ; missing ; 10 ^ 5 + 1 : 10 ^ 5 + 4 ])
1532+ @test semijoin (df1, df2, on= :id ) ≅
1533+ DataFrame (id= 1 : 10 ^ 5 , left_row= 1 : 10 ^ 5 )
1534+ @test antijoin (df1, df2, on= :id ) ≅
1535+ DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 5 + 1 : 10 ^ 5 + 2 )
15611536
1562- # test correctness of column order
1563- df1 = DataFrame (a= Int8 (1 ), id2= - [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], b= Int8 (2 ),
1564- id1= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], c= Int8 (3 ), d= Int8 (4 ))
1565- df2 = DataFrame (e= Int8 (5 ), id1= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ], f= Int8 (6 ), g= Int8 (7 ),
1566- id2= - [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ], h= Int8 (8 ))
1567-
1568- @test innerjoin (df1, df2, on= [:id1 , :id2 ]) ≅
1569- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1570- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1571- @test leftjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1572- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1573- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1574- @test rightjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1575- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1576- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1577- @test outerjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1578- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1579- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1580- @test semijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1581- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 , c= Int8 (3 ), d= Int8 (4 ))
1582- @test antijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1583- DataFrame (a= Int8 (1 ), id2= - (10 ^ 7 + 1 : 10 ^ 7 + 2 ), b= Int8 (2 ), id1= (10 ^ 7 + 1 : 10 ^ 7 + 2 ),
1584- c= Int8 (3 ), d= Int8 (4 ))
1537+ Random. seed! (1234 )
1538+ for i in 1 : 4
1539+ df1 = df1[shuffle (axes (df1, 1 )), :]
1540+ df2 = df2[shuffle (axes (df2, 1 )), :]
1541+
1542+ @test sort! (innerjoin (df1, df2, on= :id )) ≅
1543+ DataFrame (id= 1 : 10 ^ 5 , left_row= 1 : 10 ^ 5 , right_row= 1 : 10 ^ 5 )
1544+ @test sort! (leftjoin (df1, df2, on= :id )) ≅
1545+ DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 5 + 2 ,
1546+ right_row= [1 : 10 ^ 5 ; missing ; missing ])
1547+ @test sort! (rightjoin (df1, df2, on= :id )) ≅
1548+ DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1549+ left_row= [1 : 10 ^ 5 ; fill (missing , 4 )],
1550+ right_row= 1 : 10 ^ 5 + 4 )
1551+ @test sort! (outerjoin (df1, df2, on= :id )) ≅
1552+ DataFrame (id= [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1553+ left_row= [1 : 10 ^ 5 + 2 ; fill (missing , 4 )],
1554+ right_row= [1 : 10 ^ 5 ; missing ; missing ; 10 ^ 5 + 1 : 10 ^ 5 + 4 ])
1555+ @test sort! (semijoin (df1, df2, on= :id )) ≅
1556+ DataFrame (id= 1 : 10 ^ 5 , left_row= 1 : 10 ^ 5 )
1557+ @test sort! (antijoin (df1, df2, on= :id )) ≅
1558+ DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 5 + 1 : 10 ^ 5 + 2 )
15851559 end
1560+
1561+ # test correctness of column order
1562+ df1 = DataFrame (a= Int8 (1 ), id2= - [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], b= Int8 (2 ),
1563+ id1= [1 : 10 ^ 5 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], c= Int8 (3 ), d= Int8 (4 ))
1564+ df2 = DataFrame (e= Int8 (5 ), id1= [1 : 10 ^ 5 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ], f= Int8 (6 ), g= Int8 (7 ),
1565+ id2= - [1 : 10 ^ 5 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ], h= Int8 (8 ))
1566+
1567+ @test innerjoin (df1, df2, on= [:id1 , :id2 ]) ≅
1568+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 5 ), b= Int8 (2 ), id1= 1 : 10 ^ 5 ,
1569+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1570+ @test leftjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 5 , :] ≅
1571+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 5 ), b= Int8 (2 ), id1= 1 : 10 ^ 5 ,
1572+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1573+ @test rightjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 5 , :] ≅
1574+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 5 ), b= Int8 (2 ), id1= 1 : 10 ^ 5 ,
1575+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1576+ @test outerjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 5 , :] ≅
1577+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 5 ), b= Int8 (2 ), id1= 1 : 10 ^ 5 ,
1578+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1579+ @test semijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1580+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 5 ), b= Int8 (2 ), id1= 1 : 10 ^ 5 , c= Int8 (3 ), d= Int8 (4 ))
1581+ @test antijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1582+ DataFrame (a= Int8 (1 ), id2= - (10 ^ 7 + 1 : 10 ^ 7 + 2 ), b= Int8 (2 ), id1= (10 ^ 7 + 1 : 10 ^ 7 + 2 ),
1583+ c= Int8 (3 ), d= Int8 (4 ))
15861584end
15871585
15881586@testset " matchmissing :notequal correctness" begin
0 commit comments