@@ -1510,171 +1510,53 @@ end
15101510 @test m1[! , :a ] == m2[! , :a ]
15111511end
15121512
1513- @testset " threaded correctness " begin
1514- try
1513+ if Sys . WORD_SIZE == 64
1514+ @testset " threaded correctness " begin
15151515 df1 = DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ])
15161516 df1. left_row = axes (df1, 1 )
15171517 df2 = DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ])
15181518 df2. right_row = axes (df2, 1 )
15191519
1520- @test try
1521- innerjoin (df1, df2, on= :id ) ≅
1522- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 , right_row= 1 : 10 ^ 6 )
1523- catch e
1524- if Int === Int32 && e isa OutOfMemoryError
1525- @warn " OutOfMemoryError. Skipping innerjoin test."
1526- true
1527- else
1528- rethrow (e)
1529- end
1530- end
1531-
1532- @test try
1533- leftjoin (df1, df2, on= :id ) ≅
1534- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 6 + 2 ,
1535- right_row= [1 : 10 ^ 6 ; missing ; missing ])
1536- catch e
1537- if Int === Int32 && e isa OutOfMemoryError
1538- @warn " OutOfMemoryError. Skipping leftjoin test."
1539- true
1540- else
1541- rethrow (e)
1542- end
1543- end
1544-
1545- @test try
1546- rightjoin (df1, df2, on= :id ) ≅
1547- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1548- left_row= [1 : 10 ^ 6 ; fill (missing , 4 )],
1549- right_row= 1 : 10 ^ 6 + 4 )
1550- catch e
1551- if Int === Int32 && e isa OutOfMemoryError
1552- @warn " OutOfMemoryError. Skipping rightjoin test."
1553- true
1554- else
1555- rethrow (e)
1556- end
1557- end
1558-
1559- @test try
1560- outerjoin (df1, df2, on= :id ) ≅
1561- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1562- left_row= [1 : 10 ^ 6 + 2 ; fill (missing , 4 )],
1563- right_row= [1 : 10 ^ 6 ; missing ; missing ; 10 ^ 6 + 1 : 10 ^ 6 + 4 ])
1564- catch e
1565- if Int === Int32 && e isa OutOfMemoryError
1566- @warn " OutOfMemoryError. Skipping outerjoin test."
1567- true
1568- else
1569- rethrow (e)
1570- end
1571- end
1572-
1573- @test try
1574- semijoin (df1, df2, on= :id ) ≅
1575- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 )
1576- catch e
1577- if Int === Int32 && e isa OutOfMemoryError
1578- @warn " OutOfMemoryError. Skipping semijoin test."
1579- true
1580- else
1581- rethrow (e)
1582- end
1583- end
1584-
1585- @test try
1586- antijoin (df1, df2, on= :id ) ≅
1587- DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 6 + 1 : 10 ^ 6 + 2 )
1588- catch e
1589- if Int === Int32 && e isa OutOfMemoryError
1590- @warn " OutOfMemoryError. Skipping antijoin test."
1591- true
1592- else
1593- rethrow (e)
1594- end
1595- end
1520+ @test innerjoin (df1, df2, on= :id ) ≅
1521+ DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 , right_row= 1 : 10 ^ 6 )
1522+ @test leftjoin (df1, df2, on= :id ) ≅
1523+ DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 6 + 2 ,
1524+ right_row= [1 : 10 ^ 6 ; missing ; missing ])
1525+ @test rightjoin (df1, df2, on= :id ) ≅
1526+ DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1527+ left_row= [1 : 10 ^ 6 ; fill (missing , 4 )],
1528+ right_row= 1 : 10 ^ 6 + 4 )
1529+ @test outerjoin (df1, df2, on= :id ) ≅
1530+ DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1531+ left_row= [1 : 10 ^ 6 + 2 ; fill (missing , 4 )],
1532+ right_row= [1 : 10 ^ 6 ; missing ; missing ; 10 ^ 6 + 1 : 10 ^ 6 + 4 ])
1533+ @test semijoin (df1, df2, on= :id ) ≅
1534+ DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 )
1535+ @test antijoin (df1, df2, on= :id ) ≅
1536+ DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 6 + 1 : 10 ^ 6 + 2 )
15961537
15971538 Random. seed! (1234 )
15981539 for i in 1 : 4
15991540 df1 = df1[shuffle (axes (df1, 1 )), :]
16001541 df2 = df2[shuffle (axes (df2, 1 )), :]
16011542
1602- @test try
1603- sort! (innerjoin (df1, df2, on= :id )) ≅
1604- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 , right_row= 1 : 10 ^ 6 )
1605- catch e
1606- if Int === Int32 && e isa OutOfMemoryError
1607- @warn " OutOfMemoryError. Skipping innerjoin test."
1608- true
1609- else
1610- rethrow (e)
1611- end
1612- end
1613-
1614- @test try
1615- sort! (leftjoin (df1, df2, on= :id )) ≅
1616- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 6 + 2 ,
1617- right_row= [1 : 10 ^ 6 ; missing ; missing ])
1618- catch e
1619- if Int === Int32 && e isa OutOfMemoryError
1620- @warn " OutOfMemoryError. Skipping leftjoin test."
1621- true
1622- else
1623- rethrow (e)
1624- end
1625- end
1626-
1627- @test try
1628- sort! (rightjoin (df1, df2, on= :id )) ≅
1629- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1630- left_row= [1 : 10 ^ 6 ; fill (missing , 4 )],
1631- right_row= 1 : 10 ^ 6 + 4 )
1632- catch e
1633- if Int === Int32 && e isa OutOfMemoryError
1634- @warn " OutOfMemoryError. Skipping rightjoin test."
1635- true
1636- else
1637- rethrow (e)
1638- end
1639- end
1640-
1641- @test try
1642- sort! (outerjoin (df1, df2, on= :id )) ≅
1643- DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1644- left_row= [1 : 10 ^ 6 + 2 ; fill (missing , 4 )],
1645- right_row= [1 : 10 ^ 6 ; missing ; missing ; 10 ^ 6 + 1 : 10 ^ 6 + 4 ])
1646- catch e
1647- if Int === Int32 && e isa OutOfMemoryError
1648- @warn " OutOfMemoryError. Skipping outerjoin test."
1649- true
1650- else
1651- rethrow (e)
1652- end
1653- end
1654-
1655- @test try
1656- sort! (semijoin (df1, df2, on= :id )) ≅
1657- DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 )
1658- catch e
1659- if Int === Int32 && e isa OutOfMemoryError
1660- @warn " OutOfMemoryError. Skipping semijoin test."
1661- true
1662- else
1663- rethrow (e)
1664- end
1665- end
1666-
1667- @test try
1668- sort! (antijoin (df1, df2, on= :id )) ≅
1669- DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 6 + 1 : 10 ^ 6 + 2 )
1670- catch e
1671- if Int === Int32 && e isa OutOfMemoryError
1672- @warn " OutOfMemoryError. Skipping antijoin test."
1673- true
1674- else
1675- rethrow (e)
1676- end
1677- end
1543+ @test sort! (innerjoin (df1, df2, on= :id )) ≅
1544+ DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 , right_row= 1 : 10 ^ 6 )
1545+ @test sort! (leftjoin (df1, df2, on= :id )) ≅
1546+ DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ], left_row= 1 : 10 ^ 6 + 2 ,
1547+ right_row= [1 : 10 ^ 6 ; missing ; missing ])
1548+ @test sort! (rightjoin (df1, df2, on= :id )) ≅
1549+ DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1550+ left_row= [1 : 10 ^ 6 ; fill (missing , 4 )],
1551+ right_row= 1 : 10 ^ 6 + 4 )
1552+ @test sort! (outerjoin (df1, df2, on= :id )) ≅
1553+ DataFrame (id= [1 : 10 ^ 6 ; 10 ^ 7 + 1 : 10 ^ 7 + 2 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ],
1554+ left_row= [1 : 10 ^ 6 + 2 ; fill (missing , 4 )],
1555+ right_row= [1 : 10 ^ 6 ; missing ; missing ; 10 ^ 6 + 1 : 10 ^ 6 + 4 ])
1556+ @test sort! (semijoin (df1, df2, on= :id )) ≅
1557+ DataFrame (id= 1 : 10 ^ 6 , left_row= 1 : 10 ^ 6 )
1558+ @test sort! (antijoin (df1, df2, on= :id )) ≅
1559+ DataFrame (id= 10 ^ 7 + 1 : 10 ^ 7 + 2 , left_row= 10 ^ 6 + 1 : 10 ^ 6 + 2 )
16781560 end
16791561
16801562 # test correctness of column order
@@ -1683,88 +1565,23 @@ end
16831565 df2 = DataFrame (e= Int8 (5 ), id1= [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ], f= Int8 (6 ), g= Int8 (7 ),
16841566 id2= - [1 : 10 ^ 6 ; 10 ^ 8 + 1 : 10 ^ 8 + 4 ], h= Int8 (8 ))
16851567
1686- @test try
1687- innerjoin (df1, df2, on= [:id1 , :id2 ]) ≅
1688- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1689- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1690- catch e
1691- if Int === Int32 && e isa OutOfMemoryError
1692- @warn " OutOfMemoryError. Skipping innerjoin test."
1693- true
1694- else
1695- rethrow (e)
1696- end
1697- end
1698-
1699- @test try
1700- leftjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1701- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1702- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1703- catch e
1704- if Int === Int32 && e isa OutOfMemoryError
1705- @warn " OutOfMemoryError. Skipping leftjoin test."
1706- true
1707- else
1708- rethrow (e)
1709- end
1710- end
1711-
1712- @test try
1713- rightjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1714- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1715- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1716- catch e
1717- if Int === Int32 && e isa OutOfMemoryError
1718- @warn " OutOfMemoryError. Skipping rightjoin test."
1719- true
1720- else
1721- rethrow (e)
1722- end
1723- end
1724-
1725- @test try
1726- outerjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1727- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1728- c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1729- catch e
1730- if Int === Int32 && e isa OutOfMemoryError
1731- @warn " OutOfMemoryError. Skipping outerjoin test."
1732- true
1733- else
1734- rethrow (e)
1735- end
1736- end
1737-
1738- @test try
1739- semijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1740- DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 , c= Int8 (3 ), d= Int8 (4 ))
1741- catch e
1742- if Int === Int32 && e isa OutOfMemoryError
1743- @warn " OutOfMemoryError. Skipping semijoin test."
1744- true
1745- else
1746- rethrow (e)
1747- end
1748- end
1749-
1750- @test try
1751- antijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1752- DataFrame (a= Int8 (1 ), id2= - (10 ^ 7 + 1 : 10 ^ 7 + 2 ), b= Int8 (2 ), id1= (10 ^ 7 + 1 : 10 ^ 7 + 2 ),
1753- c= Int8 (3 ), d= Int8 (4 ))
1754- catch e
1755- if Int === Int32 && e isa OutOfMemoryError
1756- @warn " OutOfMemoryError. Skipping antijoin test."
1757- true
1758- else
1759- rethrow (e)
1760- end
1761- end
1762- catch e
1763- if Int === Int32 && e isa OutOfMemoryError
1764- @warn " OutOfMemoryError. Skipping antijoin test."
1765- else
1766- rethrow (e)
1767- end
1568+ @test innerjoin (df1, df2, on= [:id1 , :id2 ]) ≅
1569+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1570+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1571+ @test leftjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1572+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1573+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1574+ @test rightjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1575+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1576+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1577+ @test outerjoin (df1, df2, on= [:id1 , :id2 ])[1 : 10 ^ 6 , :] ≅
1578+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 ,
1579+ c= Int8 (3 ), d= Int8 (4 ), e= Int8 (5 ), f= Int8 (6 ), g= Int8 (7 ), h= Int8 (8 ))
1580+ @test semijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1581+ DataFrame (a= Int8 (1 ), id2= - (1 : 10 ^ 6 ), b= Int8 (2 ), id1= 1 : 10 ^ 6 , c= Int8 (3 ), d= Int8 (4 ))
1582+ @test antijoin (df1, df2, on= [:id1 , :id2 ]) ≅
1583+ DataFrame (a= Int8 (1 ), id2= - (10 ^ 7 + 1 : 10 ^ 7 + 2 ), b= Int8 (2 ), id1= (10 ^ 7 + 1 : 10 ^ 7 + 2 ),
1584+ c= Int8 (3 ), d= Int8 (4 ))
17681585 end
17691586end
17701587
0 commit comments