@@ -1486,6 +1486,99 @@ def test_drop_duplicates(data):
1486
1486
df_equals (modin_df , pandas .DataFrame ({"A" : [], "B" : [], "C" : []}))
1487
1487
1488
1488
1489
def test_drop_duplicates_with_missing_index_values():
    """Compare sort_values + drop_duplicates on a frame whose index has gaps.

    The same 30 rows are loaded into a ``pandas`` frame and a ``pd`` frame,
    both are sorted by ``["id", "time"]`` and de-duplicated on ``["id"]``,
    and the two results are handed to ``df_equals``.
    """
    data = {
        "columns": ["value", "time", "id"],
        # Integer labels 4-15, 20-27 and 32-41: the index skips 16-19 and 28-31.
        "index": [*range(4, 16), *range(20, 28), *range(32, 42)],
        "data": [
            ["3", 1279213398000.0, 88.0],
            ["3", 1279204682000.0, 88.0],
            ["0", 1245772835000.0, 448.0],
            ["0", 1270564258000.0, 32.0],
            ["0", 1267106669000.0, 118.0],
            ["7", 1300621123000.0, 5.0],
            ["0", 1251130752000.0, 957.0],
            ["0", 1311683506000.0, 62.0],
            ["9", 1283692698000.0, 89.0],
            ["9", 1270234253000.0, 64.0],
            ["0", 1285088818000.0, 50.0],
            ["0", 1218212725000.0, 695.0],
            ["2", 1383933968000.0, 348.0],
            ["0", 1368227625000.0, 257.0],
            ["1", 1454514093000.0, 446.0],
            ["1", 1428497427000.0, 134.0],
            ["1", 1459184936000.0, 568.0],
            ["1", 1502293302000.0, 599.0],
            ["1", 1491833358000.0, 829.0],
            ["1", 1485431534000.0, 806.0],
            ["8", 1351800505000.0, 101.0],
            ["0", 1357247721000.0, 916.0],
            ["0", 1335804423000.0, 370.0],
            ["24", 1327547726000.0, 720.0],
            ["0", 1332334140000.0, 415.0],
            ["0", 1309543100000.0, 30.0],
            ["18", 1309541141000.0, 30.0],
            ["0", 1298979435000.0, 48.0],
            ["14", 1276098160000.0, 59.0],
            ["0", 1233936302000.0, 109.0],
        ],
    }

    # Both frames are built from the very same rows, labels and column names.
    rows = data["data"]
    frame_kwargs = {"index": data["index"], "columns": data["columns"]}
    pandas_df = pandas.DataFrame(rows, **frame_kwargs)
    modin_df = pd.DataFrame(rows, **frame_kwargs)

    sort_keys = ["id", "time"]
    dedup_keys = ["id"]
    modin_sorted = modin_df.sort_values(sort_keys)
    modin_result = modin_sorted.drop_duplicates(dedup_keys)
    pandas_sorted = pandas_df.sort_values(sort_keys)
    pandas_result = pandas_sorted.drop_duplicates(dedup_keys)
    df_equals(modin_result, pandas_result)
1565
+
1566
+
1567
def test_drop_duplicates_after_sort():
    """Compare drop_duplicates on ``value`` after sorting by value, then time.

    Four small records are loaded into a ``pd`` frame and a ``pandas`` frame;
    each is sorted by ``["value", "time"]`` and de-duplicated on ``["value"]``,
    and the two results are handed to ``df_equals``.
    """
    # (value, time) pairs; the first two rows are deliberately out of time order.
    value_time_pairs = [(1, 2), (1, 1), (2, 1), (2, 2)]
    data = [{"value": value, "time": time} for value, time in value_time_pairs]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    sort_keys = ["value", "time"]
    dedup_keys = ["value"]
    modin_sorted = modin_df.sort_values(sort_keys)
    modin_result = modin_sorted.drop_duplicates(dedup_keys)
    pandas_sorted = pandas_df.sort_values(sort_keys)
    pandas_result = pandas_sorted.drop_duplicates(dedup_keys)
    df_equals(modin_result, pandas_result)
1580
+
1581
+
1489
1582
@pytest .mark .parametrize ("data" , test_data_values , ids = test_data_keys )
1490
1583
@pytest .mark .parametrize ("axis" , axis_values , ids = axis_keys )
1491
1584
@pytest .mark .parametrize ("how" , ["any" , "all" ], ids = ["any" , "all" ])
0 commit comments