@@ -871,7 +871,7 @@ def test_drop_duplicates(self):
871
871
tbl = self .table
872
872
df = self .get_cars_df
873
873
# drop duplicates for single subset
874
- tbl_dropped = tbl .drop_duplicates (casout = {'replace' :True }, subset = 'Make' )
874
+ tbl_dropped = tbl .drop_duplicates (casout = {'replace' : True }, subset = 'Make' )
875
875
df_dropped = df .drop_duplicates (subset = 'Make' )
876
876
877
877
# Equivalent to pandas in size
@@ -881,7 +881,8 @@ def test_drop_duplicates(self):
881
881
self .assertEquals (tbl_dropped ['Make' ].nunique (), len (tbl_dropped ))
882
882
883
883
# drop duplicates for multi-element subset
884
- tbl_dropped_multi = tbl .drop_duplicates (casout = {'replace' :True }, subset = ['Country' , 'Type' ])
884
+ tbl_dropped_multi = tbl .drop_duplicates (casout = {'replace' : True },
885
+ subset = ['Country' , 'Type' ])
885
886
df_dropped_multi = df .drop_duplicates (subset = ['Country' , 'Type' ])
886
887
887
888
# Equivalent to pandas in size
@@ -890,20 +891,22 @@ def test_drop_duplicates(self):
890
891
# We need some rows where all values for each col are duplicate
891
892
nDuplicates = 7
892
893
fetchTable = self .s .fetch (table = self .table , to = nDuplicates )['Fetch' ]
893
- # Really wants to convert char to varChar, we need to specify our way out of this
894
- subset = self .s .upload_frame (fetchTable , casout = {'replace' :True },
895
- importOptions = {'fileType' :'CSV' ,
896
- 'vars' :[{'name' :'Make' , 'type' :'CHAR' , 'length' :13 },
897
- {'name' :'Model' , 'type' :'CHAR' , 'length' :40 },
898
- {'name' :'Type' , 'type' :'CHAR' , 'length' :8 },
899
- {'name' :'Origin' , 'type' :'CHAR' , 'length' :6 },
900
- {'name' :'DriveTrain' , 'type' :'CHAR' , 'length' :5 }
901
- ]})
894
+ # Must specify char type and explicit length
895
+ importOptions = {'fileType' : 'CSV' ,
896
+ 'vars' : [{'name' : 'Make' , 'type' : 'CHAR' , 'length' : 13 },
897
+ {'name' : 'Model' , 'type' : 'CHAR' , 'length' : 40 },
898
+ {'name' : 'Type' , 'type' : 'CHAR' , 'length' : 8 },
899
+ {'name' : 'Origin' , 'type' : 'CHAR' , 'length' : 6 },
900
+ {'name' : 'DriveTrain' , 'type' : 'CHAR' , 'length' : 5 }
901
+ ]}
902
+ subset = self .s .upload_frame (fetchTable , casout = {'replace' : True },
903
+ importOptions = importOptions )
904
+
902
905
# This table is like tbl, but with nDuplicates fully duplicated rows appended
903
906
duplicate_table = tbl .append (subset )
904
907
905
908
# Drop duplicates without subset (checks all cols)
906
- tbl_dropped_all = duplicate_table .drop_duplicates (casout = {'replace' :True })
909
+ tbl_dropped_all = duplicate_table .drop_duplicates (casout = {'replace' : True })
907
910
908
911
# Make sure that the correct number of rows were dropped
909
912
self .assertEquals (len (tbl ), len (tbl_dropped_all ))
0 commit comments