@@ -5821,8 +5821,38 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'):
5821
5821
out ._sortby = list (self ._sortby )
5822
5822
return out
5823
5823
5824
- # def drop_duplicates(self, *args, **kwargs):
5825
- # raise NotImplementedError
5824
def drop_duplicates(self, casout, subset=None):
    """
    Write a de-duplicated copy of the table to `casout` and return it

    The ``deduplication`` action set is loaded and the
    ``deduplication.deduplicate`` action is run with
    ``noDuplicateKeys=True``, grouped by the columns in `subset`.
    The action does not return the resulting rows; they are written
    to `casout`, and a table object referencing `casout` is returned.

    Parameters
    ----------
    casout : CASTable or dict or string
        The output table.  A :class:`CASTable` is returned as-is,
        a dict is expanded as keyword arguments to the connection's
        ``CASTable`` constructor, and any other value is passed to
        that constructor as a single positional argument.
    subset : string or list-of-strings, optional
        The column(s) used to identify duplicates.  If not specified
        (or empty), all columns of the table are used.

    Raises
    ------
    ValueError
        If a column named in `subset` is not in the table.

    Returns
    -------
    :class:`CASTable`
        Table object referencing `casout`.

    """
    self._loadactionset('deduplication')

    columns = list(self.columns)

    # Build a fresh list of key columns.  The argument itself is never
    # mutated: appending to a shared default list (or to the caller's
    # list) would leak column names from one call into the next.
    if not subset:
        # Nothing specified -> look for duplicates across all columns
        keys = list(columns)
    else:
        # A bare string would otherwise be iterated character by character
        if isinstance(subset, six.string_types):
            keys = [subset]
        else:
            keys = list(subset)
        # Every requested column must exist in the table
        for col in keys:
            if col not in columns:
                raise ValueError(
                    'Provided column {} is not in the table.'.format(col))

    # Run the action to drop the duplicates.  The result is not returned
    # by the action, so it has to be picked up from casout afterwards.
    self.groupby(keys)._retrieve('deduplication.deduplicate',
                                 casout=casout, noDuplicateKeys=True)

    # Hand back a table object that points at the output table
    if isinstance(casout, CASTable):
        return casout
    if isinstance(casout, dict):
        return self.get_connection().CASTable(**casout)
    return self.get_connection().CASTable(casout)
5826
5856
5827
5857
# def duplicated(self, *args, **kwargs):
5828
5858
# raise NotImplementedError
0 commit comments