@@ -579,7 +579,7 @@ def test_column_nunique(self):
579
579
580
580
tblgrp = tbl ['MSRP' ].groupby (['Origin' , 'Cylinders' ], as_index = False ).nunique ()
581
581
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
582
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = None )
582
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = [ 'Origin' , 'Cylinders' , 'MSRP' ] )
583
583
584
584
def test_nunique (self ):
585
585
tbl = self .table .sort_values (SORT_KEYS )
@@ -663,7 +663,7 @@ def test_column_max(self):
663
663
664
664
tblgrp = tbl ['EngineSize' ].groupby ('Origin' , as_index = False ).max ()
665
665
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
666
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = None )
666
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = [ 'Origin' , 'EngineSize' ] )
667
667
668
668
@unittest .skipIf (int (pd .__version__ .split ('.' )[1 ]) < 16 , 'Need newer version of Pandas' )
669
669
def test_max (self ):
@@ -698,7 +698,7 @@ def test_max(self):
698
698
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
699
699
# Drop Model since they get sorted differently
700
700
self .assertTablesEqual (dfgrp .drop ('Model' , axis = 1 ), tblgrp .drop ('Model' , axis = 1 ),
701
- sortby = None , include_index = True )
701
+ sortby = [ 'Origin' , 'Make' , 'Type' , 'DriveTrain' ] )
702
702
703
703
def test_column_min (self ):
704
704
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -722,7 +722,7 @@ def test_column_min(self):
722
722
723
723
tblgrp = tbl ['EngineSize' ].groupby ('Origin' , as_index = False ).min ()
724
724
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
725
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = None )
725
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = [ 'Origin' , 'EngineSize' ] )
726
726
727
727
@unittest .skipIf (int (pd .__version__ .split ('.' )[1 ]) < 16 , 'Need newer version of Pandas' )
728
728
def test_min (self ):
@@ -757,7 +757,7 @@ def test_min(self):
757
757
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
758
758
# Drop Type since it gets sorted differently
759
759
self .assertTablesEqual (dfgrp .drop ('Type' , axis = 1 ), tblgrp .drop ('Type' , axis = 1 ),
760
- sortby = None )
760
+ sortby = [ 'Origin' , 'Make' , 'Model' ] )
761
761
762
762
def test_column_mean (self ):
763
763
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -781,7 +781,7 @@ def test_column_mean(self):
781
781
782
782
tblgrp = tbl ['EngineSize' ].groupby ('Origin' , as_index = False ).mean ()
783
783
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
784
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = None , decimals = 5 )
784
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = [ 'Origin' , 'EngineSize' ] , decimals = 5 )
785
785
786
786
@unittest .skipIf (sys .version_info .major < 3 , 'Need newer version of Python' )
787
787
def test_mean (self ):
@@ -804,7 +804,7 @@ def test_mean(self):
804
804
dfgrp = df .groupby ('Origin' , as_index = False ).mean ()
805
805
tblgrp = tbl .groupby ('Origin' , as_index = False ).mean ()
806
806
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
807
- self .assertTablesEqual (dfgrp , tblgrp , sortby = None , decimals = 5 )
807
+ self .assertTablesEqual (dfgrp , tblgrp , sortby = [ 'Origin' , 'MSRP' , 'Invoice' ] , decimals = 5 )
808
808
809
809
def test_column_median (self ):
810
810
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -978,7 +978,7 @@ def test_column_sum(self):
978
978
979
979
tblgrp = tbl ['EngineSize' ].groupby ('Origin' , as_index = False ).sum ()
980
980
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
981
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = None , decimals = 5 )
981
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = [ 'Origin' , 'EngineSize' ] , decimals = 5 )
982
982
983
983
def test_sum (self ):
984
984
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -1000,7 +1000,7 @@ def test_sum(self):
1000
1000
dfgrp = df .groupby ('Origin' , as_index = False ).sum ()
1001
1001
tblgrp = tbl .groupby ('Origin' , as_index = False ).sum ()
1002
1002
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
1003
- self .assertTablesEqual (dfgrp , tblgrp , decimals = 5 , sortby = None )
1003
+ self .assertTablesEqual (dfgrp , tblgrp , decimals = 5 , sortby = [ 'Origin' , 'MSRP' , 'Invoice' ] )
1004
1004
1005
1005
def test_column_std (self ):
1006
1006
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -1024,7 +1024,7 @@ def test_column_std(self):
1024
1024
1025
1025
tblgrp = tbl ['EngineSize' ].groupby ('Origin' , as_index = False ).std ()
1026
1026
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
1027
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = None , decimals = 5 )
1027
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , sortby = [ 'Origin' , 'EngineSize' ] , decimals = 5 )
1028
1028
1029
1029
def test_std (self ):
1030
1030
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -1046,7 +1046,7 @@ def test_std(self):
1046
1046
#dfgrp = df.groupby('Origin', as_index=False).std()
1047
1047
tblgrp = tbl .groupby ('Origin' , as_index = False ).std ()
1048
1048
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
1049
- self .assertTablesEqual (dfgrp .reset_index (), tblgrp , decimals = 5 , sortby = None )
1049
+ self .assertTablesEqual (dfgrp .reset_index (), tblgrp , decimals = 5 , sortby = [ 'Origin' , 'MSRP' , 'Invoice' ] )
1050
1050
1051
1051
def test_column_var (self ):
1052
1052
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -1074,7 +1074,7 @@ def test_column_var(self):
1074
1074
# For some reason Pandas drops this column, but I think it should be there.
1075
1075
tblgrp = tblgrp .drop ('Origin' , axis = 1 )
1076
1076
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
1077
- self .assertTablesEqual (dfgrp , tblgrp , decimals = 5 , sortby = None )
1077
+ self .assertTablesEqual (dfgrp , tblgrp , decimals = 5 , sortby = [ 'EngineSize' ] )
1078
1078
1079
1079
def test_var (self ):
1080
1080
df = self .get_cars_df ().sort_values (SORT_KEYS )
@@ -1096,7 +1096,7 @@ def test_var(self):
1096
1096
dfgrp = df .groupby ('Origin' , as_index = False ).var ()
1097
1097
tblgrp = tbl .groupby ('Origin' , as_index = False ).var ()
1098
1098
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
1099
- self .assertTablesEqual (dfgrp , tblgrp , decimals = 3 , sortby = None )
1099
+ self .assertTablesEqual (dfgrp , tblgrp , decimals = 3 , sortby = [ 'Origin' , 'MSRP' , 'Invoice' ] )
1100
1100
1101
1101
def test_column_nmiss (self ):
1102
1102
# TODO: Not supported by Pandas; need comparison values
@@ -1119,6 +1119,9 @@ def test_column_nmiss(self):
1119
1119
self .assertEqual (len (tblgrp ), 3 )
1120
1120
1121
1121
# Test character missing values
1122
+ swat .options .cas .trace_actions = True
1123
+ swat .options .cas .trace_ui_actions = True
1124
+ swat .options .cas .print_messages = True
1122
1125
tbl = self .table .replace ({'Make' : {'Buick' : '' }})
1123
1126
1124
1127
tblgrp = tbl .groupby ('Origin' )['Make' ].nmiss ()
@@ -1138,12 +1141,10 @@ def test_column_nmiss(self):
1138
1141
#
1139
1142
swat .options .cas .dataset .bygroup_casout_threshold = 2
1140
1143
1141
- swat .options .cas .print_messages = True
1142
1144
tblgrp = tbl ['Cylinders' ].groupby ('Origin' ).nmiss ()
1143
1145
self .assertEqual (tblgrp .__class__ .__name__ , 'CASTable' )
1144
1146
self .assertEqual (len (tblgrp ), 3 )
1145
1147
tblgrp = tblgrp .to_frame ().set_index ('Origin' )['Cylinders' ]
1146
- print (tblgrp )
1147
1148
self .assertEqual (tblgrp .loc ['Asia' ], 2 )
1148
1149
self .assertEqual (tblgrp .loc ['Europe' ], 0 )
1149
1150
self .assertEqual (tblgrp .loc ['USA' ], 0 )
0 commit comments