@@ -58,7 +58,7 @@ def test_standard_scaler_normalizes(penguins_df_default_index, new_penguins_df):
5858 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
5959 )
6060
61- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
61+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
6262
6363
6464def test_standard_scaler_normalizeds_fit_transform (new_penguins_df ):
@@ -82,7 +82,7 @@ def test_standard_scaler_normalizeds_fit_transform(new_penguins_df):
8282 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
8383 )
8484
85- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
85+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
8686
8787
8888def test_standard_scaler_series_normalizes (penguins_df_default_index , new_penguins_df ):
@@ -110,7 +110,7 @@ def test_standard_scaler_series_normalizes(penguins_df_default_index, new_pengui
110110 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
111111 )
112112
113- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
113+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
114114
115115
116116def test_standard_scaler_save_load (new_penguins_df , dataset_id ):
@@ -125,6 +125,22 @@ def test_standard_scaler_save_load(new_penguins_df, dataset_id):
125125 assert isinstance (reloaded_transformer , preprocessing .StandardScaler )
126126 assert reloaded_transformer ._bqml_model is not None
127127
128+ result = reloaded_transformer .transform (
129+ new_penguins_df [["culmen_length_mm" , "culmen_depth_mm" , "flipper_length_mm" ]]
130+ ).to_pandas ()
131+
132+ expected = pd .DataFrame (
133+ {
134+ "standard_scaled_culmen_length_mm" : [1.313249 , - 0.20198 , - 1.111118 ],
135+ "standard_scaled_culmen_depth_mm" : [1.17072 , - 1.272416 , 0.101848 ],
136+ "standard_scaled_flipper_length_mm" : [1.251089 , - 1.196588 , - 0.054338 ],
137+ },
138+ dtype = "Float64" ,
139+ index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
140+ )
141+
142+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
143+
128144
129145def test_max_abs_scaler_normalizes (penguins_df_default_index , new_penguins_df ):
130146 # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.MaxAbsScaler, when BQML's change is in prod.
@@ -157,7 +173,7 @@ def test_max_abs_scaler_normalizes(penguins_df_default_index, new_penguins_df):
157173 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
158174 )
159175
160- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
176+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
161177
162178
163179def test_max_abs_scaler_normalizeds_fit_transform (new_penguins_df ):
@@ -176,7 +192,7 @@ def test_max_abs_scaler_normalizeds_fit_transform(new_penguins_df):
176192 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
177193 )
178194
179- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
195+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
180196
181197
182198def test_max_abs_scaler_series_normalizes (penguins_df_default_index , new_penguins_df ):
@@ -199,7 +215,7 @@ def test_max_abs_scaler_series_normalizes(penguins_df_default_index, new_penguin
199215 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
200216 )
201217
202- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
218+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
203219
204220
205221def test_max_abs_scaler_save_load (new_penguins_df , dataset_id ):
@@ -214,6 +230,22 @@ def test_max_abs_scaler_save_load(new_penguins_df, dataset_id):
214230 assert isinstance (reloaded_transformer , preprocessing .MaxAbsScaler )
215231 assert reloaded_transformer ._bqml_model is not None
216232
233+ result = reloaded_transformer .transform (
234+ new_penguins_df [["culmen_length_mm" , "culmen_depth_mm" , "flipper_length_mm" ]]
235+ ).to_pandas ()
236+
237+ expected = pd .DataFrame (
238+ {
239+ "max_abs_scaled_culmen_length_mm" : [1.0 , 0.974684 , 0.959494 ],
240+ "max_abs_scaled_culmen_depth_mm" : [1.0 , 0.914894 , 0.962766 ],
241+ "max_abs_scaled_flipper_length_mm" : [1.0 , 0.923469 , 0.959184 ],
242+ },
243+ dtype = "Float64" ,
244+ index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
245+ )
246+
247+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
248+
217249
218250def test_min_max_scaler_normalized_fit_transform (new_penguins_df ):
219251 scaler = preprocessing .MinMaxScaler ()
@@ -231,7 +263,7 @@ def test_min_max_scaler_normalized_fit_transform(new_penguins_df):
231263 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
232264 )
233265
234- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
266+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
235267
236268
237269def test_min_max_scaler_series_normalizes (penguins_df_default_index , new_penguins_df ):
@@ -255,7 +287,7 @@ def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguin
255287 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
256288 )
257289
258- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
290+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
259291
260292
261293def test_min_max_scaler_normalizes (penguins_df_default_index , new_penguins_df ):
@@ -290,7 +322,7 @@ def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df):
290322 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
291323 )
292324
293- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
325+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
294326
295327
296328def test_min_max_scaler_save_load (new_penguins_df , dataset_id ):
@@ -305,6 +337,22 @@ def test_min_max_scaler_save_load(new_penguins_df, dataset_id):
305337 assert isinstance (reloaded_transformer , preprocessing .MinMaxScaler )
306338 assert reloaded_transformer ._bqml_model is not None
307339
340+ result = reloaded_transformer .fit_transform (
341+ new_penguins_df [["culmen_length_mm" , "culmen_depth_mm" , "flipper_length_mm" ]]
342+ ).to_pandas ()
343+
344+ expected = pd .DataFrame (
345+ {
346+ "min_max_scaled_culmen_length_mm" : [1.0 , 0.375 , 0.0 ],
347+ "min_max_scaled_culmen_depth_mm" : [1.0 , 0.0 , 0.5625 ],
348+ "min_max_scaled_flipper_length_mm" : [1.0 , 0.0 , 0.466667 ],
349+ },
350+ dtype = "Float64" ,
351+ index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
352+ )
353+
354+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
355+
308356
309357def test_k_bins_discretizer_normalized_fit_transform_default_params (new_penguins_df ):
310358 discretizer = preprocessing .KBinsDiscretizer (strategy = "uniform" )
@@ -322,7 +370,7 @@ def test_k_bins_discretizer_normalized_fit_transform_default_params(new_penguins
322370 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
323371 )
324372
325- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
373+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
326374
327375
328376def test_k_bins_discretizer_series_normalizes (
@@ -344,7 +392,7 @@ def test_k_bins_discretizer_series_normalizes(
344392 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
345393 )
346394
347- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
395+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
348396
349397
350398def test_k_bins_discretizer_normalizes (penguins_df_default_index , new_penguins_df ):
@@ -374,7 +422,7 @@ def test_k_bins_discretizer_normalizes(penguins_df_default_index, new_penguins_d
374422 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
375423 )
376424
377- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
425+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
378426
379427
380428def test_k_bins_discretizer_normalizes_different_params (
@@ -406,7 +454,7 @@ def test_k_bins_discretizer_normalizes_different_params(
406454 index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
407455 )
408456
409- pd .testing .assert_frame_equal (result , expected , rtol = 1e-3 )
457+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
410458
411459
412460def test_k_bins_discretizer_save_load (new_penguins_df , dataset_id ):
@@ -423,6 +471,22 @@ def test_k_bins_discretizer_save_load(new_penguins_df, dataset_id):
423471 assert reloaded_transformer .strategy == transformer .strategy
424472 assert reloaded_transformer ._bqml_model is not None
425473
474+ result = reloaded_transformer .fit_transform (
475+ new_penguins_df [["culmen_length_mm" , "culmen_depth_mm" , "flipper_length_mm" ]]
476+ ).to_pandas ()
477+
478+ expected = pd .DataFrame (
479+ {
480+ "kbinsdiscretizer_culmen_length_mm" : ["bin_6" , "bin_4" , "bin_2" ],
481+ "kbinsdiscretizer_culmen_depth_mm" : ["bin_6" , "bin_2" , "bin_5" ],
482+ "kbinsdiscretizer_flipper_length_mm" : ["bin_6" , "bin_2" , "bin_4" ],
483+ },
484+ dtype = "string[pyarrow]" ,
485+ index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
486+ )
487+
488+ pd .testing .assert_frame_equal (result , expected , rtol = 0.1 )
489+
426490
427491def test_one_hot_encoder_default_params (new_penguins_df ):
428492 encoder = preprocessing .OneHotEncoder ()
@@ -560,6 +624,29 @@ def test_one_hot_encoder_save_load(new_penguins_df, dataset_id):
560624 assert reloaded_transformer .max_categories == transformer .max_categories
561625 assert reloaded_transformer ._bqml_model is not None
562626
627+ result = reloaded_transformer .fit_transform (
628+ new_penguins_df [["species" , "sex" ]]
629+ ).to_pandas ()
630+
631+ expected = pd .DataFrame (
632+ {
633+ "onehotencoded_species" : [
634+ [{"index" : 1 , "value" : 1.0 }],
635+ [{"index" : 1 , "value" : 1.0 }],
636+ [{"index" : 2 , "value" : 1.0 }],
637+ ],
638+ "onehotencoded_sex" : [
639+ [{"index" : 2 , "value" : 1.0 }],
640+ [{"index" : 1 , "value" : 1.0 }],
641+ [{"index" : 1 , "value" : 1.0 }],
642+ ],
643+ },
644+ dtype = ONE_HOT_ENCODED_DTYPE ,
645+ index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
646+ )
647+
648+ pd .testing .assert_frame_equal (result , expected )
649+
563650
564651def test_label_encoder_default_params (new_penguins_df ):
565652 encoder = preprocessing .LabelEncoder ()
@@ -677,5 +764,21 @@ def test_label_encoder_save_load(new_penguins_df, dataset_id):
677764 assert reloaded_transformer .max_categories == transformer .max_categories
678765 assert reloaded_transformer ._bqml_model is not None
679766
767+ result = reloaded_transformer .transform (new_penguins_df ).to_pandas ()
768+
769+ expected = pd .DataFrame (
770+ {
771+ "labelencoded_species" : [
772+ 1 ,
773+ 1 ,
774+ 2 ,
775+ ],
776+ },
777+ dtype = "Int64" ,
778+ index = pd .Index ([1633 , 1672 , 1690 ], name = "tag_number" , dtype = "Int64" ),
779+ )
780+
781+ pd .testing .assert_frame_equal (result , expected )
782+
680783
681784# TODO(garrettwu): add OneHotEncoder tests to compare with sklearn.
0 commit comments