Skip to content

Commit c254e9c

Browse files
authored
chore: cleanup type errors in tests/system/small/ml/test_metrics.py (#755)
1 parent 61f18cb commit c254e9c

File tree

2 files changed

+29
-49
lines changed

2 files changed

+29
-49
lines changed

bigframes/ml/metrics/_metrics.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def recall_score(
227227
y_true: Union[bpd.DataFrame, bpd.Series],
228228
y_pred: Union[bpd.DataFrame, bpd.Series],
229229
*,
230-
average: str = "binary",
230+
average: typing.Optional[str] = "binary",
231231
) -> pd.Series:
232232
# TODO(ashleyxu): support more average type, default to "binary"
233233
if average is not None:
@@ -264,7 +264,7 @@ def precision_score(
264264
y_true: Union[bpd.DataFrame, bpd.Series],
265265
y_pred: Union[bpd.DataFrame, bpd.Series],
266266
*,
267-
average: str = "binary",
267+
average: typing.Optional[str] = "binary",
268268
) -> pd.Series:
269269
# TODO(ashleyxu): support more average type, default to "binary"
270270
if average is not None:
@@ -303,7 +303,7 @@ def f1_score(
303303
y_true: Union[bpd.DataFrame, bpd.Series],
304304
y_pred: Union[bpd.DataFrame, bpd.Series],
305305
*,
306-
average: str = "binary",
306+
average: typing.Optional[str] = "binary",
307307
) -> pd.Series:
308308
# TODO(ashleyxu): support more average type, default to "binary"
309309
y_true_series, y_pred_series = utils.convert_to_series(y_true, y_pred)

tests/system/small/ml/test_metrics.py

Lines changed: 26 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -219,23 +219,21 @@ def test_roc_curve_binary_classification_prediction_matches_sklearn(session):
219219
)
220220

221221
# sklearn returns float64 np arrays
222-
np_fpr = fpr.to_pandas().astype("float64").array
223-
np_tpr = tpr.to_pandas().astype("float64").array
222+
np_fpr = fpr.to_pandas().astype("float64").array.to_numpy()
223+
np_tpr = tpr.to_pandas().astype("float64").array.to_numpy()
224224
np_thresholds = thresholds.to_pandas().astype("float64").array
225225

226226
np.testing.assert_array_equal(
227227
# skip testing the first value, as it is redundant and inconsistent across sklearn versions
228228
np_thresholds[1:],
229229
expected_thresholds[1:],
230230
)
231-
# TODO(b/340872435): fix type error
232231
np.testing.assert_array_equal(
233-
np_fpr, # type: ignore
232+
np_fpr,
234233
expected_fpr,
235234
)
236-
# TODO(b/340872435): fix type error
237235
np.testing.assert_array_equal(
238-
np_tpr, # type: ignore
236+
np_tpr,
239237
expected_tpr,
240238
)
241239

@@ -316,23 +314,21 @@ def test_roc_curve_binary_classification_decision_matches_sklearn(session):
316314
)
317315

318316
# sklearn returns float64 np arrays
319-
np_fpr = fpr.to_pandas().astype("float64").array
320-
np_tpr = tpr.to_pandas().astype("float64").array
317+
np_fpr = fpr.to_pandas().astype("float64").array.to_numpy()
318+
np_tpr = tpr.to_pandas().astype("float64").array.to_numpy()
321319
np_thresholds = thresholds.to_pandas().astype("float64").array
322320

323321
np.testing.assert_array_equal(
324322
# skip testing the first value, as it is redundant and inconsistent across sklearn versions
325323
np_thresholds[1:],
326324
expected_thresholds[1:],
327325
)
328-
# TODO(b/340872435): fix type error
329326
np.testing.assert_array_equal(
330-
np_fpr, # type: ignore
327+
np_fpr,
331328
expected_fpr,
332329
)
333-
# TODO(b/340872435): fix type error
334330
np.testing.assert_array_equal(
335-
np_tpr, # type: ignore
331+
np_tpr,
336332
expected_tpr,
337333
)
338334

@@ -519,14 +515,10 @@ def test_confusion_matrix_column_index(session):
519515
).astype("Int64")
520516
df = session.read_pandas(pd_df)
521517
confusion_matrix = metrics.confusion_matrix(df[["y_true"]], df[["y_pred"]])
522-
# TODO(b/340872435): fix type error
523-
expected_pd_df = (
524-
pd.DataFrame( # type: ignore
525-
{1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]}
526-
)
527-
.astype("int64")
528-
.set_index([pd.Index([1, 2, 3, 4])])
529-
)
518+
expected_pd_df = pd.DataFrame(
519+
{1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]},
520+
index=[1, 2, 3, 4],
521+
).astype("int64")
530522
pd.testing.assert_frame_equal(
531523
confusion_matrix, expected_pd_df, check_index_type=False
532524
)
@@ -562,9 +554,8 @@ def test_confusion_matrix_str_matches_sklearn(session):
562554
expected_confusion_matrix = sklearn_metrics.confusion_matrix(
563555
pd_df[["y_true"]], pd_df[["y_pred"]]
564556
)
565-
# TODO(b/340872435): fix type error
566-
expected_pd_df = pd.DataFrame(expected_confusion_matrix).set_index( # type: ignore
567-
[pd.Index(["ant", "bird", "cat"])]
557+
expected_pd_df = pd.DataFrame(
558+
expected_confusion_matrix, index=["ant", "bird", "cat"]
568559
)
569560
expected_pd_df.columns = pd.Index(["ant", "bird", "cat"])
570561
pd.testing.assert_frame_equal(
@@ -601,9 +592,8 @@ def test_recall_score(session):
601592
}
602593
).astype("Int64")
603594
df = session.read_pandas(pd_df)
604-
# TODO(b/340872435): fix type error
605595
recall = metrics.recall_score(
606-
df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None # type: ignore
596+
df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None
607597
)
608598
expected_values = [1.000000, 0.000000, 0.666667]
609599
expected_index = [0, 1, 2]
@@ -620,8 +610,7 @@ def test_recall_score_matches_sklearn(session):
620610
}
621611
).astype("Int64")
622612
df = session.read_pandas(pd_df)
623-
# TODO(b/340872435): fix type error
624-
recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore
613+
recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None)
625614
expected_values = sklearn_metrics.recall_score(
626615
pd_df[["y_true"]], pd_df[["y_pred"]], average=None
627616
)
@@ -638,8 +627,7 @@ def test_recall_score_str_matches_sklearn(session):
638627
}
639628
).astype("str")
640629
df = session.read_pandas(pd_df)
641-
# TODO(b/340872435): fix type error
642-
recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore
630+
recall = metrics.recall_score(df[["y_true"]], df[["y_pred"]], average=None)
643631
expected_values = sklearn_metrics.recall_score(
644632
pd_df[["y_true"]], pd_df[["y_pred"]], average=None
645633
)
@@ -656,8 +644,7 @@ def test_recall_score_series(session):
656644
}
657645
).astype("Int64")
658646
df = session.read_pandas(pd_df)
659-
# TODO(b/340872435): fix type error
660-
recall = metrics.recall_score(df["y_true"], df["y_pred"], average=None) # type: ignore
647+
recall = metrics.recall_score(df["y_true"], df["y_pred"], average=None)
661648
expected_values = [1.000000, 0.000000, 0.666667]
662649
expected_index = [0, 1, 2]
663650
expected_recall = pd.Series(expected_values, index=expected_index)
@@ -673,9 +660,8 @@ def test_precision_score(session):
673660
}
674661
).astype("Int64")
675662
df = session.read_pandas(pd_df)
676-
# TODO(b/340872435): fix type error
677663
precision_score = metrics.precision_score(
678-
df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None # type: ignore
664+
df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None
679665
)
680666
expected_values = [0.666667, 0.000000, 0.666667]
681667
expected_index = [0, 1, 2]
@@ -696,7 +682,7 @@ def test_precision_score_matches_sklearn(session):
696682
df = session.read_pandas(pd_df)
697683
# TODO(b/340872435): fix type error
698684
precision_score = metrics.precision_score(
699-
df[["y_true"]], df[["y_pred"]], average=None # type: ignore
685+
df[["y_true"]], df[["y_pred"]], average=None
700686
)
701687
expected_values = sklearn_metrics.precision_score(
702688
pd_df[["y_true"]], pd_df[["y_pred"]], average=None
@@ -716,9 +702,8 @@ def test_precision_score_str_matches_sklearn(session):
716702
}
717703
).astype("str")
718704
df = session.read_pandas(pd_df)
719-
# TODO(b/340872435): fix type error
720705
precision_score = metrics.precision_score(
721-
df[["y_true"]], df[["y_pred"]], average=None # type: ignore
706+
df[["y_true"]], df[["y_pred"]], average=None
722707
)
723708
expected_values = sklearn_metrics.precision_score(
724709
pd_df[["y_true"]], pd_df[["y_pred"]], average=None
@@ -738,8 +723,7 @@ def test_precision_score_series(session):
738723
}
739724
).astype("Int64")
740725
df = session.read_pandas(pd_df)
741-
# TODO(b/340872435): fix type error
742-
precision_score = metrics.precision_score(df["y_true"], df["y_pred"], average=None) # type: ignore
726+
precision_score = metrics.precision_score(df["y_true"], df["y_pred"], average=None)
743727
expected_values = [0.666667, 0.000000, 0.666667]
744728
expected_index = [0, 1, 2]
745729
expected_precision = pd.Series(expected_values, index=expected_index)
@@ -757,9 +741,8 @@ def test_f1_score(session):
757741
}
758742
).astype("Int64")
759743
df = session.read_pandas(pd_df)
760-
# TODO(b/340872435): fix type error
761744
f1_score = metrics.f1_score(
762-
df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None # type: ignore
745+
df[["y_true_arbitrary_name"]], df[["y_pred_arbitrary_name"]], average=None
763746
)
764747
expected_values = [0.8, 0.000000, 0.666667]
765748
expected_index = [0, 1, 2]
@@ -776,8 +759,7 @@ def test_f1_score_matches_sklearn(session):
776759
}
777760
).astype("Int64")
778761
df = session.read_pandas(pd_df)
779-
# TODO(b/340872435): fix type error
780-
f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore
762+
f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None)
781763
expected_values = sklearn_metrics.f1_score(
782764
pd_df[["y_true"]], pd_df[["y_pred"]], average=None
783765
)
@@ -794,8 +776,7 @@ def test_f1_score_str_matches_sklearn(session):
794776
}
795777
).astype("str")
796778
df = session.read_pandas(pd_df)
797-
# TODO(b/340872435): fix type error
798-
f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None) # type: ignore
779+
f1_score = metrics.f1_score(df[["y_true"]], df[["y_pred"]], average=None)
799780
expected_values = sklearn_metrics.f1_score(
800781
pd_df[["y_true"]], pd_df[["y_pred"]], average=None
801782
)
@@ -812,8 +793,7 @@ def test_f1_score_series(session):
812793
}
813794
).astype("Int64")
814795
df = session.read_pandas(pd_df)
815-
# TODO(b/340872435): fix type error
816-
f1_score = metrics.f1_score(df["y_true"], df["y_pred"], average=None) # type: ignore
796+
f1_score = metrics.f1_score(df["y_true"], df["y_pred"], average=None)
817797
expected_values = [0.8, 0.000000, 0.666667]
818798
expected_index = [0, 1, 2]
819799
expected_f1 = pd.Series(expected_values, index=expected_index)

0 commit comments

Comments (0)