
Commit fd8bc89

Genesis929 and tswast authored
deps: remove scikit-learn and sqlalchemy as required dependencies (#1296)
* chore: remove unused dependencies
* remove dependency from test constraints
* update test dependencies
* update nox dependencies
* update
* update trapz
* update value error
* remove todo
* update dep
* update test env
* code update
* remove scipy in test
* add scipy=1.7.1 to test constraints
* update scipy version
* update constraint
* update scipy test
* skip some tests when sklearn doesn't exist
* Remove sklearn for unit except 3.12 (Co-authored-by: Tim Sweña (Swast) <[email protected]>)
* update auc
* remove scipy again

Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 2659ea6 commit fd8bc89

11 files changed: 69 additions and 26 deletions


bigframes/ml/metrics/_metrics.py

Lines changed: 3 additions & 4 deletions
@@ -25,7 +25,6 @@
 import bigframes_vendored.sklearn.metrics._regression as vendored_metrics_regression
 import numpy as np
 import pandas as pd
-import sklearn.metrics as sklearn_metrics  # type: ignore
 
 from bigframes.ml import utils
 import bigframes.pandas as bpd
@@ -176,9 +175,9 @@ def auc(
 ) -> float:
     x_series, y_series = utils.batch_convert_to_series(x, y)
 
-    # TODO(b/286410053) Support ML exceptions and error handling.
-    auc = sklearn_metrics.auc(x_series.to_pandas(), y_series.to_pandas())
-    return auc
+    x_pandas = x_series.to_pandas()
+    y_pandas = y_series.to_pandas()
+    return vendored_metrics_ranking.auc(x_pandas, y_pandas)
 
 
 auc.__doc__ = inspect.getdoc(vendored_metrics_ranking.auc)

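The notable change here is that `auc` no longer calls `sklearn.metrics.auc`; it now routes through the vendored `vendored_metrics_ranking.auc`, so scikit-learn is not needed at runtime. Conceptually the metric is just a trapezoidal-rule integral of y over x, which numpy covers on its own. A minimal sketch of that computation, assuming x is sorted and has at least two points (illustrative only, not the vendored implementation):

```python
import numpy as np


def trapezoid_auc(x, y) -> float:
    """Area under the curve y(x) using the trapezoidal rule."""
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.size < 2:
        raise ValueError("At least 2 points are required to compute the AUC.")
    # np.trapz integrates y with respect to x; a strictly decreasing x would
    # give a negative area, so take the absolute value to cover curves
    # traversed in either direction.
    return float(abs(np.trapz(y, x)))


print(trapezoid_auc([0.0, 0.5, 1.0], [0.0, 0.75, 1.0]))  # 0.625
```
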
noxfile.py

Lines changed: 14 additions & 6 deletions
@@ -72,7 +72,9 @@
 UNIT_TEST_LOCAL_DEPENDENCIES: List[str] = []
 UNIT_TEST_DEPENDENCIES: List[str] = []
 UNIT_TEST_EXTRAS: List[str] = []
-UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {"3.12": ["polars"]}
+UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
+    "3.12": ["polars", "scikit-learn"],
+}
 
 # 3.10 is needed for Windows tests as it is the only version installed in the
 # bigframes-windows container image. For more information, search
@@ -96,8 +98,13 @@
 ]
 SYSTEM_TEST_LOCAL_DEPENDENCIES: List[str] = []
 SYSTEM_TEST_DEPENDENCIES: List[str] = []
-SYSTEM_TEST_EXTRAS: List[str] = ["tests"]
-SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {}
+SYSTEM_TEST_EXTRAS: List[str] = []
+SYSTEM_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
+    "3.9": ["tests"],
+    "3.10": ["tests"],
+    "3.12": ["tests", "scikit-learn"],
+    "3.13": ["tests"],
+}
 
 LOGGING_NAME_ENV_VAR = "BIGFRAMES_PERFORMANCE_LOG_NAME"
 
@@ -468,8 +475,7 @@ def cover(session):
 @nox.session(python=DEFAULT_PYTHON_VERSION)
 def docs(session):
     """Build the docs for this library."""
-
-    session.install("-e", ".")
+    session.install("-e", ".[scikit-learn]")
     session.install(
         # We need to pin to specific versions of the `sphinxcontrib-*` packages
         # which still support sphinx 4.x.
@@ -510,7 +516,7 @@ def docs(session):
 def docfx(session):
     """Build the docfx yaml files for this library."""
 
-    session.install("-e", ".")
+    session.install("-e", ".[scikit-learn]")
     session.install(
         # We need to pin to specific versions of the `sphinxcontrib-*` packages
         # which still support sphinx 4.x.
@@ -652,6 +658,8 @@ def prerelease(session: nox.sessions.Session, tests_path, extra_pytest_options=(
         if match.group(1) not in already_installed
     ]
 
+    print(already_installed)
+
     # We use --no-deps to ensure that pre-release versions aren't overwritten
    # by the version ranges in setup.py.
     session.install(*deps)

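The `*_EXTRAS_BY_PYTHON` dictionaries map an interpreter version to the setup.py extras installed for that session, which is how scikit-learn stays available for the 3.12 unit and system runs while every other version exercises the library without it. A hedged sketch of how such a mapping is typically consumed (the helper name and fallback order are assumptions for illustration, not this repository's code):

```python
from typing import Dict, List

import nox

UNIT_TEST_EXTRAS: List[str] = []
UNIT_TEST_EXTRAS_BY_PYTHON: Dict[str, List[str]] = {
    "3.12": ["polars", "scikit-learn"],
}


def install_with_extras(session: nox.Session) -> None:
    # Hypothetical helper: choose the extras configured for this interpreter,
    # falling back to the version-independent list when there is no entry.
    extras = UNIT_TEST_EXTRAS_BY_PYTHON.get(session.python, UNIT_TEST_EXTRAS)
    target = f".[{','.join(extras)}]" if extras else "."
    # On Python 3.12 this effectively runs: pip install -e ".[polars,scikit-learn]"
    session.install("-e", target)
```
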
scripts/test_publish_api_coverage.py

Lines changed: 2 additions & 0 deletions
@@ -19,6 +19,8 @@
 
 from . import publish_api_coverage
 
+pytest.importorskip("sklearn")
+
 
 @pytest.fixture
 def api_coverage_df():

setup.py

Lines changed: 8 additions & 3 deletions
@@ -55,8 +55,6 @@
     "pyarrow >=10.0.1",
     "pydata-google-auth >=1.8.2",
     "requests >=2.27.1",
-    "scikit-learn >=1.2.2",
-    "sqlalchemy >=1.4,<3.0dev",
     "sqlglot >=23.6.3",
     "tabulate >=0.9",
     "ipywidgets >=7.7.1",
@@ -77,8 +75,15 @@
     "tests": [],
     # used for local engine, which is only needed for unit tests at present.
     "polars": ["polars >= 1.7.0"],
+    "scikit-learn": ["scikit-learn>=1.2.2"],
     # Packages required for basic development flow.
-    "dev": ["pytest", "pytest-mock", "pre-commit", "nox", "google-cloud-testutils"],
+    "dev": [
+        "pytest",
+        "pytest-mock",
+        "pre-commit",
+        "nox",
+        "google-cloud-testutils",
+    ],
 }
 extras["all"] = list(sorted(frozenset(itertools.chain.from_iterable(extras.values()))))

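With `scikit-learn` and `sqlalchemy` dropped from `install_requires`, scikit-learn becomes an opt-in extra (`pip install 'bigframes[scikit-learn]'`), so any remaining code path that wants it has to tolerate its absence. A common shape for that guard, sketched with an illustrative name and error message rather than the code this commit adds:

```python
def _import_sklearn():
    """Import scikit-learn lazily, with an actionable error when it is missing."""
    try:
        import sklearn  # resolved only when the optional feature is actually used
    except ImportError as exc:
        raise ImportError(
            "This feature requires scikit-learn. "
            "Install it with: pip install 'bigframes[scikit-learn]'"
        ) from exc
    return sklearn
```
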
testing/constraints-3.9.txt

Lines changed: 0 additions & 1 deletion
@@ -20,7 +20,6 @@ pyarrow==10.0.1
 pydata-google-auth==1.8.2
 requests==2.27.1
 scikit-learn==1.2.2
-sqlalchemy==1.4
 sqlglot==23.6.3
 tabulate==0.9
 ipywidgets==7.7.1

tests/system/small/ml/test_metrics.py

Lines changed: 13 additions & 1 deletion
@@ -17,7 +17,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-import sklearn.metrics as sklearn_metrics  # type: ignore
 
 import bigframes
 from bigframes.ml import metrics
@@ -66,6 +65,7 @@ def test_r2_score_force_finite(session):
 
 
 def test_r2_score_ok_fit_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame({"y_true": [1, 2, 3, 4, 5], "y_pred": [2, 3, 4, 3, 6]})
 
     df = session.read_pandas(pd_df)
@@ -113,6 +113,7 @@ def test_accuracy_score_not_normailze(session):
 
 
 def test_accuracy_score_fit_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame({"y_true": [1, 2, 3, 4, 5], "y_pred": [2, 3, 4, 3, 6]})
 
     df = session.read_pandas(pd_df)
@@ -203,6 +204,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session):
 
 
 def test_roc_curve_binary_classification_prediction_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": [0, 0, 1, 1, 0, 1, 0, 1, 1, 1],
@@ -294,6 +296,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session):
 
 
 def test_roc_curve_binary_classification_decision_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     # Instead of operating on probabilities, assume a 70% decision threshold
     # has been applied, and operate on the final output
     y_score = [0.1, 0.4, 0.35, 0.8, 0.65, 0.9, 0.5, 0.3, 0.6, 0.45]
@@ -420,6 +423,7 @@ def test_roc_auc_score_returns_expected(session):
 
 
 def test_roc_auc_score_returns_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": [0, 0, 1, 1, 0, 1, 0, 1, 1, 1],
@@ -525,6 +529,7 @@ def test_confusion_matrix_column_index(session):
 
 
 def test_confusion_matrix_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": [2, 3, 3, 3, 4, 1],
@@ -543,6 +548,7 @@ def test_confusion_matrix_matches_sklearn(session):
 
 
 def test_confusion_matrix_str_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"],
@@ -603,6 +609,7 @@ def test_recall_score(session):
 
 
 def test_recall_score_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": [2, 0, 2, 2, 0, 1],
@@ -620,6 +627,7 @@ def test_recall_score_matches_sklearn(session):
 
 
 def test_recall_score_str_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"],
@@ -673,6 +681,7 @@ def test_precision_score(session):
 
 
 def test_precision_score_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": [2, 0, 2, 2, 0, 1],
@@ -695,6 +704,7 @@ def test_precision_score_matches_sklearn(session):
 
 
 def test_precision_score_str_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"],
@@ -752,6 +762,7 @@ def test_f1_score(session):
 
 
 def test_f1_score_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": [2, 0, 2, 2, 0, 1],
@@ -769,6 +780,7 @@ def test_f1_score_matches_sklearn(session):
 
 
 def test_f1_score_str_matches_sklearn(session):
+    sklearn_metrics = pytest.importorskip("sklearn.metrics")
     pd_df = pd.DataFrame(
         {
             "y_true": ["cat", "ant", "cat", "cat", "ant", "bird"],

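`pytest.importorskip("sklearn.metrics")` imports the module at call time, returns it, and skips the test (rather than erroring) when the package is not installed; that is what lets these `*_matches_sklearn` comparisons run only in the environments that install the new extra. A minimal, self-contained sketch of the pattern with an illustrative assertion:

```python
import pytest


def test_accuracy_matches_sklearn_sketch():
    # Skip this test when scikit-learn is absent; otherwise get the module back.
    sklearn_metrics = pytest.importorskip("sklearn.metrics")

    y_true = [1, 0, 1, 1]
    y_pred = [1, 0, 0, 1]
    assert sklearn_metrics.accuracy_score(y_true, y_pred) == 0.75
```
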
tests/system/small/test_series.py

Lines changed: 2 additions & 0 deletions
@@ -641,6 +641,8 @@ def test_series_replace_dict(scalars_dfs, replacement_dict):
     ),
 )
 def test_series_interpolate(method):
+    pytest.importorskip("scipy")
+
     values = [None, 1, 2, None, None, 16, None]
     index = [-3.2, 11.4, 3.56, 4, 4.32, 5.55, 76.8]
     pd_series = pd.Series(values, index)

tests/unit/ml/test_api_primitives.py

Lines changed: 2 additions & 3 deletions
@@ -13,8 +13,6 @@
 # limitations under the License.
 
 import pytest
-import sklearn.decomposition as sklearn_decomposition  # type: ignore
-import sklearn.linear_model as sklearn_linear_model  # type: ignore
 
 import bigframes.ml.decomposition
 import bigframes.ml.linear_model
@@ -35,8 +33,9 @@ def test_base_estimator_repr():
     assert pca_estimator.__repr__() == "PCA(n_components=7)"
 
 
-@pytest.mark.skipif(sklearn_linear_model is None, reason="requires sklearn")
 def test_base_estimator_repr_matches_sklearn():
+    sklearn_decomposition = pytest.importorskip("sklearn.decomposition")
+    sklearn_linear_model = pytest.importorskip("sklearn.linear_model")
     estimator = bigframes.ml.linear_model.LinearRegression()
     sklearn_estimator = sklearn_linear_model.LinearRegression()
     assert estimator.__repr__() == sklearn_estimator.__repr__()

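Swapping `@pytest.mark.skipif(... is None, ...)` for `pytest.importorskip` is more than a style change: once the module-level `import sklearn...` lines are gone, the decorator has no name left to test, and a plain top-level import would have raised ImportError during collection before any skip condition ran. Moving the check inside the test keeps collection working without scikit-learn. A short sketch of the difference, with illustrative names rather than the repository's code:

```python
import pytest

# Old shape: only safe if the module-level import is itself wrapped, e.g.
#   try:
#       import sklearn.linear_model as sklearn_linear_model
#   except ImportError:
#       sklearn_linear_model = None
# @pytest.mark.skipif(sklearn_linear_model is None, reason="requires sklearn")


def test_repr_matches_sklearn_sketch():
    # New shape: resolve the optional dependency lazily inside the test body.
    sklearn_linear_model = pytest.importorskip("sklearn.linear_model")
    assert "LinearRegression" in repr(sklearn_linear_model.LinearRegression())
```
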
tests/unit/ml/test_compose.py

Lines changed: 2 additions & 2 deletions
@@ -15,8 +15,6 @@
 
 from google.cloud import bigquery
 import pytest
-import sklearn.compose as sklearn_compose  # type: ignore
-import sklearn.preprocessing as sklearn_preprocessing  # type: ignore
 
 from bigframes.ml import compose, preprocessing
 from bigframes.ml.compose import ColumnTransformer, SQLScalarColumnTransformer
@@ -119,6 +117,8 @@ def test_columntransformer_repr():
 
 
 def test_columntransformer_repr_matches_sklearn():
+    sklearn_compose = pytest.importorskip("sklearn.compose")
+    sklearn_preprocessing = pytest.importorskip("sklearn.preprocessing")
     bf_column_transformer = compose.ColumnTransformer(
         [
             (

tests/unit/ml/test_pipeline.py

Lines changed: 4 additions & 5 deletions
@@ -13,10 +13,6 @@
 # limitations under the License.
 
 import pytest
-import sklearn.compose as sklearn_compose  # type: ignore
-import sklearn.linear_model as sklearn_linear_model  # type: ignore
-import sklearn.pipeline as sklearn_pipeline  # type: ignore
-import sklearn.preprocessing as sklearn_preprocessing  # type: ignore
 
 from bigframes.ml import compose, forecasting, linear_model, pipeline, preprocessing
 
@@ -57,8 +53,11 @@ def test_pipeline_repr():
     )
 
 
-@pytest.mark.skipif(sklearn_pipeline is None, reason="requires sklearn")
 def test_pipeline_repr_matches_sklearn():
+    sklearn_compose = pytest.importorskip("sklearn.compose")
+    sklearn_linear_model = pytest.importorskip("sklearn.linear_model")
+    sklearn_pipeline = pytest.importorskip("sklearn.pipeline")
+    sklearn_preprocessing = pytest.importorskip("sklearn.preprocessing")
     bf_pl = pipeline.Pipeline(
         [
             (
