Upgrade LightGBM and remove nullable handling (#4237)

eccabay · web-flow · commit 9c4cfb561efb · 2023-07-14T15:19:37.000-04:00
* Remove nullable type incompatibility tests

* Remove nullable type handling from lightgbm

* Bump min lightgbm version to 4.0.0
diff --git a/.github/meta.yaml b/.github/meta.yaml
@@ -75,7 +75,7 @@ outputs:
         - ipywidgets >=7.5, <8.0.5
         - xgboost >=1.7.0
         - catboost >=1.1.1
-        - lightgbm >=2.3.1
+        - lightgbm >=4.0.0
         - lime >=0.2.0.1
         - python >=3.8.*
         - imbalanced-learn >=0.9.1, <0.11.0
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -6,6 +6,7 @@ Release Notes
     * Fixes
     * Changes
         * Unpinned sktime version :pr:`4214`
+        * Bumped minimum lightgbm version to 4.0.0 and removed the nullable type handling from the LightGBM estimators :pr:`4237`
     * Documentation Changes
     * Testing Changes
 
diff --git a/evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py b/evalml/pipelines/components/estimators/classifiers/lightgbm_classifier.py
@@ -84,11 +84,6 @@ class LightGBMClassifier(Estimator):
     SEED_MAX = SEED_BOUNDS.max_bound
     """SEED_BOUNDS.max_bound"""
 
-    # Incompatibility: https://github.com/alteryx/evalml/issues/3924
-    # TODO: Remove when support is added https://github.com/alteryx/evalml/issues/4017
-    _integer_nullable_incompatibilities = ["X", "y"]
-    _boolean_nullable_incompatibilities = ["X"]
-
     def __init__(
         self,
         boosting_type="gbdt",
@@ -191,9 +186,8 @@ def fit(self, X, y=None):
         X = infer_feature_types(X)
         if y is not None:
             y = infer_feature_types(y)
-        X_d, y_d = self._handle_nullable_types(X, y)
-        X_encoded = self._encode_categories(X_d, fit=True)
-        y_encoded = self._encode_labels(y_d)
+        X_encoded = self._encode_categories(X, fit=True)
+        y_encoded = self._encode_labels(y)
         self._component_obj.fit(X_encoded, y_encoded)
         return self
 
@@ -207,8 +201,7 @@ def predict(self, X):
             pd.DataFrame: Predicted values.
         """
         X_encoded = self._encode_categories(X)
-        X_d, _ = self._handle_nullable_types(X_encoded)
-        predictions = super().predict(X_d)
+        predictions = super().predict(X_encoded)
         if not self._label_encoder:
             return predictions
         predictions = self._label_encoder.inverse_transform(
@@ -226,5 +219,4 @@ def predict_proba(self, X):
             pd.DataFrame: Predicted probability values.
         """
         X_encoded = self._encode_categories(X)
-        X_d, _ = self._handle_nullable_types(X_encoded)
-        return super().predict_proba(X_d)
+        return super().predict_proba(X_encoded)
diff --git a/evalml/pipelines/components/estimators/regressors/lightgbm_regressor.py b/evalml/pipelines/components/estimators/regressors/lightgbm_regressor.py
@@ -74,11 +74,6 @@ class LightGBMRegressor(Estimator):
     SEED_MAX = SEED_BOUNDS.max_bound
     """SEED_BOUNDS.max_bound"""
 
-    # Incompatibility: https://github.com/alteryx/evalml/issues/3924
-    # TODO: Remove when support is added https://github.com/alteryx/evalml/issues/4017
-    _integer_nullable_incompatibilities = ["X", "y"]
-    _boolean_nullable_incompatibilities = ["X", "y"]
-
     def __init__(
         self,
         boosting_type="gbdt",
@@ -169,8 +164,7 @@ def fit(self, X, y=None):
         X_encoded = self._encode_categories(X, fit=True)
         if y is not None:
             y = infer_feature_types(y)
-        X_d, y_d = self._handle_nullable_types(X_encoded, y)
-        self._component_obj.fit(X_d, y_d)
+        self._component_obj.fit(X_encoded, y)
         return self
 
     def predict(self, X):
@@ -183,5 +177,4 @@ def predict(self, X):
             pd.Series: Predicted values.
         """
         X_encoded = self._encode_categories(X)
-        X_d, _ = self._handle_nullable_types(X_encoded)
-        return super().predict(X_d)
+        return super().predict(X_encoded)
diff --git a/evalml/tests/component_tests/test_lgbm_classifier.py b/evalml/tests/component_tests/test_lgbm_classifier.py
@@ -352,42 +352,3 @@ def test_lgbm_with_nullable_types(
 
     assert not preds.isnull().any().any()
     assert not pred_probs.isnull().any().any()
-
-
-@pytest.mark.parametrize(
-    "nullable_y_ltype",
-    ["IntegerNullable", "AgeNullable"],
-)
-@pytest.mark.parametrize(
-    "handle_incompatibility",
-    [
-        True,
-        pytest.param(
-            False,
-            marks=pytest.mark.xfail(strict=True, raises=ValueError),
-        ),
-    ],
-)
-def test_lgbm_classifier_nullable_type_incompatibility(
-    nullable_type_target,
-    nullable_type_test_data,
-    lgbm,
-    handle_incompatibility,
-    nullable_y_ltype,
-):
-    """Testing that the nullable type incompatibility that caused us to add handling for LightGBMClassifier
-    is still present in sklearn's LGBMClassifier component. If this test is causing the test suite to fail
-    because the code below no longer raises the expected ValueError, we should confirm that the nullable
-    types now work for our use case and remove the nullable type handling logic from LightGBMClassifier.
-    """
-    y = nullable_type_target(ltype=nullable_y_ltype, has_nans=False)
-    X = nullable_type_test_data(has_nans=False)
-    X = X.ww.select(include=["numeric", "Boolean", "BooleanNullable"])
-
-    if handle_incompatibility:
-        lgb = LightGBMClassifier()
-        X, y = lgb._handle_nullable_types(X, y)
-
-    sk_lgb = lgbm.sklearn.LGBMClassifier()
-    sk_lgb.fit(X, y)
-    sk_lgb.predict(X)
diff --git a/evalml/tests/component_tests/test_lgbm_regressor.py b/evalml/tests/component_tests/test_lgbm_regressor.py
@@ -250,42 +250,3 @@ def test_lgbm_with_nullable_types(
     preds = lgb.predict(X.ww.copy())
 
     assert not preds.isnull().any().any()
-
-
-@pytest.mark.parametrize(
-    "nullable_y_ltype",
-    ["IntegerNullable", "AgeNullable", "BooleanNullable"],
-)
-@pytest.mark.parametrize(
-    "handle_incompatibility",
-    [
-        True,
-        pytest.param(
-            False,
-            marks=pytest.mark.xfail(strict=True, raises=ValueError),
-        ),
-    ],
-)
-def test_lgbm_regressor_nullable_type_incompatibility(
-    nullable_type_target,
-    nullable_type_test_data,
-    lgbm,
-    handle_incompatibility,
-    nullable_y_ltype,
-):
-    """Testing that the nullable type incompatibility that caused us to add handling for LightGBMRegressor
-    is still present in sklearn's LGBMRegressor component. If this test is causing the test suite to fail
-    because the code below no longer raises the expected ValueError, we should confirm that the nullable
-    types now work for our use case and remove the nullable type handling logic from LightGBMRegressor.
-    """
-    y = nullable_type_target(ltype=nullable_y_ltype, has_nans=False)
-    X = nullable_type_test_data(has_nans=False)
-    X = X.ww.select(include=["numeric", "Boolean", "BooleanNullable"])
-
-    if handle_incompatibility:
-        lgb = LightGBMRegressor()
-        X, y = lgb._handle_nullable_types(X, y)
-
-    sk_lgb = lgbm.sklearn.LGBMRegressor()
-    sk_lgb.fit(X, y)
-    sk_lgb.predict(X)
diff --git a/evalml/tests/dependency_update_check/latest_dependency_versions.txt b/evalml/tests/dependency_update_check/latest_dependency_versions.txt
@@ -12,7 +12,7 @@ holidays==0.20
 imbalanced-learn==0.10.1
 ipywidgets==8.0.4
 kaleido==0.2.1
-lightgbm==3.3.5
+lightgbm==4.0.0
 lime==0.2.0.1
 matplotlib==3.7.2
 matplotlib-inline==0.1.6
diff --git a/evalml/tests/dependency_update_check/minimum_requirements.txt b/evalml/tests/dependency_update_check/minimum_requirements.txt
@@ -12,7 +12,7 @@ holidays==0.13
 imbalanced-learn==0.9.1
 ipywidgets==7.5
 kaleido==0.1.0
-lightgbm==2.3.1
+lightgbm==4.0.0
 lime==0.2.0.1
 matplotlib==3.3.3
 networkx==2.6
diff --git a/evalml/tests/dependency_update_check/minimum_test_requirements.txt b/evalml/tests/dependency_update_check/minimum_test_requirements.txt
@@ -15,7 +15,7 @@ holidays==0.13
 imbalanced-learn==0.9.1
 ipywidgets==7.5
 kaleido==0.1.0
-lightgbm==2.3.1
+lightgbm==4.0.0
 lime==0.2.0.1
 matplotlib==3.3.3
 nbval==0.9.3
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,7 +52,7 @@ dependencies = [
     "ipywidgets >= 7.5, < 8.0.5",
     "xgboost >= 1.7.0",
     "catboost >= 1.1.1",
-    "lightgbm >= 2.3.1",
+    "lightgbm >= 4.0.0",
     "matplotlib >= 3.3.3",
     "graphviz >= 0.13; platform_system!='Windows'",
     "seaborn >= 0.11.1",