Support gamma objective in LGBMRegressor (#484)

janjagusch · web-flow · commit b5dc2f61ce94 · 2021-08-12T12:01:49.000+02:00
* support objective=gamma in lgbm regressor
* add unit test for objective functions
* add test case for objective='regression'
* set fraction of rows that need to be almost equal to 0.9999
* skip objective test if onnxruntime version is lower than 1.3
Signed-off-by: Jan-Benedikt Jagusch <jan.jagusch@gmail.com>
diff --git a/onnxmltools/convert/lightgbm/_parse.py b/onnxmltools/convert/lightgbm/_parse.py
@@ -27,8 +27,7 @@ def __init__(self, booster):
         if (_model_dict['objective'].startswith('binary') or
                 _model_dict['objective'].startswith('multiclass')):
             self.operator_name = 'LgbmClassifier'
-        elif (_model_dict['objective'].startswith('regression') or
-                _model_dict['objective'].startswith('poisson')):
+        elif _model_dict['objective'].startswith(('regression', 'poisson', 'gamma')):
             self.operator_name = 'LgbmRegressor'
         else:
             # Other objectives are not supported.
diff --git a/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py b/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py
@@ -234,7 +234,7 @@ def convert_lightgbm(scope, operator, container):
         n_classes = 1  # Regressor has only one output variable
         attrs['post_transform'] = 'NONE'
         attrs['n_targets'] = n_classes
-    elif gbm_text['objective'].startswith('poisson'):
+    elif gbm_text['objective'].startswith(('poisson', 'gamma')):
         n_classes = 1  # Regressor has only one output variable
         attrs['n_targets'] = n_classes
         # 'Exp' is not a supported post_transform value in the ONNX spec yet,
diff --git a/tests/lightgbm/test_objective_functions.py b/tests/lightgbm/test_objective_functions.py
@@ -0,0 +1,89 @@
+import unittest
+from typing import Dict, List, Tuple
+
+import numpy as np
+import onnxruntime
+import pandas as pd
+from onnx import ModelProto
+from onnxconverter_common.data_types import DoubleTensorType, TensorType
+from onnxmltools import convert_lightgbm
+from onnxruntime import InferenceSession
+from pandas.core.frame import DataFrame
+
+from lightgbm import LGBMRegressor
+
+_N_ROWS = 10_000  # rows in the random data set
+_N_COLS = 10  # feature columns in the random data set
+_N_DECIMALS = 5  # predictions must agree to within 10 ** -_N_DECIMALS
+_FRAC = 0.9999  # minimum share of rows whose predictions must agree
+
+_X = pd.DataFrame(np.random.random((_N_ROWS, _N_COLS)))
+_Y = pd.Series(np.random.random(_N_ROWS))
+
+_DTYPE_MAP: Dict[str, TensorType] = {  # pandas dtype name -> ONNX tensor type class
+    "float64": DoubleTensorType,
+}
+
+
+class ObjectiveTest(unittest.TestCase):
+    """Check that LGBMRegressor models with different objectives convert to ONNX and predict alike."""
+
+    _objectives: Tuple[str, ...] = (
+        "regression",
+        "poisson",
+        "gamma",
+    )
+
+    @staticmethod
+    def _calc_initial_types(X: DataFrame) -> List[Tuple[str, TensorType]]:
+        """Return the one-entry initial_types list for X; X must have a single dtype listed in _DTYPE_MAP."""
+        dtypes = {str(dtype) for dtype in X.dtypes}
+        if len(dtypes) > 1:
+            raise RuntimeError(f"Test expects homogenous input matrix. Found multiple dtypes: {dtypes}.")
+        dtype = dtypes.pop()
+        tensor_type = _DTYPE_MAP[dtype]
+        return [("input", tensor_type(X.shape))]
+
+    @staticmethod
+    def _predict_with_onnx(model: ModelProto, X: DataFrame) -> np.ndarray:
+        """Run the single-input model on X.values with onnxruntime; return column 0 of its first output."""
+        session = InferenceSession(model.SerializeToString())
+        output_names = [s_output.name for s_output in session.get_outputs()]
+        input_names = [s_input.name for s_input in session.get_inputs()]
+        if len(input_names) > 1:
+            raise RuntimeError(f"Test expects one input. Found multiple inputs: {input_names}.")
+        input_name = input_names[0]
+        return session.run(output_names, {input_name: X.values})[0][:, 0]
+
+    @staticmethod
+    def _assert_almost_equal(actual: np.ndarray, desired: np.ndarray, decimal: int=7, frac: float=1.0):
+        """
+        Assert that almost all rows in actual and desired are almost equal to each other.
+
+        Similar to np.testing.assert_almost_equal, but a row passes when abs(actual - desired) <= 10 ** -decimal
+        and only the fraction ``frac`` of rows has to pass instead of all of them.
+        """
+        assert 0 <= frac <= 1, "frac must be in range(0, 1)."
+        success_abs = (abs(actual - desired) <= (10 ** -decimal)).sum()
+        success_rel = success_abs / len(actual)
+        assert success_rel >= frac, f"Only {success_abs} out of {len(actual)} rows are almost equal to {decimal} decimals."
+
+    # Only major.minor is parsed: int() on a non-numeric later component (e.g. a "dev..." suffix)
+    # would raise ValueError while the module is imported and take down the whole test file.
+    @unittest.skipIf(tuple(int(ver) for ver in onnxruntime.__version__.split(".")[:2]) < (1, 3), "not supported in this library version")
+    def test_objective(self):
+        """
+        Test if an LGBMRegressor with a certain objective (e.g. 'poisson') can be converted to ONNX
+        and whether the ONNX graph and the original model produce almost equal predictions.
+
+        Note that this test is a bit flaky because of precision differences with ONNX and LightGBM
+        and therefore sometimes fails randomly. In these cases, a retry should resolve the issue.
+        """
+        for objective in self._objectives:
+            with self.subTest(X=_X, objective=objective):
+                regressor = LGBMRegressor(objective=objective)
+                regressor.fit(_X, _Y)
+                regressor_onnx: ModelProto = convert_lightgbm(regressor, initial_types=self._calc_initial_types(_X))
+                y_pred = regressor.predict(_X)
+                y_pred_onnx = self._predict_with_onnx(regressor_onnx, _X)
+                self._assert_almost_equal(y_pred, y_pred_onnx, decimal=_N_DECIMALS, frac=_FRAC)