Fixes #421, support unsigned integer as class type (#426)

xadupre · sdpython · wenbingl · web-flow · commit a8493581e8ac · 2021-01-06T17:05:52.000-08:00
* Add example on xgboost mnist

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

* fixes #421, allow unsigned integers as class label

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

Co-authored-by: xavier dupré <xavier.dupre@gmail.com>
Co-authored-by: Wenbing Li <10278425+wenbingl@users.noreply.github.com>
diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py
@@ -242,7 +242,7 @@ def convert(scope, operator, container):
 
         classes = xgb_node.classes_
         if (np.issubdtype(classes.dtype, np.floating) or
-                np.issubdtype(classes.dtype, np.signedinteger)):
+                np.issubdtype(classes.dtype, np.integer)):
             attr_pairs['classlabels_int64s'] = classes.astype('int')
         else:
             classes = np.array([s.encode('utf-8') for s in classes])
diff --git a/onnxmltools/convert/xgboost/shape_calculators/Classifier.py b/onnxmltools/convert/xgboost/shape_calculators/Classifier.py
@@ -34,7 +34,7 @@ def calculate_xgboost_classifier_output_shapes(operator):
             ncl = 2
     classes = xgb_node.classes_
     if (np.issubdtype(classes.dtype, np.floating) or
-            np.issubdtype(classes.dtype, np.signedinteger)):
+            np.issubdtype(classes.dtype, np.integer)):
         operator.outputs[0].type = Int64TensorType(shape=[N])
     else:
         operator.outputs[0].type = StringTensorType(shape=[N])
diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py
@@ -6,21 +6,25 @@
 import unittest
 import numpy as np
 import pandas
-from sklearn.datasets import load_diabetes, load_iris, make_classification
+from sklearn.datasets import (
+    load_diabetes, load_iris, make_classification, load_digits)
 from sklearn.model_selection import train_test_split
 from xgboost import XGBRegressor, XGBClassifier, train, DMatrix
+from sklearn.preprocessing import StandardScaler
 from onnxmltools.convert import convert_xgboost
 from onnxmltools.convert.common.data_types import FloatTensorType
 from onnxmltools.utils import dump_data_and_model
 
 
-def _fit_classification_model(model, n_classes, is_str=False):
+def _fit_classification_model(model, n_classes, is_str=False, dtype=None):
     x, y = make_classification(n_classes=n_classes, n_features=100,
                                n_samples=1000,
                                random_state=42, n_informative=7)
     y = y.astype(np.str) if is_str else y.astype(np.int64)
     x_train, x_test, y_train, _ = train_test_split(x, y, test_size=0.5,
                                                    random_state=42)
+    if dtype is not None:  # optional label dtype override, e.g. np.uint8
+        y_train = y_train.astype(dtype)  # only the training labels are cast
     model.fit(x_train, y_train)
     return model, x_test.astype(np.float32)
 
@@ -67,6 +71,24 @@ def test_xgb_classifier(self):
             "< StrictVersion('1.3.0')",
         )
 
+    @unittest.skipIf(sys.version_info[0] == 2,
+                     reason="xgboost converter not tested on python 2")
+    def test_xgb_classifier_uint8(self):
+        xgb, x_test = _fit_classification_model(
+            XGBClassifier(), 2, dtype=np.uint8)  # unsigned labels, see #421
+        conv_model = convert_xgboost(
+            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+        self.assertTrue(conv_model is not None)
+        dump_data_and_model(
+            x_test,
+            xgb,
+            conv_model,
+            basename="SklearnXGBClassifier",
+            allow_failure="StrictVersion("
+            "onnx.__version__)"
+            "< StrictVersion('1.3.0')",
+        )
+
     @unittest.skipIf(sys.version_info[0] == 2,
                      reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_multi(self):
@@ -260,6 +282,30 @@ def test_xgboost_10(self):
             allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
             basename="XGBBoosterRegBug")
 
+    def test_xgboost_example_mnist(self):
+        """Fit XGBClassifier on the scikit-learn digits data, convert it to
+        ONNX and store the associated artefacts with dump_data_and_model.
+        """
+        X, y = load_digits(return_X_y=True)
+        X_train, X_test, y_train, _ = train_test_split(
+            X, y, random_state=42)  # fixed seed keeps the test reproducible
+        X_train = X_train.reshape((X_train.shape[0], -1))
+        X_test = X_test.reshape((X_test.shape[0], -1))
+
+        scaler = StandardScaler()
+        X_train = scaler.fit_transform(X_train)
+        X_test = scaler.transform(X_test)
+        clf = XGBClassifier(objective="multi:softprob", n_jobs=-1)
+        clf.fit(X_train, y_train)
+
+        sh = [None, X_train.shape[1]]  # row count left unspecified
+        onnx_model = convert_xgboost(
+            clf, initial_types=[('input', FloatTensorType(sh))])
+        dump_data_and_model(
+            X_test.astype(np.float32), clf, onnx_model,
+            allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
+            basename="XGBoostExample")
+
 
 if __name__ == "__main__":
     unittest.main()