Skip to content

Commit 26e2429

Browse files
authored
Fix DecisionTree, RandomForest, StopWords, Tokenizer failing in #468 (sparkml converters) (#471)
* enable spark on CI * update init.py * update CI
1 parent 582540e commit 26e2429

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+724
-677
lines changed

.azure-pipelines/linux-CI-nightly.yml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,16 @@ jobs:
1313
vmImage: 'Ubuntu-16.04'
1414
strategy:
1515
matrix:
16-
Python36-nightly:
17-
python.version: '3.6'
18-
ONNX_PATH: onnx==1.7.0
16+
Python39-nightly:
17+
python.version: '3.9'
18+
ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
19+
COREML_PATH: git+https://github.com/apple/[email protected]
20+
Python38-nightly:
21+
python.version: '3.8'
1922
ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
2023
COREML_PATH: git+https://github.com/apple/[email protected]
2124
Python37-nightly:
2225
python.version: '3.7'
23-
ONNX_PATH: onnx==1.8.0
2426
ORT_PATH: -i https://test.pypi.org/simple/ ort-nightly
2527
COREML_PATH: git+https://github.com/apple/[email protected]
2628
maxParallel: 3
@@ -43,20 +45,17 @@ jobs:
4345
conda install -c conda-forge cmake
4446
python -m pip install $(COREML_PATH)
4547
python -m pip install $(ONNX_PATH)
46-
python -m pip install tensorflow-cpu==1.15.0
47-
python -m pip install tf2onnx==1.5.6
48-
python -m pip install git+https://github.com/microsoft/onnxconverter-common
49-
python -m pip install git+https://github.com/onnx/keras-onnx
48+
python -m pip install hummingbird-ml --no-deps
5049
python -m pip install -r requirements.txt
5150
python -m pip install -r requirements-dev.txt
5251
python -m pip install $(ORT_PATH)
5352
python -m pip install pytest
5453
displayName: 'Install dependencies'
5554
5655
- script: |
57-
python -c "import onnxconverter_common"
58-
python -c "import onnxruntime"
5956
pip install -e .
57+
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
58+
python -c "import onnxruntime;print(onnxruntime.__version__)"
6059
pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
6160
displayName: 'pytest - onnxmltools'
6261

.azure-pipelines/linux-conda-CI.yml

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,27 @@ jobs:
1010

1111
- job: 'Test'
1212
pool:
13-
vmImage: 'Ubuntu-16.04'
13+
vmImage: 'ubuntu-latest'
1414
strategy:
1515
matrix:
16-
Python36-141-RT050:
17-
python.version: '3.6'
18-
ONNX_PATH: onnx==1.4.1
19-
ONNXRT_PATH: onnxruntime==0.5.0
16+
Python39-190-RT180-xgb11:
17+
python.version: '3.9'
18+
ONNX_PATH: onnx==1.9.0
19+
ONNXRT_PATH: onnxruntime==1.8.0
2020
COREML_PATH: git+https://github.com/apple/[email protected]
21-
xgboost.version: ''
21+
xgboost.version: '>=1.2'
22+
Python38-181-RT170-xgb11:
23+
python.version: '3.8'
24+
ONNX_PATH: onnx==1.8.1
25+
ONNXRT_PATH: onnxruntime==1.7.0
26+
COREML_PATH: git+https://github.com/apple/[email protected]
27+
xgboost.version: '>=1.2'
28+
Python37-180-RT160-xgb11:
29+
python.version: '3.7'
30+
ONNX_PATH: onnx==1.8.0
31+
ONNXRT_PATH: onnxruntime==1.6.0
32+
COREML_PATH: git+https://github.com/apple/[email protected]
33+
xgboost.version: '>=1.2'
2234
Python37-150-RT100:
2335
python.version: '3.7'
2436
ONNX_PATH: onnx==1.5.0
@@ -49,18 +61,6 @@ jobs:
4961
ONNXRT_PATH: onnxruntime==1.6.0
5062
COREML_PATH: git+https://github.com/apple/[email protected]
5163
xgboost.version: '>=1.0'
52-
Python37-180-RT160-xgb11:
53-
python.version: '3.7'
54-
ONNX_PATH: onnx==1.8.0
55-
ONNXRT_PATH: onnxruntime==1.6.0
56-
COREML_PATH: git+https://github.com/apple/[email protected]
57-
xgboost.version: '>=1.2'
58-
Python38-181-RT170-xgb11:
59-
python.version: '3.7'
60-
ONNX_PATH: onnx==1.8.1
61-
ONNXRT_PATH: onnxruntime==1.7.0
62-
COREML_PATH: git+https://github.com/apple/[email protected]
63-
xgboost.version: '>=1.2'
6464
maxParallel: 3
6565

6666
steps:
@@ -81,11 +81,9 @@ jobs:
8181
conda install -c conda-forge cmake
8282
pip install $(COREML_PATH)
8383
pip install $(ONNX_PATH)
84-
python -m pip install tensorflow-cpu==1.15.0
85-
python -m pip install tf2onnx==1.5.6
86-
python -m pip install git+https://github.com/microsoft/onnxconverter-common
87-
python -m pip install git+https://github.com/onnx/keras-onnx
84+
pip install hummingbird-ml --no-deps
8885
pip install -r requirements.txt
86+
pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
8987
pip install -r requirements-dev.txt
9088
pip install xgboost$(xgboost.version)
9189
pip install $(ONNXRT_PATH)
@@ -101,9 +99,10 @@ jobs:
10199
displayName: 'local installation'
102100
103101
- script: |
104-
python -c "import onnxconverter_common"
105-
python -c "import onnxruntime"
106-
pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
102+
export PYTHONPATH=.
103+
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
104+
python -c "import onnxruntime;print(onnxruntime.__version__)"
105+
pytest tests --doctest-modules --junitxml=junit/test-results.xml
107106
displayName: 'pytest - onnxmltools'
108107
109108
- task: PublishTestResults@2

.azure-pipelines/win32-CI-nightly.yml

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,19 @@ jobs:
1010

1111
- job: 'Test'
1212
pool:
13-
vmImage: 'vs2017-win2016'
13+
vmImage: 'windows-latest'
1414
strategy:
1515
matrix:
16-
Python36-nightly:
17-
python.version: '3.6'
18-
ONNX_PATH: onnx==1.7.0
16+
Python39-nightly:
17+
python.version: '3.9'
18+
ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
19+
COREML_PATH: git+https://github.com/apple/[email protected]
20+
Python38-nightly:
21+
python.version: '3.8'
1922
ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
2023
COREML_PATH: git+https://github.com/apple/[email protected]
2124
Python37-nightly:
2225
python.version: '3.7'
23-
ONNX_PATH: onnx==1.8.0
2426
ONNXRT_PATH: -i https://test.pypi.org/simple/ ort-nightly
2527
COREML_PATH: git+https://github.com/apple/[email protected]
2628
maxParallel: 3
@@ -40,22 +42,18 @@ jobs:
4042
- script: |
4143
call activate py$(python.version)
4244
python -m pip install --upgrade pip numpy
43-
echo Test numpy installation... && python -c "import numpy"
4445
pip install %COREML_PATH% %ONNX_PATH%
45-
python -m pip install tensorflow-cpu==1.15.0
46-
python -m pip install tf2onnx==1.5.6
47-
python -m pip install git+https://github.com/microsoft/onnxconverter-common
48-
python -m pip install git+https://github.com/onnx/keras-onnx
49-
echo Test onnxconverter-common installation... && python -c "import onnxconverter_common"
46+
pip install hummingbird-ml --no-deps
5047
pip install -r requirements.txt
5148
pip install -r requirements-dev.txt
5249
pip install %ONNXRT_PATH%
53-
echo Test onnxruntime installation... && python -c "import onnxruntime"
5450
displayName: 'Install dependencies'
5551
5652
- script: |
5753
call activate py$(python.version)
5854
pip install -e .
55+
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
56+
python -c "import onnxruntime;print(onnxruntime.__version__)"
5957
python -m pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
6058
displayName: 'pytest - onnxmltools'
6159

.azure-pipelines/win32-conda-CI.yml

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,27 @@ jobs:
1010

1111
- job: 'Test'
1212
pool:
13-
vmImage: 'vs2017-win2016'
13+
vmImage: 'windows-latest'
1414
strategy:
1515
matrix:
16-
Python36-141-RT030:
17-
python.version: '3.6'
18-
ONNX_PATH: onnx==1.4.1
19-
ONNXRT_PATH: onnxruntime==0.3.0
16+
Python39-190-RT180:
17+
python.version: '3.9'
18+
ONNX_PATH: onnx==1.9.0
19+
ONNXRT_PATH: onnxruntime==1.8.0
2020
COREML_PATH: git+https://github.com/apple/[email protected]
2121
sklearn.version: ''
2222

23-
Python37-150-RT040:
23+
Python38-181-RT170:
24+
python.version: '3.8'
25+
ONNX_PATH: onnx==1.8.1
26+
ONNXRT_PATH: onnxruntime==1.7.0
27+
COREML_PATH: git+https://github.com/apple/[email protected]
28+
sklearn.version: ''
29+
30+
Python37-180-RT160:
2431
python.version: '3.7'
25-
ONNX_PATH: onnx==1.5.0
26-
ONNXRT_PATH: onnxruntime==0.4.0
32+
ONNX_PATH: onnx==1.8.0
33+
ONNXRT_PATH: onnxruntime==1.6.0
2734
COREML_PATH: git+https://github.com/apple/[email protected]
2835
sklearn.version: ''
2936

@@ -41,20 +48,6 @@ jobs:
4148
COREML_PATH: git+https://github.com/apple/[email protected]
4249
sklearn.version: ''
4350

44-
Python37-180-RT160:
45-
python.version: '3.7'
46-
ONNX_PATH: onnx==1.8.0
47-
ONNXRT_PATH: onnxruntime==1.6.0
48-
COREML_PATH: git+https://github.com/apple/[email protected]
49-
sklearn.version: ''
50-
51-
Python38-181-RT170:
52-
python.version: '3.8'
53-
ONNX_PATH: onnx==1.8.1
54-
ONNXRT_PATH: onnxruntime==1.7.0
55-
COREML_PATH: git+https://github.com/apple/[email protected]
56-
sklearn.version: ''
57-
5851
maxParallel: 3
5952

6053
steps:
@@ -74,17 +67,12 @@ jobs:
7467
python -m pip install --upgrade pip numpy
7568
echo Test numpy installation... && python -c "import numpy"
7669
python -m pip install %COREML_PATH% %ONNX_PATH%
77-
python -m pip install tensorflow-cpu==1.15.0
78-
python -m pip install tf2onnx==1.5.6
79-
python -m pip install git+https://github.com/microsoft/onnxconverter-common
80-
python -m pip install git+https://github.com/onnx/keras-onnx
81-
echo Test onnxconverter-common installation... && python -c "import onnxconverter_common"
70+
python -m pip install hummingbird-ml --no-deps
8271
python -m pip install -r requirements.txt
72+
python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
8373
python -m pip install -r requirements-dev.txt
8474
python -m pip install %ONNXRT_PATH%
8575
python -m pip install scikit-learn$(sklearn.version)
86-
echo Test onnxruntime installation... && python -c "import onnxruntime"
87-
echo "debug environment" && path
8876
python -m pip show pytest
8977
displayName: 'Install dependencies'
9078
@@ -96,7 +84,10 @@ jobs:
9684
- script: |
9785
call activate py$(python.version)
9886
python -m pip install -e .
99-
python -m pytest tests --ignore=tests/sparkml --doctest-modules --junitxml=junit/test-results.xml
87+
set PYTHONPATH=.
88+
python -c "import onnxconverter_common;print(onnxconverter_common.__version__)"
89+
python -c "import onnxruntime;print(onnxruntime.__version__)"
90+
python -m pytest tests --doctest-modules --junitxml=junit/test-results.xml
10091
displayName: 'pytest - onnxmltools'
10192
10293
- task: PublishTestResults@2

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
# Introduction
1111
ONNXMLTools enables you to convert models from different machine learning toolkits into [ONNX](https://onnx.ai). Currently the following toolkits are supported:
12-
* Keras (a wrapper of [keras2onnx converter](https://github.com/onnx/keras-onnx/))
1312
* Tensorflow (a wrapper of [tf2onnx converter](https://github.com/onnx/tensorflow-onnx/))
1413
* scikit-learn (a wrapper of [skl2onnx converter](https://github.com/onnx/sklearn-onnx/))
1514
* Apple Core ML

docs/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ Currently the following toolkits are supported:
3232
* `XGBoost <https://xgboost.readthedocs.io/en/latest/>`_
3333

3434
*onnxmltools* leverages existing converting libraries,
35-
`keras-onnx <https://github.com/onnx/keras-onnx>`_,
3635
`sklearn-onnx <https://github.com/onnx/sklearn-onnx>`_,
3736
`tensorflow-onnx <https://github.com/onnx/tensorflow-onnx>`_
3837
and implements converters for the other libraries.
Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1-
# SPDX-License-Identifier: Apache-2.0
2-
3-
from onnxconverter_common.utils import * # noqa
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
try:
4+
from onnxconverter_common.utils import hummingbird_installed # noqa
5+
except ImportError:
6+
def hummingbird_installed():
7+
"""
8+
Checks that *Hummingbird* is available.
9+
"""
10+
try:
11+
import hummingbird.ml # noqa: F401
12+
13+
return True
14+
except ImportError:
15+
return False
16+
17+
from onnxconverter_common.utils import * # noqa

onnxmltools/convert/lightgbm/convert.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
from uuid import uuid4
4+
import onnx
45
import lightgbm
5-
import warnings
66
from onnxconverter_common.onnx_ex import get_maximum_opset_supported
7-
import onnx
87
from ..common._topology import convert_topology
98
from ..common.utils import hummingbird_installed
109
from ._parse import parse_lightgbm, WrappedBooster
@@ -57,19 +56,12 @@ def convert(model, name=None, initial_types=None, doc_string='', target_opset=No
5756
onnx_ml_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
5857

5958
if without_onnx_ml:
60-
from hummingbird.ml import convert
61-
from hummingbird.ml import constants
62-
63-
if target_opset == 13:
64-
warnings.warn('Pytorch-onnx does not support opset 13 yet, use opset 12 instead.')
65-
target_opset = 12
66-
59+
from hummingbird.ml import convert, constants
6760
extra_config = {}
68-
extra_config[constants.ONNX_INITIAL_TYPES] = initial_types
61+
# extra_config[constants.ONNX_INITIAL_TYPES] = initial_types
6962
extra_config[constants.ONNX_OUTPUT_MODEL_NAME] = name
7063
extra_config[constants.ONNX_TARGET_OPSET] = target_opset
7164
onnx_model = convert(onnx_ml_model, "onnx", extra_config=extra_config).model
72-
7365
return onnx_model
7466

7567
return onnx_ml_model

onnxmltools/convert/sparkml/operator_converters/min_hash_lsh.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# SPDX-License-Identifier: Apache-2.0
22

33
from onnx import onnx_pb as onnx_proto
4-
from ...common._apply_operation import apply_add, apply_mul, apply_sum, apply_div, apply_sub, \
5-
apply_concat, apply_cast
4+
from ...common._apply_operation import (
5+
apply_add, apply_mul, apply_sum, apply_div, apply_sub,
6+
apply_concat, apply_cast)
67
from ...common._registration import register_converter, register_shape_calculator
7-
from ...common.data_types import FloatTensorType
8+
from ...common.data_types import FloatTensorType, DoubleTensorType
89
from ...common.utils import check_input_and_output_numbers, check_input_and_output_types
9-
from ..utils import SparkMlConversionError
1010
from .tree_ensemble_common import save_read_sparkml_model_data
1111

1212
MinHashLSH_HASH_PRIME = 2038074743
@@ -23,10 +23,7 @@ def get_rand_coefficients(operator):
2323

2424

2525
def convert_min_hash_lsh(scope, operator, container):
26-
spark = operator.raw_params['SparkSession']
2726
int_type = onnx_proto.TensorProto.INT64
28-
if spark.version < '2.4.0':
29-
int_type = onnx_proto.TensorProto.INT32
3027
rand_coefficients = get_rand_coefficients(operator)
3128
coeffs = []
3229
for i in range(0, len(rand_coefficients), 2):
@@ -75,11 +72,10 @@ def convert_min_hash_lsh(scope, operator, container):
7572

7673
def calculate_min_hash_lsh_output_shapes(operator):
7774
check_input_and_output_numbers(operator, output_count_range=1)
78-
check_input_and_output_types(operator, good_input_types=[FloatTensorType])
75+
check_input_and_output_types(
76+
operator, good_input_types=[FloatTensorType, DoubleTensorType])
7977

8078
N = operator.inputs[0].type.shape[0]
81-
if N != 1:
82-
raise SparkMlConversionError('MinHashLSHModel converter cannot handle batch size of more than 1')
8379
C = len(get_rand_coefficients(operator)) // 2
8480
operator.outputs[0].type = FloatTensorType([N, C])
8581

0 commit comments

Comments
 (0)