onnx
diff --git a/‎README.md‎
Lines changed: 16 additions & 1 deletion b/‎README.md‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎onnxmltools/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎onnxmltools/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎onnxmltools/convert/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎onnxmltools/convert/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎onnxmltools/convert/common/_container.py‎
Lines changed: 2 additions & 0 deletions b/‎onnxmltools/convert/common/_container.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎onnxmltools/convert/h2o/__init__.py‎
Lines changed: 7 additions & 0 deletions b/‎onnxmltools/convert/h2o/__init__.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎onnxmltools/convert/h2o/_parse.py‎
Lines changed: 53 additions & 0 deletions b/‎onnxmltools/convert/h2o/_parse.py‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎onnxmltools/convert/h2o/convert.py‎
Lines changed: 71 additions & 0 deletions b/‎onnxmltools/convert/h2o/convert.py‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎onnxmltools/convert/h2o/operator_converters/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎onnxmltools/convert/h2o/operator_converters/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎onnxmltools/convert/h2o/operator_converters/h2o.py‎
Lines changed: 166 additions & 0 deletions b/‎onnxmltools/convert/h2o/operator_converters/h2o.py‎
Lines changed: 166 additions & 0 deletions
diff --git a/‎onnxmltools/convert/h2o/shape_calculators/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎onnxmltools/convert/h2o/shape_calculators/__init__.py‎
Lines changed: 8 additions & 0 deletions
@@ -15,6 +15,7 @@ ONNXMLTools enables you to convert models from different machine learning toolki
 * LightGBM
 * libsvm
 * XGBoost
+* H2O
 
 ## Install
 You can install latest release of ONNXMLTools from [PyPi](https://pypi.org/project/onnxmltools/):
@@ -28,14 +29,15 @@ pip install git+https://github.com/onnx/onnxmltools
 If you choose to install `onnxmltools` from its source code, you must set the environment variable `ONNX_ML=1` before installing the `onnx` package. 
 
 ## Dependencies
-This package relies on ONNX, NumPy, and ProtoBuf. If you are converting a model from scikit-learn, Core ML, Keras, LightGBM, SparkML, XGBoost, or LibSVM, you will need an environment with the respective package installed from the list below:
+This package relies on ONNX, NumPy, and ProtoBuf. If you are converting a model from scikit-learn, Core ML, Keras, LightGBM, SparkML, XGBoost, H2O or LibSVM, you will need an environment with the respective package installed from the list below:
 1. scikit-learn
 2. CoreMLTools
 3. Keras (version 2.0.8 or higher) with the corresponding Tensorflow version
 4. LightGBM (scikit-learn interface)
 5. SparkML
 6. XGBoost (scikit-learn interface)
 7. libsvm
+8. H2O
 
 ONNXMLTools has been tested with Python **2.7**, **3.5**, **3.6**, and **3.7**.  
   `Note: some wrapped converters may not support python 2.x anymore.`
@@ -98,6 +100,19 @@ Please refer to the following documents:
  * [Conversion Framework](onnxmltools/convert/README.md)
  * [Spark ML to ONNX Model Conversion](onnxmltools/convert/sparkml/README.md)
 
+## H2O to ONNX Conversion
+Below is a code snippet to convert a H2O MOJO model into an ONNX model. The only pre-requisity is to have a MOJO model saved on the local file-system.
+
+```python
+import onnxmltools
+
+# Convert the Core ML model into ONNX
+onnx_model = onnxmltools.convert_h2o('/path/to/h2o/gbm_mojo.zip')
+
+# Save as protobuf
+onnxmltools.utils.save_model(onnx_model, 'h2o_gbm.onnx')
+```
+
 # Testing model converters
 
 *onnxmltools* converts models into the ONNX format which
 
@@ -24,6 +24,7 @@
 from .convert import convert_sparkml
 from .convert import convert_tensorflow
 from .convert import convert_xgboost
+from .convert import convert_h2o
 
 from .utils import load_model
 from .utils import save_model
@@ -12,3 +12,4 @@
 from .main import convert_sparkml
 from .main import convert_tensorflow
 from .main import convert_xgboost
+from .main import convert_h2o
@@ -17,6 +17,8 @@ class LightGbmModelContainer(CommonSklearnModelContainer):
 class XGBoostModelContainer(CommonSklearnModelContainer):
     pass
 
+class H2OModelContainer(CommonSklearnModelContainer):
+    pass
 
 class SparkmlModelContainer(RawModelContainer):
 
 
@@ -0,0 +1,7 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+from .convert import convert
@@ -0,0 +1,53 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+from ..common._container import H2OModelContainer
+from ..common._topology import Topology, FloatTensorType
+
+def _parse_h2o(scope, model, inputs):
+    '''
+    :param scope: Scope object
+    :param model: A h2o model data object
+    :param inputs: A list of variables
+    :return: A list of output variables which will be passed to next stage
+    '''
+    this_operator = scope.declare_local_operator("H2OTreeMojo", model)
+    this_operator.inputs = inputs
+
+    if model["params"]["classifier"]:
+        label_variable = scope.declare_local_variable('label', FloatTensorType())
+        probability_map_variable = scope.declare_local_variable('probabilities', FloatTensorType())
+        this_operator.outputs.append(label_variable)
+        this_operator.outputs.append(probability_map_variable)
+    else:
+        variable = scope.declare_local_variable('variable', FloatTensorType())
+        this_operator.outputs.append(variable)
+    return this_operator.outputs
+
+
+def parse_h2o(model, initial_types=None, target_opset=None,
+              custom_conversion_functions=None, custom_shape_calculators=None):
+
+    raw_model_container = H2OModelContainer(model)
+    topology = Topology(raw_model_container, default_batch_size='None',
+                        initial_types=initial_types, target_opset=target_opset,
+                        custom_conversion_functions=custom_conversion_functions,
+                        custom_shape_calculators=custom_shape_calculators)
+    scope = topology.declare_scope('__root__')
+
+    inputs = []
+    for var_name, initial_type in initial_types:
+        inputs.append(scope.declare_local_variable(var_name, initial_type))
+
+    for variable in inputs:
+        raw_model_container.add_input(variable)
+
+    outputs = _parse_h2o(scope, model, inputs)
+
+    for variable in outputs:
+        raw_model_container.add_output(variable)
+
+    return topology
@@ -0,0 +1,71 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+from uuid import uuid4
+import json
+import tempfile
+import h2o
+
+from ...proto import onnx, get_opset_number_from_onnx
+from ..common._topology import convert_topology
+from ..common.data_types import FloatTensorType
+from ._parse import parse_h2o
+
+# Invoke the registration of all our converters and shape calculators
+from . import operator_converters, shape_calculators
+
+
+def convert(model, name=None, initial_types=None, doc_string='', target_opset=None,
+            targeted_onnx=onnx.__version__, custom_conversion_functions=None,
+            custom_shape_calculators=None):
+    '''
+    This function produces an equivalent ONNX model of the given H2O MOJO model.
+    Supported model types:
+    - GBM, with limitations:
+        - poisson, gamma, tweedie distributions not supported
+        - multinomial distribution supported with 3 or more classes (use binomial otherwise)
+    Ohter limitations:
+    - modes with categorical splits not supported
+
+
+    :param model: H2O MOJO model loaded into memory (see below for example)
+    :param name: The name of the graph (type: GraphProto) in the produced ONNX model (type: ModelProto)
+    :param initial_types: a python list. Each element is a tuple of a variable name and a type defined in data_types.py
+    :param doc_string: A string attached onto the produced ONNX model
+    :param target_opset: number, for example, 7 for ONNX 1.2, and 8 for ONNX 1.3.
+    :param targeted_onnx: A string (for example, '1.1.2' and '1.2') used to specify the targeted ONNX version of the
+        produced model. If ONNXMLTools cannot find a compatible ONNX python package, an error may be thrown.
+    :param custom_conversion_functions: a dictionary for specifying the user customized conversion function
+    :param custom_shape_calculators: a dictionary for specifying the user customized shape calculator
+    :return: An ONNX model (type: ModelProto) which is equivalent to the input xgboost model
+
+    :examples:
+
+    >>> from onnxmltools.convert import convert_h2o
+    >>> file = open("/path/to/h2o_mojo.zip", "rb")
+    >>> mojo_content = file.read()
+    >>> file.close()
+    >>> h2o_onnx_model = convert_h2o(mojo_content)
+    '''
+    if name is None:
+        name = str(uuid4().hex)
+    if initial_types is None:
+        initial_types = [('input', FloatTensorType(shape=['None', 'None']))]
+
+    _, model_path = tempfile.mkstemp()
+    f = open(model_path, "wb")
+    f.write(model)
+    f.close()
+    mojo_str = h2o.print_mojo(model_path, format="json")
+    mojo_model = json.loads(mojo_str)
+    if mojo_model["params"]["algo"] != "gbm":
+        raise ValueError("Model type not supported (algo=%s). Only GBM Mojo supported for now." % mojo_model["params"]["algo"])
+
+    target_opset = target_opset if target_opset else get_opset_number_from_onnx()
+    topology = parse_h2o(mojo_model, initial_types, target_opset, custom_conversion_functions, custom_shape_calculators)
+    topology.compile()
+    onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
+    return onnx_model
@@ -0,0 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+# To register converter for scikit-learn operators, import associated modules here.
+from . import h2o
@@ -0,0 +1,166 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+from ...common._registration import register_converter
+
+_LINK_FUNCTION_TO_POST_TRANSFORM = {
+    'identity': 'NONE',
+    'logit': 'LOGISTIC',
+    'ologit': 'LOGISTIC'
+}
+
+
+def _get_post_transform(params):
+    link_function = params["link_function"]
+    family = params["family"]
+    if family == "multinomial":
+        return 'SOFTMAX'
+    elif link_function not in _LINK_FUNCTION_TO_POST_TRANSFORM.keys():
+        raise ValueError("Link function %s not supported." % link_function)
+    else:
+        return _LINK_FUNCTION_TO_POST_TRANSFORM[link_function]
+
+
+def _get_default_tree_attribute_pairs(is_classifier, params):
+    attrs = {
+        'post_transform': _get_post_transform(params)
+    }
+    nclasses = params["nclasses"]
+    if is_classifier:
+        predicted_classes = nclasses if nclasses > 2 else 1
+        attrs['base_values'] = [params["base_score"] for _ in range(0, predicted_classes)]
+    else:
+        attrs['n_targets'] = 1
+        attrs['base_values'] = [params["base_score"]]
+    for k in {'nodes_treeids',  'nodes_nodeids',
+              'nodes_featureids', 'nodes_modes', 'nodes_values',
+              'nodes_truenodeids', 'nodes_falsenodeids', 'nodes_missing_value_tracks_true'}:
+        attrs[k] = []
+    node_attr_prefix = _node_attr_prefix(is_classifier)
+    for k in {'_treeids', '_nodeids', '_ids', '_weights'}:
+        attrs[node_attr_prefix + k] = []
+    return attrs
+
+
+def _add_node(
+        attr_pairs, is_classifier, tree_id, node_id,
+        feature_id, mode, value, true_child_id, false_child_id, weights,
+        missing
+):
+    attr_pairs['nodes_treeids'].append(tree_id)
+    attr_pairs['nodes_nodeids'].append(node_id)
+    attr_pairs['nodes_featureids'].append(feature_id)
+    attr_pairs['nodes_modes'].append(mode)
+    attr_pairs['nodes_values'].append(float(value))
+    attr_pairs['nodes_truenodeids'].append(true_child_id)
+    attr_pairs['nodes_falsenodeids'].append(false_child_id)
+    attr_pairs['nodes_missing_value_tracks_true'].append(missing)
+    if mode == 'LEAF':
+        node_attr_prefix = _node_attr_prefix(is_classifier)
+        for i, w in enumerate(weights):
+            attr_pairs[node_attr_prefix + '_treeids'].append(tree_id)
+            attr_pairs[node_attr_prefix + '_nodeids'].append(node_id)
+            attr_pairs[node_attr_prefix + '_ids'].append(i)
+            attr_pairs[node_attr_prefix + '_weights'].append(float(w))
+
+
+def _node_attr_prefix(is_classifier):
+    return "class" if is_classifier else "target"
+
+
+def _fill_node_attributes(tree_id, node, attr_pairs, is_classifier):
+    if 'leftChild' in node:
+        if node["isCategorical"]:
+            raise ValueError("categorical splits not supported, use one_hot_explicit")
+        else:
+            operator = 'BRANCH_GTE'
+            value = node['splitValue']
+        _add_node(
+            attr_pairs=attr_pairs,
+            is_classifier=is_classifier,
+            tree_id=tree_id,
+            mode=operator,
+            value=value,
+            node_id=node['id'],
+            feature_id=node['colId'],
+            true_child_id=node['rightChild']['id'],
+            false_child_id=node['leftChild']['id'],
+            weights=None,
+            missing=(0 if node["leftward"] else 1),
+        )
+        _fill_node_attributes(tree_id, node["leftChild"], attr_pairs, is_classifier)
+        _fill_node_attributes(tree_id, node["rightChild"], attr_pairs, is_classifier)
+    else:  # leaf
+        weights = [node['predValue']]
+        _add_node(
+            attr_pairs=attr_pairs,
+            is_classifier=is_classifier,
+            tree_id=tree_id,
+            value=0.,
+            node_id=node['id'],
+            feature_id=0, mode='LEAF',
+            true_child_id=0, false_child_id=0,
+            weights=weights,
+            missing=False
+        )
+
+
+def assign_node_ids(node, next_id):
+    if node is None:
+        return next_id
+    node["id"] = next_id
+    next_id += 1
+    next_id = assign_node_ids(node.get("leftChild", None), next_id)
+    return assign_node_ids(node.get("rightChild", None), next_id)
+
+
+def fill_tree_attributes(model, attr_pairs, node_attr_prefix):
+    for tree in model["trees"]:
+        assign_node_ids(tree["root"], 0)
+        _fill_node_attributes(tree["index"], tree["root"], attr_pairs, node_attr_prefix)
+
+
+def convert_regression(scope, operator, container, params):
+    model = operator.raw_operator
+
+    attr_pairs = _get_default_tree_attribute_pairs(False, params)
+    fill_tree_attributes(model, attr_pairs, False)
+
+    # add nodes
+    container.add_node('TreeEnsembleRegressor', operator.input_full_names,
+                       operator.output_full_names, op_domain='ai.onnx.ml',
+                       name=scope.get_unique_operator_name('TreeEnsembleRegressor'), **attr_pairs)
+
+
+def convert_classifier(scope, operator, container, params):
+    if params["family"] == "multinomial" and params["nclasses"] == 2:
+        raise ValueError("Multinomial distribution with two classes not supported, use binomial distribution.")
+    model = operator.raw_operator
+
+    attr_pairs = _get_default_tree_attribute_pairs(True, params)
+    fill_tree_attributes(model, attr_pairs, True)
+
+    n_trees_in_group = params["n_trees_in_group"]
+    attr_pairs['class_ids'] = [v % n_trees_in_group for v in attr_pairs['class_treeids']]
+    attr_pairs['classlabels_strings'] = params["class_labels"]
+
+    container.add_node('TreeEnsembleClassifier', operator.input_full_names,
+                       operator.output_full_names,
+                       op_domain='ai.onnx.ml',
+                       name=scope.get_unique_operator_name('TreeEnsembleClassifier'),
+                       **attr_pairs)
+
+
+def convert_h2o(scope, operator, container):
+    params = operator.raw_operator["params"]
+    is_classifier = params["classifier"]
+    if is_classifier:
+        convert_classifier(scope, operator, container, params)
+    else:
+        convert_regression(scope, operator, container, params)
+
+
+register_converter('H2OTreeMojo', convert_h2o)
@@ -0,0 +1,8 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+# To register shape calculators for lightgbm operators, import associated modules here.
+from . import h2otreemojo