Fix some bugs found by running onnx-1.1.2 checker (#25)

wschin · web-flow · commit f801d3ed5c18 · 2018-05-02T17:14:04.000-07:00
* Fix some bugs found by running onnx checker

1. One-hot encoder has no "inputlist" attribute
2. Create RNN/LSTM/GRU nodes in a topological order
3. Change attribute name from "values" to "value" for Pad

* Remove lines not needed

* Append initializers' value infos to the model's input list

* Fix InnerProduct's initializer names

* Fix LSTM and discard one change

* Fix some minor bugs

* Fix a C-style naming problem for convolution conversion

* Address comments and fix one more bug

1. Fix comments about doc
2. Update the interfaces of converting functions so that arguments
   can be passed in

* Fix shape calculator for merge layer (support [N, C] inputs)
diff --git a/onnxmltools/convert/common/_topology.py b/onnxmltools/convert/common/_topology.py
@@ -631,8 +631,24 @@ def convert_topology(topology, model_name, doc_string):
                 container.nodes[i], container.nodes[another_node_id] = \
                     container.nodes[another_node_id], container.nodes[i]
 
+    # Calling ModelComponentContainer's add_initializer(...) adds nothing to the input list. However, in ONNX,
+    # initializers should also be the model's (GraphProto) inputs. Thus, we create ValueInfoProto objects directly
+    # from the initializers (type: TensorProto) and then add them to the model's input list.
+    extra_inputs = []  # ValueInfoProto list of the initializers
+    for tensor in container.initializers:
+        # Sometimes (especially when creating optional input values such as RNN's initial hidden state), an initializer
+        # is also one of the original model's inputs, so it has already been added to the container's input list. In
+        # that case, we skip this initializer to avoid duplicated inputs.
+        if tensor.name in [value_info.name for value_info in container.inputs]:
+            continue
+
+        # Initializers are always tensors so we can just call make_tensor_value_info(...)
+        value_info = helper.make_tensor_value_info(tensor.name, tensor.data_type, tensor.dims)
+        extra_inputs.append(value_info)
+
     # Create a graph from its main components
-    graph = helper.make_graph(container.nodes, model_name, container.inputs, container.outputs, container.initializers)
+    graph = helper.make_graph(container.nodes, model_name, container.inputs + extra_inputs,
+                              container.outputs, container.initializers)
 
     # Add extra information related to the graph
     graph.value_info.extend(container.value_info)
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/BatchNorm.py b/onnxmltools/convert/coreml/operator_converters/neural_network/BatchNorm.py
@@ -31,7 +31,6 @@ def convert_batch_normalization(scope, operator, container):
     inputs.append(bias_tensor_name)
 
     attrs['epsilon'] = params.epsilon
-    attrs['spatial'] = 1  # True
 
     if op_type == 'BatchNormalization':
         mean_tensor_name = scope.get_unique_variable_name(op_type + '_mean')
@@ -43,6 +42,7 @@ def convert_batch_normalization(scope, operator, container):
                                   params.variance.floatValue)
         inputs.append(variance_tensor_name)
         attrs['momentum'] = 0.
+        attrs['spatial'] = 1  # True
 
         if not params.instanceNormalization and params.computeMeanVar:
             # In this case, we apply batch normalization and adjust the statistics stored according the the batch
@@ -63,8 +63,6 @@ def convert_batch_normalization(scope, operator, container):
             attrs['is_test'] = 1  # True
         else:
             raise ValueError('Unsupported operation mode')
-    else:
-        attrs['is_test'] = 1  # True
 
     container.add_node(op_type, inputs, outputs, **attrs)
 
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/BidirectionalLSTM.py b/onnxmltools/convert/coreml/operator_converters/neural_network/BidirectionalLSTM.py
@@ -325,42 +325,39 @@ def convert_bidirectional_lstm(scope, operator, container):
     lstm_attrs['clip'] = lstm_params.cellClipThreshold
     lstm_attrs['input_forget'] = lstm_params.coupledInputAndForgetGate
 
-    # Create output part of CoreML LSTM
-    if lstm_params.sequenceOutput:
-        lstm_y_name = scope.get_unique_variable_name(lstm_op_name + '_Y')
-        lstm_outputs.append(lstm_y_name)
+    # Create the major LSTM operator. We assign a tensor name to each output of LSTM. However, variables can be
+    # undefined in some cases. For example, when output_sequence=False, the first output is not meaningful.
+    lstm_y_name = scope.get_unique_variable_name(lstm_op_name + '_Y')
+    lstm_y_h_name = scope.get_unique_variable_name(lstm_op_name + '_Y_h')
+    lstm_y_c_name = scope.get_unique_variable_name(lstm_op_name + '_Y_c')
+    lstm_outputs.extend([lstm_y_name, lstm_y_h_name, lstm_y_c_name])
+    container.add_node('LSTM', lstm_inputs, lstm_outputs, **lstm_attrs)
 
+    # Create post-processing operators for converting ONNX LSTM outputs to CoreML ones
+    if lstm_params.sequenceOutput:
         container.add_node('Reshape', lstm_y_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[-1, 2 * hidden_size])
 
         if len(operator.outputs) > 1:
-            lstm_y_h_name = scope.get_unique_variable_name(lstm_op_name + '_Y_h')
-            lstm_outputs.append(lstm_y_h_name)
-
             lstm_y_h_reshape_name = scope.get_unique_variable_name(lstm_op_name + '_Y_h_reshape')
             container.add_node('Reshape', lstm_y_h_name, lstm_y_h_reshape_name,
                                name=scope.get_unique_operator_name('Reshape'), shape=[2, hidden_size])
 
             container.add_node('Split', lstm_y_h_reshape_name,
                                [operator.outputs[1].full_name, operator.outputs[3].full_name],
-                               op_version=2, name=scope.get_unique_operator_name('Split'), split=[1, 1, ], axis=0)
+                               op_version=2, name=scope.get_unique_operator_name('Split'), split=[1, 1], axis=0)
     else:
-        # Here we ingore ONNX RNN's first output because it's useless.
-        lstm_outputs.append(scope.get_unique_variable_name('isolated'))
-
-        # Handle the second output of ONNX LSTM. It will become the first and the second outputs of
-        # CoreML's LSTM.
-        lstm_y_name = scope.get_unique_variable_name(lstm_op_name + '_Y')
-        lstm_outputs.append(lstm_y_name)
+        # Here we ignore ONNX RNN's first output because it's useless. The second output of ONNX LSTM will be used to
+        # generate the first and the second outputs of CoreML LSTM.
 
         # Directly reshape ONNX LSTM's 2nd output to CoreML LSTM's 1st output.
-        container.add_node('Reshape', lstm_y_name, operator.outputs[0].full_name,
+        container.add_node('Reshape', lstm_y_h_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[1, 2 * hidden_size])
 
         if len(operator.outputs) > 1:
             lstm_y_reshape_name = scope.get_unique_variable_name(lstm_op_name + '_Y_reshape')
 
-            container.add_node('Reshape', lstm_y_name, lstm_y_reshape_name,
+            container.add_node('Reshape', lstm_y_h_name, lstm_y_reshape_name,
                                name=scope.get_unique_operator_name('Reshape'), shape=[2, hidden_size])
 
             container.add_node('Split', lstm_y_reshape_name,
@@ -369,9 +366,6 @@ def convert_bidirectional_lstm(scope, operator, container):
 
     # Output cell state if necessary
     if len(operator.outputs) > 2:
-        lstm_y_c_name = scope.get_unique_variable_name(lstm_op_name + '_Y_c')
-        lstm_outputs.append(lstm_y_c_name)
-
         lstm_y_c_reshape_name = scope.get_unique_variable_name(lstm_op_name + '_Y_c_reshape')
         container.add_node('Reshape', lstm_y_c_name, lstm_y_c_reshape_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[2, hidden_size])
@@ -380,8 +374,5 @@ def convert_bidirectional_lstm(scope, operator, container):
                            [operator.outputs[2].full_name, operator.outputs[4].full_name],
                            op_version=2, name=scope.get_unique_operator_name('Split'), split=[1, 1], axis=0)
 
-    # Create the major LSTM operator
-    container.add_node('LSTM', lstm_inputs, lstm_outputs, **lstm_attrs)
-
 
 register_converter('biDirectionalLSTM', convert_bidirectional_lstm)
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/Convolution.py b/onnxmltools/convert/coreml/operator_converters/neural_network/Convolution.py
@@ -23,13 +23,13 @@ def convert_convolution(scope, operator, container):
     if params.isDeconvolution:
         shape_w[0] = params.kernelChannels
         shape_w[1] = int(params.outputChannels / n_groups)
-    name_w = operator.full_name + '.W'
+    name_w = scope.get_unique_variable_name(operator.full_name + '_W')
     inputs.append(name_w)
     container.add_initializer(name_w, onnx_proto.TensorProto.FLOAT, shape_w, params.weights.floatValue)
 
     if params.hasBias:
         shape_b = [len(params.bias.floatValue)]
-        name_b = operator.full_name + '.B'
+        name_b = scope.get_unique_variable_name(operator.full_name + '_B')
         inputs.append(name_b)
         container.add_initializer(name_b, onnx_proto.TensorProto.FLOAT, shape_b, params.bias.floatValue)
 
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/GRU.py b/onnxmltools/convert/coreml/operator_converters/neural_network/GRU.py
@@ -155,39 +155,32 @@ def convert_gru(scope, operator, container):
     gru_attrs['output_sequence'] = params.sequenceOutput
     gru_attrs['hidden_size'] = hidden_size
 
+    # Create the major GRU operator in ONNX.
+    gru_y_name = scope.get_unique_variable_name(gru_op_name + '_Y')
+    gru_y_h_name = scope.get_unique_variable_name(gru_op_name + '_Y_h')
+    gru_outputs.extend([gru_y_name, gru_y_h_name])
+    container.add_node('GRU', gru_inputs, gru_outputs, **gru_attrs)
+
+    # To simulate CoreML GRU, we add post-processing operators to adjust ONNX GRU outputs
     if params.sequenceOutput:
         # Again, the output shapes in ONNX's GRU is not consistent with that in CoreML, so we need
         # to adjust the result produced by ONNX according to CoreML format.
-        gru_y_name = scope.get_unique_variable_name(gru_op_name + '_Y')
-        gru_outputs.append(gru_y_name)
         container.add_node('Reshape', gru_y_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[-1, hidden_size])
 
         # Handle the second output, the last hidden state of a sequence, if exists.
         if len(operator.outputs) == 2:
-            gru_y_h_name = scope.get_unique_variable_name(gru_op_name + '_Y_h')
-            gru_outputs.append(gru_y_h_name)
             container.add_node('Reshape', gru_y_h_name, operator.outputs[1].full_name,
                                name=scope.get_unique_operator_name('Reshape'), shape=[1, hidden_size])
     else:
         # Recall that when sequence output is false, the first and the second outputs of GRU
         # are identical. Thus, we can ignore ONNX GRU's first output.
-        gru_outputs.append(scope.get_unique_variable_name('isloated'))
-
-        # As the two outputs are always identical, so we just need to compute one of them and
-        # produce the other using identiy operator.
-        gru_y_name = scope.get_unique_variable_name(gru_op_name + '_Y')
-        gru_outputs.append(gru_y_name)
-
-        container.add_node('Reshape', gru_y_name, operator.outputs[0].full_name,
+        container.add_node('Reshape', gru_y_h_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[1, hidden_size])
 
         if len(operator.outputs) == 2:
             container.add_node('Identity', operator.outputs[0].full_name, operator.outputs[1].full_name,
                                name=scope.get_unique_operator_name('Identity'))
 
-    # Finally, we create the major GRU operator in ONNX.
-    container.add_node('GRU', gru_inputs, gru_outputs, **gru_attrs)
-
 
 register_converter('gru', convert_gru)
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/InnerProduct.py b/onnxmltools/convert/coreml/operator_converters/neural_network/InnerProduct.py
@@ -15,12 +15,12 @@ def convert_inner_product(scope, operator, container):
     outputs = [variable.full_name for variable in operator.outputs]
     attrs = {'name': operator.full_name}
 
-    name_w = operator.full_name + '.W'
+    name_w = scope.get_unique_variable_name(operator.full_name + '_W')
     shape_w = [params.outputChannels, params.inputChannels]
     inputs.append(name_w)
     container.add_initializer(name_w, onnx_proto.TensorProto.FLOAT, shape_w, params.weights.floatValue)
 
-    name_b = operator.full_name + '.B'
+    name_b = scope.get_unique_variable_name(operator.full_name + '_B')
     shape_b = [params.outputChannels]
     inputs.append(name_b)
     if params.hasBias:
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/LSTM.py b/onnxmltools/convert/coreml/operator_converters/neural_network/LSTM.py
@@ -249,28 +249,27 @@ def convert_unidirectional_lstm(scope, operator, container):
     lstm_attrs['clip'] = lstm_params.cellClipThreshold
     lstm_attrs['input_forget'] = lstm_params.coupledInputAndForgetGate
 
+    # Create the main LSTM operator
+    lstm_y_name = scope.get_unique_variable_name(lstm_op_name + '_Y')
+    lstm_y_h_name = scope.get_unique_variable_name(lstm_op_name + '_Y_h')
+    lstm_c_name = scope.get_unique_variable_name(lstm_op_name + '_Y_c')
+    lstm_outputs.extend([lstm_y_name, lstm_y_h_name, lstm_c_name])
+    container.add_node('LSTM', lstm_inputs, lstm_outputs, **lstm_attrs)
+
     # Handle the first output of LSTM
     if lstm_params.sequenceOutput:
         # Handle the first output of LSTM
-        lstm_y_name = scope.get_unique_variable_name(lstm_op_name + '_Y')
-        lstm_outputs.append(lstm_y_name)
         container.add_node('Reshape', lstm_y_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[-1, hidden_size])
 
         # Handle the second output of LSTM
         if len(operator.outputs) > 1:
-            lstm_y_h_name = scope.get_unique_variable_name(lstm_op_name + '_Y_h')
-            lstm_outputs.append(lstm_y_h_name)
             container.add_node('Reshape', lstm_y_h_name, operator.outputs[1].full_name,
                                name=scope.get_unique_operator_name('Reshape'), shape=[1, hidden_size])
     else:
-        # Here we ingore ONNX RNN's first output because it's useless.
-        lstm_outputs.append(scope.get_unique_variable_name('isolated'))
-
-        # Use the second output of ONNX LSTM to produce the first output of CoreML LSTM
-        lstm_y_name = scope.get_unique_variable_name(lstm_op_name + '_Y')
-        lstm_outputs.append(lstm_y_name)
-        container.add_node('Reshape', lstm_y_name, operator.outputs[0].full_name,
+        # Here we ignore ONNX LSTM's first output because it's useless, and use its second output to produce the first
+        # output of CoreML LSTM
+        container.add_node('Reshape', lstm_y_h_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[1, hidden_size])
 
         # Create the second LSTM output from the first output
@@ -280,13 +279,8 @@ def convert_unidirectional_lstm(scope, operator, container):
 
     # Handle the cell state output of LSTM
     if len(operator.outputs) > 2:
-        lstm_c_name = scope.get_unique_variable_name(lstm_op_name + '_Y_c')
-        lstm_outputs.append(lstm_c_name)
         container.add_node('Reshape', lstm_c_name, operator.outputs[2].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[1, hidden_size])
 
-    # Finally, the main LSTM operator is created
-    container.add_node('LSTM', lstm_inputs, lstm_outputs, **lstm_attrs)
-
 
 register_converter('uniDirectionalLSTM', convert_unidirectional_lstm)
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/Pad.py b/onnxmltools/convert/coreml/operator_converters/neural_network/Pad.py
@@ -36,7 +36,7 @@ def convert_padding(scope, operator, container):
     attrs['pads'] = pads
 
     if pad_type == 'constant':
-        attrs['values'] = params.constant.value
+        attrs['value'] = params.constant.value
 
     container.add_node(op_type, operator.input_full_names, operator.output_full_names, op_version=2, **attrs)
 
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/Pool.py b/onnxmltools/convert/coreml/operator_converters/neural_network/Pool.py
@@ -76,8 +76,7 @@ def create_legacy_pad(scope, input_name, output_name, H_in, W_in, k_h, k_w,
     #             N_end_index,   C_end_index,   H_end_index,   W_end_index]
     # Because only H- and W-axes are padded in CoreML, we leave padding amounts of N- and C-axes zeros.
     pads = [0, 0, pad_t, pad_l, 0, 0, pad_b, pad_r]
-    attrs = {'name': scope.get_unique_operator_name('Pad'), 'kernel_shape': [k_h, k_w],
-             'strides': [k_h, k_w], 'pads': pads, 'value': padded_value}
+    attrs = {'name': scope.get_unique_operator_name('Pad'), 'pads': pads, 'value': padded_value}
     container.add_node('Pad', input_name, output_name, op_version=2, **attrs)
 
 
diff --git a/onnxmltools/convert/coreml/operator_converters/neural_network/SimpleRNN.py b/onnxmltools/convert/coreml/operator_converters/neural_network/SimpleRNN.py
@@ -188,36 +188,26 @@ def convert_simple_rnn(scope, operator, container):
     rnn_attrs['output_sequence'] = params.sequenceOutput
     rnn_attrs['hidden_size'] = hidden_size
 
+    # We use the collected information to build ONNX's RNN
+    rnn_y_name = scope.get_unique_variable_name(rnn_op_name + '_Y')
+    rnn_h_name = scope.get_unique_variable_name(rnn_op_name + '_Y_h')
+    container.add_node('RNN', rnn_inputs, [rnn_y_name, rnn_h_name], **rnn_attrs)
+
     # Set up outputs' of RNN
-    rnn_outputs = []
     if params.sequenceOutput:
-        # Create ONNX's RNN output, which needs to be reshaped to fit CoreML standard.
-        rnn_y_name = scope.get_unique_variable_name(rnn_op_name + '_Y')
-        rnn_outputs.append(rnn_y_name)
-
         # Connect ONNX's output and CoreML's output via a reshape operator
         container.add_node('Reshape', rnn_y_name, operator.outputs[0].full_name,
                            name=scope.get_unique_operator_name('Reshape'), shape=[-1, hidden_size])
 
         # Handel the second RNN output (aka last hidden state), which is optional.
         if len(operator.outputs) == 2:
-            # Create ONNX's RNN output, which needs to be reshaped to fit CoreML standard.
-            rnn_h_name = scope.get_unique_variable_name(rnn_op_name + '_Y_h')
-            rnn_outputs.append(rnn_h_name)
-
             # Connect ONNX's output and CoreML's output via a reshape operator
             container.add_node('Reshape', rnn_h_name, operator.outputs[1].full_name,
                                name=scope.get_unique_operator_name('Reshape'),
                                shape=[1, hidden_size])
     else:
-        # Here we ignore ONNX RNN's first output by assigning it an isolated name. Isolated names
-        # are not connected with anything else.
-        rnn_outputs.append(scope.get_unique_variable_name('isolated'))
-
-        # According to CoreML, the two outputs are always identical, so we just need to compute one of
-        # them and produce the other one using an identiy operator.
-        rnn_h_name = scope.get_unique_variable_name(rnn_op_name + '_Y_h')
-        rnn_outputs.append(rnn_h_name)
+        # According to CoreML, its two outputs are always identical, so we just need to compute one of them and produce
+        # the other one using an identity operator. Note that the first ONNX RNN output is undefined in this case.
 
         # Reshape last hidden state's ONNX format to its CoreML format
         container.add_node('Reshape', rnn_h_name, operator.outputs[0].full_name,
@@ -228,8 +218,5 @@ def convert_simple_rnn(scope, operator, container):
             container.add_node('Identity', operator.outputs[0].full_name, operator.outputs[1].full_name,
                                name=scope.get_unique_operator_name('Identity'))
 
-    # Finally, we use the collected information to build ONNX's RNN
-    container.add_node('RNN', rnn_inputs, rnn_outputs, **rnn_attrs)
-
 
 register_converter('simpleRecurrent', convert_simple_rnn)
diff --git a/onnxmltools/convert/coreml/shape_calculators/neural_network/Merge.py b/onnxmltools/convert/coreml/shape_calculators/neural_network/Merge.py
@@ -21,8 +21,9 @@ def calculate_merge_output_shapes(operator):
     check_input_and_output_types(operator, good_input_types=[FloatTensorType])
 
     # [TODO] Fix reduce-like shape inference. We now assume all inputs are 4-D.
-    output_shape = [0, 0, 0, 0]
-    for i in range(4):
+    n_dims = max(len(variable.type.shape) for variable in operator.inputs)
+    output_shape = [0] * n_dims
+    for i in range(n_dims):
         input_dims = [variable.type.shape[i] for variable in operator.inputs]
         if 'None' in input_dims:
             output_shape[i] = 'None'
diff --git a/onnxmltools/convert/main.py b/onnxmltools/convert/main.py
@@ -7,17 +7,17 @@
 from .common import utils
 
 
-def convert_sklearn(model, name=None, input_features=None):
+def convert_sklearn(model, name=None, initial_types=None, doc_string=''):
     if not utils.sklearn_installed():
         raise RuntimeError('scikit-learn is not installed. Please install scikit-learn to use this feature.')
 
     from .sklearn.convert import convert
-    return convert(model, name, input_features)
+    return convert(model, name=name, initial_types=initial_types, doc_string=doc_string)
 
 
-def convert_coreml(model, name=None):
+def convert_coreml(model, name=None, initial_types=None, doc_string=''):
     if not utils.coreml_installed():
         raise RuntimeError('coremltools is not installed. Please install coremltools to use this feature.')
 
     from .coreml.convert import convert
-    return convert(model, name)
+    return convert(model, name=name, initial_types=initial_types, doc_string=doc_string)
diff --git a/onnxmltools/convert/sklearn/operator_converters/OneHotEncoder.py b/onnxmltools/convert/sklearn/operator_converters/OneHotEncoder.py