Merge pull request #913 from RandySheriffH/rashuai/FixLSTM

RandySheriffH · web-flow · commit 9ec80aa2a54e · 2020-05-08T10:17:18.000-07:00
Fix LSTM pattern matching for versions between 1.15.0 and 2.x.
diff --git a/tests/common.py b/tests/common.py
@@ -332,7 +332,7 @@ def check_op_count(graph, op_type, expected_count):
 
 
 def check_lstm_count(graph, expected_count):
-    return check_op_count(graph, "LSTM", expected_count)
+    return len(group_nodes_by_type(graph)["LSTM"]) == expected_count
 
 
 def check_gru_count(graph, expected_count):
diff --git a/tf2onnx/rewriter/lstm_rewriter.py b/tf2onnx/rewriter/lstm_rewriter.py
@@ -98,15 +98,18 @@ def _get_weight_and_bias_for_lstm_cell(self, context):
         # check https://www.tensorflow.org/versions/r1.8/api_docs/cc/class/tensorflow/ops/bias-add
         # for bias_add data format
         bias_add = match.get_op("bias_add")
-        if bias_add.data_format != "NHWC":
+        if bias_add is not None and bias_add.data_format != "NHWC":
             logger.debug("BiasAdd data_format is not NHWC, SKIP")
             return None
 
         b_e = match.get_op("cell_bias")
-        b = get_weights_from_const_node(self.g, b_e)
-        if b is None or b.shape[0] != w.shape[1]:
-            logger.warning("cell_kernel and cell_bias's dimensions does not match, skip")
-            return None
+        if b_e is None:
+            b = np.array([0 for i in range(len(w[0]))]).astype(w.dtype)
+        else:
+            b = get_weights_from_const_node(self.g, b_e)
+            if b is None or b.shape[0] != w.shape[1]:
+                logger.warning("cell_kernel and cell_bias's dimensions does not match, skip")
+                return None
 
         ft_bias_node = match.get_op("ft_bias")
         ft_bias = get_weights_from_const_node(self.g, ft_bias_node)
diff --git a/tf2onnx/rewriter/rnn_utils.py b/tf2onnx/rewriter/rnn_utils.py
@@ -30,30 +30,31 @@ class REWRITER_RESULT(Enum):
 
 
 # TensorFlow LSTMCell/BasicLSTMCell computation graph matching
-xc_pattern = OpTypePattern('Split', inputs=[
-    OpTypePattern("Const"), # axis for split
-    OpTypePattern("BiasAdd", name="bias_add", inputs=[
-        OpTypePattern("MatMul", inputs=[
-            OpTypePattern("ConcatV2|Concat", name="xh"),
+
+xc_pattern = \
+    OpTypePattern('Split', inputs=[
+        OpTypePattern("Const"), # axis for split
+        OpTypePattern("BiasAdd", name="bias_add", inputs=[
+            OpTypePattern("MatMul", inputs=[
+                OpTypePattern("ConcatV2|Concat", name="xh"),
+                OpTypePattern("Enter", inputs=[
+                    OpTypePattern("*", name="cell_kernel"),
+                ]),
+            ]),
             OpTypePattern("Enter", inputs=[
-                OpTypePattern("*", name="cell_kernel"),
+                OpTypePattern("*", name="cell_bias"),
             ]),
         ]),
-        OpTypePattern("Enter", inputs=[
-            OpTypePattern("*", name="cell_bias"),
-        ]),
-    ]),
-])
-
+    ])
 
 lstmcell_pattern = \
     OpTypePattern('Mul', name='ht', inputs=[
         OpTypePattern("Sigmoid", name="ot", inputs=[xc_pattern]),
         OpTypePattern('Tanh', inputs=[
-            OpTypePattern("Add", name="ct", inputs=[
+            OpTypePattern("Add|AddV2", name="ct", inputs=[
                 OpTypePattern("Mul", name="ct_identity_consumer", inputs=[
                     OpTypePattern("Sigmoid", name="ft", inputs=[
-                        OpTypePattern("Add", inputs=[
+                        OpTypePattern("Add|AddV2", inputs=[
                             xc_pattern,
                             OpTypePattern("*", name="ft_bias"),
                         ]),
@@ -68,6 +69,39 @@ class REWRITER_RESULT(Enum):
         ]),
     ])
 
+xc_pattern_optimized = \
+    OpTypePattern('Split', inputs=[
+        OpTypePattern("Const"),
+        OpTypePattern("Identity", inputs=[
+            OpTypePattern("MatMul", inputs=[
+                OpTypePattern("ConcatV2|Concat", name="xh"),
+                OpTypePattern("Const", name="cell_kernel"),
+            ]),
+        ]),
+    ])
+
+lstmcell_pattern_optimized = \
+    OpTypePattern('Mul', name='ht', inputs=[
+        OpTypePattern("Sigmoid", name="ot", inputs=[xc_pattern_optimized]),
+        OpTypePattern('Tanh', inputs=[
+            OpTypePattern("Add|AddV2", name="ct", inputs=[
+                OpTypePattern("Mul", name="ct_identity_consumer", inputs=[
+                    OpTypePattern("Sigmoid", name="ft", inputs=[
+                        OpTypePattern("Add|AddV2", inputs=[
+                            xc_pattern_optimized,
+                            OpTypePattern("*", name="ft_bias"),
+                        ]),
+                    ]),
+                    OpTypePattern("*"),
+                ]),
+                OpTypePattern("Mul", inputs=[
+                    OpTypePattern("Sigmoid", name="it", inputs=[xc_pattern_optimized]),
+                    OpTypePattern("Tanh", name="gt", inputs=[xc_pattern_optimized]),
+                ]),
+            ]),
+        ]),
+    ])
+
 # input sequence: top to down, left to right
 # split into update gate and reset gate
 gru_split_pattern = \
@@ -237,7 +271,7 @@ class RNNUnitType(Enum):
 
 
 rnn_cell_patterns = {
-    RNNUnitType.LSTMCell: [lstmcell_pattern],
+    RNNUnitType.LSTMCell: [lstmcell_pattern, lstmcell_pattern_optimized],
     RNNUnitType.LSTMBlockCell: [lstmblockcell_pattern],
     RNNUnitType.GRUCell: [grucell_pattern],
     RNNUnitType.GRUBlockCell: [grublockcell_pattern0, grublockcell_pattern1],