
Commit d418a37

Shirong Wu authored and Wei Wei committed
Enable explicit batch dim support for getitem and chunk (#70)
Summary:
Pull Request resolved: https://github.com/pytorch/fx2trt/pull/70

Enable explicit batch dim support (with limited support for dynamic shape) for the acc_op getitem and chunk converters. Right now the converters can only process one dynamic shape dim.

Reviewed By: frank-wei, 842974287
Differential Revision: D34454742
fbshipit-source-id: f1bf643ca94b268be7193d332a5819e6bc8d876d
1 parent 6142f97 · commit d418a37

File tree: 4 files changed, +205 −14 lines

fx/converters/acc_ops_converters.py

Lines changed: 27 additions & 11 deletions
```diff
@@ -2752,9 +2752,15 @@ def acc_ops_getitem(
     if not isinstance(input_val, TRTTensor):
         return operator.getitem(input_val, slices)  # type: ignore[arg-type]
 
-    assert not has_dynamic_shape(
-        input_val.shape
-    ), "Currently we don't support slicing tensor if it has dynamic shape."
+    if not isinstance(slices, tuple) and not isinstance(slices, list):
+        slices = (slices,)
+
+    dynamic_shape = get_dynamic_dims(input_val.shape)
+    if dynamic_shape:
+        for i, s in zip(input_val.shape, slices):
+            assert i > 0 or (
+                s in [slice(None, None, None), slice(0, None, None), Ellipsis]
+            ), "We don't support slicing tensor on dynamic shape."
 
     def num_slice_types(slices):
         """
```
```diff
@@ -2776,9 +2782,6 @@ def slice_to_trt_params(py_slice, dim_size):
         size = math.ceil((stop - start) * 1.0 / stride)
         return start, size, stride
 
-    if not isinstance(slices, tuple) and not isinstance(slices, list):
-        slices = (slices,)
-
     if network.has_implicit_batch_dimension:
         # Raise an error if it's trying to subscript batch dimension unless it's
         # slice(None, None, None).
```
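For context, `slice_to_trt_params` (only its tail is visible in this hunk) normalizes a Python slice into TensorRT's (start, size, stride) triple, where size counts output elements. A sketch of the presumed full helper, using `slice.indices` to resolve `None` and negative bounds; the real helper's normalization may differ in detail:

```python
import math

def slice_to_trt_params(py_slice: slice, dim_size: int):
    """Resolve a Python slice against a known dim size into (start, size, stride)."""
    start, stop, stride = py_slice.indices(dim_size)
    # size = number of elements the slice produces, as in the hunk above.
    size = math.ceil((stop - start) * 1.0 / stride)
    return start, size, stride

print(slice_to_trt_params(slice(1, None, 2), 10))  # (1, 5, 2): elements 1, 3, 5, 7, 9
print(slice_to_trt_params(slice(-8, -2, 3), 10))   # (2, 2, 3): elements 2, 5
```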
```diff
@@ -2831,12 +2834,17 @@ def slice_to_trt_params(py_slice, dim_size):
         stride.append(1)
         i += 1
 
+    if dynamic_shape:
+        size = get_shape_with_dynamic_shape(network, size, input_val, target, name)
+
     layer = network.add_slice(
         input=input_val,
         start=start,
-        shape=size,
+        shape=[] if dynamic_shape else size,
         stride=stride,
     )
+    if dynamic_shape:
+        layer.set_input(2, size)
     set_layer_name(layer, target, name)
 
     # Add shuffle layer to insert dimensions for 'None' and remove dimensions for 'int'.
```
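This is the standard TensorRT pattern for runtime-computed extents: `ISliceLayer` takes its output shape either as the static `shape` argument or, when that is left empty, from optional layer input index 2. Distilled from the hunk above (`network`, `input_val`, `start`, `stride`, and the ITensor `size` are assumed from the surrounding converter):

```python
# When `size` is an ITensor computed from the network's own shape (via
# get_shape_with_dynamic_shape), the static argument is left empty and the
# tensor is wired into the slice layer instead.
layer = network.add_slice(input=input_val, start=start, shape=[], stride=stride)
layer.set_input(2, size)  # input 2 of ISliceLayer is the runtime size tensor
```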
```diff
@@ -3212,15 +3220,15 @@ def acc_ops_chunk(
             "of the TensorRT region!"
         )
 
+    dynamic_shape = has_dynamic_shape(input_val.shape)
     if network.has_implicit_batch_dimension:
         input_dim_size += 1
         dim = get_positive_dim(dim, input_dim_size)
         assert dim != 0, "Can't chunk on batch dim when it's implicit!"
         dim -= 1
     else:
-        assert not has_dynamic_shape(
-            input_val.shape
-        ), "We currently don't support dynamic shape for chunk."
+        if dynamic_shape:
+            assert input_val.shape[dim] != -1, "Can't chunk on dynamic shape dimension!"
         dim = get_positive_dim(dim, input_dim_size)
 
     if chunks > input_val.shape[dim]:
```
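The implicit-batch branch has to reconcile the user-facing dim (which counts the batch axis) with TensorRT's batch-less tensor rank. A small sketch of that bookkeeping, with `get_positive_dim` assumed to be a plain modulo normalization:

```python
def normalize_chunk_dim(dim: int, trt_rank: int, implicit_batch: bool) -> int:
    # Assumed behavior of get_positive_dim: map a possibly-negative dim into [0, size).
    def get_positive_dim(d: int, size: int) -> int:
        return d % size

    if implicit_batch:
        # The user-facing rank includes the batch axis, so normalize against
        # rank + 1, reject the batch axis itself, then shift left by one.
        dim = get_positive_dim(dim, trt_rank + 1)
        assert dim != 0, "Can't chunk on batch dim when it's implicit!"
        return dim - 1
    return get_positive_dim(dim, trt_rank)

print(normalize_chunk_dim(-2, 3, implicit_batch=True))   # -2 -> 2 -> 1
print(normalize_chunk_dim(-2, 3, implicit_batch=False))  # -2 -> 1
```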
```diff
@@ -3243,8 +3251,16 @@
     for i in range(chunks):
         shape = list(input_val.shape)
         shape[dim] = min(split_size, max_offset - offset)
+        if dynamic_shape:
+            shape = get_shape_with_dynamic_shape(
+                network, shape, input_val, target, f"{name}_{i}"
+            )
         start[dim] = offset
-        layer = network.add_slice(input_val, start=start, shape=shape, stride=stride)
+        layer = network.add_slice(
+            input_val, start=start, shape=[] if dynamic_shape else shape, stride=stride
+        )
+        if dynamic_shape:
+            layer.set_input(2, shape)
         offset += split_size
         set_layer_name(layer, target, f"{name}_{i}")
         output.append(layer.get_output(0))
```
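The loop mirrors torch.chunk semantics: split_size is the ceiling of dim_size / chunks, and the final chunk absorbs whatever remains. A pure-Python sketch of the extents it produces (`chunk_extents` is a hypothetical helper, for illustration only):

```python
import math

def chunk_extents(dim_size: int, chunks: int):
    chunks = min(chunks, dim_size)       # e.g. chunk(2000) on a size-10 dim -> 10 chunks
    split_size = math.ceil(dim_size / chunks)
    offset, out = 0, []
    while offset < dim_size:
        out.append((offset, min(split_size, dim_size - offset)))  # (start, length)
        offset += split_size
    return out

print(chunk_extents(10, 3))     # [(0, 4), (4, 4), (8, 2)], matching torch.chunk
print(chunk_extents(10, 2000))  # ten chunks of length 1
```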

fx/converters/converter_utils.py

Lines changed: 61 additions & 1 deletion
```diff
@@ -146,7 +146,10 @@ def has_dynamic_shape(shape: Shape) -> bool:
     Returns:
         A boolean value indicates whether there's dynamic dim in the shape.
     """
-    return any(s == -1 for s in shape)
+    count = 0
+    for s in shape:
+        count += 1 if s == -1 else 0
+    return count
 
 
 def get_axes_for_reduce_op(
```
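Note the helper now returns the count of -1 dims rather than a bool, so the `-> bool` annotation has become loose; existing truth-value call sites keep working because a zero count is falsy:

```python
def has_dynamic_shape(shape) -> bool:
    count = 0
    for s in shape:
        count += 1 if s == -1 else 0
    return count

assert has_dynamic_shape((-1, 10, -1)) == 2  # number of dynamic dims
assert not has_dynamic_shape((4, 10, 20))    # 0 is falsy, as before
```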
```diff
@@ -342,6 +345,63 @@ def broadcast(
     return a, b
 
 
+def get_shape_with_dynamic_shape(
+    network: TRTNetwork,
+    shape: Union[list, tuple, torch.Tensor],
+    input_val: TRTTensor,
+    target: Target,
+    name: str,
+) -> TRTTensor:
+    """
+    Prepare the real output tensor shape for dynamic shape mode tensor input.
+    How this function works:
+    Assuming the input_val has actual shape [2048, 256, 512] and the expected
+    reduce operation output shape is [-1, 128, 256], this function should
+    return [2048, 128, 256] as the actual reduce operation output shape. The
+    steps of the calculation are:
+    1. get the actual tensor shape of input_val via an add_shape layer;
+    2. create an all-zero tensor [0, 0, 0];
+    3. compare [-1, 128, 256] elementwise against [0, 0, 0] to get a
+       condition tensor [True, False, False];
+    4. use the condition tensor to select between [2048, 256, 512] and
+       [-1, 128, 256], replacing every -1 dynamic dimension with its actual
+       runtime value;
+    5. output the shape with the actual batch_size, i.e. [2048, 128, 256].
+
+    Args:
+        network (TRTNetwork): TensorRT network object.
+        shape: calculated shape of the expected output tensor.
+        input_val (TRTTensor): A TensorRT ITensor.
+        target (Target): Target of fx node.
+        name (str): The name we want to assign to the created TensorRT layer.
+    Returns:
+        A TensorRT ITensor that represents the given shape with every dynamic
+        (-1) dimension resolved to its runtime value.
+    """
+    # Get real shape info for input_val
+    input_shape = network.add_shape(input_val).get_output(0)
+
+    scale_layer = network.add_constant(
+        input_shape.shape, np.ascontiguousarray(shape, dtype=np.int32)
+    )
+    set_layer_name(scale_layer, target, f"{name}_scale")
+    scale_res = scale_layer.get_output(0)
+
+    length = input_shape.shape[0]
+    zero_layer = network.add_constant(
+        input_shape.shape, to_numpy(torch.zeros((length), dtype=torch.int32))
+    )
+    set_layer_name(zero_layer, target, f"{name}_zeros")
+
+    condition_val = add_binary_elementwise_layer(
+        network,
+        scale_res,
+        zero_layer.get_output(0),
+        trt.ElementWiseOperation.LESS,
+        target,
+        f"{name}_shape",
+    )
+    select_layer = network.add_select(condition_val, input_shape, scale_res)
+    set_layer_name(select_layer, target, f"{name}_select")
+    return select_layer.get_output(0)
+
+
 def add_binary_elementwise_layer(
     network: TRTNetwork,
     lhs_val: Union[int, float, TRTTensor, torch.Tensor],
```
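In effect, the LESS comparison plus select computes `where(expected < 0, actual, expected)` over the two shape tensors. A numpy sketch of the same arithmetic:

```python
import numpy as np

actual = np.array([2048, 256, 512], dtype=np.int32)  # from the add_shape layer at runtime
expected = np.array([-1, 128, 256], dtype=np.int32)  # requested shape with -1 placeholders
condition = expected < np.zeros_like(expected)       # [ True, False, False]
resolved = np.where(condition, actual, expected)     # TRT's add_select equivalent
print(resolved)                                      # [2048  128  256]
```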

test/converters/acc_op/test_chunk.py

Lines changed: 24 additions & 1 deletion
```diff
@@ -2,7 +2,7 @@
 import torch
 import torch.nn as nn
 from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase
+from torch.testing._internal.common_fx2trt import InputTensorSpec, AccTestCase
 from torch.testing._internal.common_utils import run_tests
 
 
@@ -26,6 +26,29 @@ def forward(self, x):
             expected_ops={acc_ops.chunk},
         )
 
+    @parameterized.expand(
+        [
+            ("chunk", 3, 1),
+            ("chunk", 2000, 1),
+            ("chunk", 3, -2),
+        ]
+    )
+    def test_chunk_with_dynamic_shape(self, _, chunk, dim):
+        class Chunk(nn.Module):
+            def forward(self, x):
+                return x.chunk(chunk, dim)[0]
+
+        input_specs = [
+            InputTensorSpec(
+                shape=(-1, 10, -1),
+                dtype=torch.float32,
+                shape_ranges=[((1, 10, 20), (5, 10, 20), (10, 10, 20))],
+            ),
+        ]
+        self.run_test_with_dynamic_shape(
+            Chunk(), input_specs, expected_ops={acc_ops.chunk}
+        )
+
 
 if __name__ == "__main__":
     run_tests()
```
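For readers unfamiliar with the harness: each `shape_ranges` entry in `InputTensorSpec` is, to my reading, a (min, opt, max) triple that becomes a TensorRT optimization profile, and only the -1 entries of `shape` actually vary across it:

```python
import torch
from torch.testing._internal.common_fx2trt import InputTensorSpec

spec = InputTensorSpec(
    shape=(-1, 10, -1),  # dims marked -1 are dynamic
    dtype=torch.float32,
    # One profile: the batch dim ranges over 1..10 (opt 5); the last dim is
    # pinned at 20 across min/opt/max, so it stays effectively static here.
    shape_ranges=[((1, 10, 20), (5, 10, 20), (10, 10, 20))],
)
```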

test/converters/acc_op/test_getitem.py

Lines changed: 93 additions & 1 deletion
```diff
@@ -2,7 +2,7 @@
 import torch
 import torch.nn as nn
 from parameterized import parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase
+from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
 from torch.testing._internal.common_utils import run_tests
 
 
@@ -52,6 +52,98 @@ def forward(self, x):
         inputs = [torch.randn(2, 10, 10, 10)]
         self.run_test(Getitem(idx), inputs, expected_ops={acc_ops.getitem})
 
+    @parameterized.expand(
+        [
+            ("slice_batch_dim", slice(None, None, None)),
+            ("ellipsis", (slice(None, None, None), ..., slice(0, -3, 2))),
+            (
+                "slice_all_none",
+                (slice(None, None, None), slice(None, None, None)),
+            ),
+            (
+                "slice_end_none",
+                (slice(None, None, None), slice(None, None, None), slice(1, None, 1)),
+            ),
+            (
+                "slice_step_none",
+                (slice(None, None, None), slice(None, None, None), slice(0, 3, None)),
+            ),
+            ("slice_neg_idx", (slice(None, None, None), -1, slice(None, None, None))),
+            (
+                "slice_neg_slice",
+                (slice(None, None, None), slice(None, None, None), slice(-8, -2, 3)),
+            ),
+            ("multi_dim", (slice(None, None, None), 0, 1)),
+            (
+                "slice_multi_dim",
+                (slice(None, None, None), slice(0, 3, 2), slice(1, -1, 3)),
+            ),
+            (
+                "none",
+                (slice(None, None, None), None, slice(1, -1, 3)),
+            ),
+        ]
+    )
+    def test_getitem_with_dynamic_shape(self, _, idx):
+        class Getitem(nn.Module):
+            def __init__(self, idx):
+                super().__init__()
+                self.idx = idx
+
+            def forward(self, x):
+                x = x + x
+                return x[self.idx]
+
+        input_specs = [
+            InputTensorSpec(
+                shape=(-1, 256, 256),
+                dtype=torch.float32,
+                shape_ranges=[((1, 256, 256), (3, 256, 256), (5, 256, 256))],
+            ),
+        ]
+        self.run_test_with_dynamic_shape(
+            Getitem(idx), input_specs, expected_ops={acc_ops.getitem}
+        )
+
+    @parameterized.expand(
+        [
+            ("slice_batch_dim", slice(None, None, None)),
+            ("ellipsis", (slice(None, None, None), ..., slice(0, -3, 2))),
+            (
+                "slice_all_none",
+                (slice(None, None, None), slice(None, None, None)),
+            ),
+            (
+                "slice_end_none",
+                (slice(None, None, None), slice(None, None, None), slice(1, None, 1)),
+            ),
+            (
+                "slice_step_none",
+                (slice(None, None, None), slice(None, None, None), slice(0, 3, None)),
+            ),
+        ]
+    )
+    def test_getitem_with_multi_dynamic_shape(self, _, idx):
+        class Getitem(nn.Module):
+            def __init__(self, idx):
+                super().__init__()
+                self.idx = idx
+
+            def forward(self, x):
+                x = x + x
+                return x[self.idx]
+
+        input_specs = [
+            InputTensorSpec(
+                shape=(-1, -1, 256),
+                dtype=torch.float32,
+                shape_ranges=[((1, 128, 256), (3, 192, 256), (5, 256, 256))],
+            ),
+        ]
+        self.run_test_with_dynamic_shape(
+            Getitem(idx), input_specs, expected_ops={acc_ops.getitem}
+        )
+
 
 if __name__ == "__main__":
     run_tests()
```
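The parameterized `idx` values are simply the desugared forms of ordinary subscripts, which makes the case names easier to map back to slicing syntax:

```python
import torch

x = torch.randn(3, 256, 256)
# "slice_neg_idx": (slice(None, None, None), -1, slice(None, None, None)) is x[:, -1, :]
assert torch.equal(x[(slice(None, None, None), -1, slice(None, None, None))], x[:, -1, :])
# "none": (slice(None, None, None), None, slice(1, -1, 3)) is x[:, None, 1:-1:3]
assert torch.equal(x[(slice(None, None, None), None, slice(1, -1, 3))], x[:, None, 1:-1:3])
```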
