Skip to content

Commit 5d80f41

Browse files
author
Wei Wei
committed
[fx2trt] improve pad/permute/setitem/getitem op (#72)
Summary: Pull Request resolved: https://github.com/pytorch/fx2trt/pull/72. 1. Support a fill value in the pad op. 2. Improve the permute op, since it previously could not handle x.permute(permutation) (a single list/tuple argument). 3. Improve setitem: fix an issue with slice(-n, None, None). 4. Improve getitem: support x[slice(None, None, None), slice(0, 0, None)]. This zero-size-slice case is needed for the setitem pass optimization: when we need to split a range where the dimension is (20, 10), (slice(None, None, None), slice(-10, None, None)) --> (slice(None, None, None), slice(0, -10, None)) + (slice(None, None, None), slice(-10, None, None)). Reviewed By: yinghai Differential Revision: D36223023 fbshipit-source-id: 7d87c74b85b5c6c6efcd46dac209a46af208d3f9
1 parent d418a37 commit 5d80f41

File tree

5 files changed

+75
-23
lines changed

5 files changed

+75
-23
lines changed

fx/converters/acc_ops_converters.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -332,10 +332,12 @@ def acc_ops_pad_with_slice_layer(
332332
f"Trying to pad last {len(pad) / 2} dimension but the input only has {rank} dimension."
333333
)
334334

335-
if value != 0:
336-
raise RuntimeError(
337-
f"Currently we only support padding value of 0, got {value}."
338-
)
335+
# cast value to TRTensor
336+
dt = torch_dtype_from_trt(input_val.dtype)
337+
value = 0 if value == None else value
338+
value_const = get_trt_tensor(
339+
network, torch.tensor([value], dtype=dt), f"{name}_value"
340+
)
339341

340342
input_shape = input_val.shape
341343
pre_start = tuple(i - 1 for i in input_shape)
@@ -352,6 +354,7 @@ def acc_ops_pad_with_slice_layer(
352354
pre_shape,
353355
pre_stride,
354356
)
357+
layer.set_input(4, value_const)
355358
layer.mode = trt.SliceMode.FILL
356359
set_layer_name(layer, target, f"pre_{name}")
357360
half_pad_output = layer.get_output(0)
@@ -360,6 +363,7 @@ def acc_ops_pad_with_slice_layer(
360363
mid_start = tuple(i - 1 for i in shape)
361364
mid_stride = [-1] * len(shape)
362365
layer = network.add_slice(half_pad_output, mid_start, shape, mid_stride)
366+
layer.set_input(4, value_const)
363367
layer.mode = trt.SliceMode.FILL
364368
set_layer_name(layer, target, f"transpose_{name}")
365369
transpose_output = layer.get_output(0)
@@ -373,6 +377,7 @@ def acc_ops_pad_with_slice_layer(
373377
post_stride = tuple([1] * len(shape))
374378

375379
layer = network.add_slice(transpose_output, post_start, post_shape, post_stride)
380+
layer.set_input(4, value_const)
376381
layer.mode = trt.SliceMode.FILL
377382
set_layer_name(layer, target, f"post_{name}")
378383
return layer.get_output(0)
@@ -2776,9 +2781,15 @@ def slice_to_trt_params(py_slice, dim_size):
27762781
"""
27772782
Convert python slice to TensorRT slice layer parameters.
27782783
"""
2779-
start = get_positive_dim(py_slice.start, dim_size) if py_slice.start else 0
2780-
stride = py_slice.step if py_slice.step else 1
2781-
stop = get_positive_dim(py_slice.stop, dim_size) if py_slice.stop else dim_size
2784+
start = (
2785+
get_positive_dim(py_slice.start, dim_size) if py_slice.start != None else 0
2786+
)
2787+
stride = py_slice.step if py_slice.step != None else 1
2788+
stop = (
2789+
get_positive_dim(py_slice.stop, dim_size)
2790+
if py_slice.stop != None
2791+
else dim_size
2792+
)
27822793
size = math.ceil((stop - start) * 1.0 / stride)
27832794
return start, size, stride
27842795

@@ -2989,9 +3000,11 @@ def acc_ops_permute(
29893000
) -> Union[TRTTensor, Sequence[TRTTensor]]:
29903001
input_val = kwargs["input"]
29913002
ranks = len(input_val.shape) + (1 if network.has_implicit_batch_dimension else 0) # type: ignore[union-attr]
2992-
permutation = [
2993-
get_positive_dim(i, ranks) for i in cast(Sequence[int], kwargs["permutation"])
2994-
]
3003+
if len(kwargs["permutation"]) == 1:
3004+
index = kwargs["permutation"][0]
3005+
else:
3006+
index = kwargs["permutation"]
3007+
permutation = [get_positive_dim(i, ranks) for i in cast(Sequence[int], index)]
29953008

29963009
if not isinstance(input_val, TRTTensor):
29973010
raise RuntimeError(

fx/passes/lower_basic_pass.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import copy
22
import operator
3-
import operator
43
import warnings
54
from typing import Any
65

@@ -297,7 +296,7 @@ def split_across(
297296
start_node = end_node = mid_node = None
298297
if sli.start is None and sli.stop is None:
299298
return (start_node, input_node, end_node)
300-
if sli.start is not None and sli.start > 0:
299+
if sli.start is not None:
301300
st_sli = slice(0, sli.start, None)
302301
slice_list_gen = slice_list(st_sli, dim, size)
303302
start_node = gm.graph.call_function(
@@ -364,7 +363,11 @@ def transform_setitem(gm: torch.fx.GraphModule, input: Input):
364363
for ind, val in enumerate(new_args):
365364
if type(val) == int:
366365
inp_flag = True
367-
new_args[ind] = slice(val, val + 1, None)
366+
if val == -1:
367+
new_args[ind] = slice(-1, None, None)
368+
else:
369+
new_args[ind] = slice(val, val + 1, None)
370+
368371
if inp_flag:
369372
with gm.graph.inserting_before(inp):
370373
new_node = gm.graph.call_function(
@@ -375,7 +378,18 @@ def transform_setitem(gm: torch.fx.GraphModule, input: Input):
375378

376379
if type(sli) is not tuple:
377380
sli = [sli]
378-
sli = [slice(x, x + 1, None) if type(x) == int else x for x in sli]
381+
382+
tmp_sli = []
383+
for x in sli:
384+
if type(x) == int:
385+
if x == -1:
386+
tmp_sli.append(slice(-1, None, None))
387+
else:
388+
tmp_sli.append(slice(x, x + 1, None))
389+
else:
390+
tmp_sli.append(x)
391+
sli = tmp_sli
392+
379393
dimension = len(sli)
380394
with gm.graph.inserting_before(node):
381395
if dimension == 1:

test/converters/acc_op/test_getitem.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ class TestGetitemConverter(AccTestCase):
3737
"none",
3838
(slice(None, None, None), None, slice(1, -1, 3), 1),
3939
),
40+
(
41+
"slice_zero_slice",
42+
(slice(None, None, None), slice(None, None, None), slice(0, 0, None)),
43+
),
4044
]
4145
)
4246
def test_getitem(self, _, idx):

test/converters/acc_op/test_pad.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,37 +12,42 @@
1212
class TestPadConverter(AccTestCase):
1313
@parameterized.expand(
1414
[
15-
("1d", (1, 2)),
16-
("2d", (2, 0, 0, 1)),
15+
("1d", (1, 2), 9),
16+
("2d", (2, 0, 0, 1), 10),
1717
]
1818
)
19-
def test_pad(self, _, pad):
19+
def test_pad_value(self, _, pad, value):
2020
class Pad(nn.Module):
2121
def forward(self, x):
22-
return torch.nn.functional.pad(x, pad)
22+
return torch.nn.functional.pad(x, pad, value=value)
2323

2424
inputs = [torch.randn(1, 2, 3, 4)]
2525
self.run_test(
2626
Pad(),
2727
inputs,
2828
expected_ops={acc_ops.pad},
29+
# enable value will not work with implicit batch
30+
test_implicit_batch_dim=False,
2931
)
3032

3133
@parameterized.expand(
3234
[
33-
param("value", pad=(2, 0, 0, 1), value=1),
35+
("1d", (1, 2)),
36+
("2d", (2, 0, 0, 1)),
3437
]
3538
)
36-
def test_pad_fail(self, _, pad, mode="constant", value=0):
39+
def test_pad(self, _, pad):
3740
class Pad(nn.Module):
3841
def forward(self, x):
39-
return torch.nn.functional.pad(x, pad, mode, value)
42+
return torch.nn.functional.pad(x, pad)
4043

4144
inputs = [torch.randn(1, 2, 3, 4)]
42-
self.run_test_with_assert_error(
45+
self.run_test(
4346
Pad(),
4447
inputs,
45-
expect_error=RuntimeError,
48+
expected_ops={acc_ops.pad},
49+
# enable value will not work with implicit batch
50+
test_implicit_batch_dim=False,
4651
)
4752

4853
@parameterized.expand(
@@ -64,6 +69,8 @@ def forward(self, x):
6469
Pad(),
6570
inputs,
6671
expected_ops={acc_ops.pad},
72+
# enable value will not work with implicit batch
73+
test_implicit_batch_dim=False,
6774
)
6875

6976

test/converters/acc_op/test_permute.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@
77

88

99
class TestPermuteConverter(AccTestCase):
10+
@parameterized.expand(
11+
[
12+
("positive", [0, 2, 1]),
13+
("negative", [0, -1, -2]),
14+
]
15+
)
16+
def test_permute_list(self, _, permutation):
17+
class Permute(nn.Module):
18+
def forward(self, x):
19+
return x.permute(permutation)
20+
21+
inputs = [torch.randn(1, 3, 2)]
22+
self.run_test(Permute(), inputs, expected_ops={acc_ops.permute})
23+
1024
@parameterized.expand(
1125
[
1226
("positive", [0, 2, 1]),

0 commit comments

Comments (0)