Add support for transposed convolution negative input padding (#4096)

ivangarcia44 · Ivan Garcia · web-flow · commit 8b6ddd3d69f0 · 2025-04-04T11:34:38.000-04:00
Currently, when a transposed convolution is lowered from the torch dialect to the linalg dialect, we get an insert_slice operation that creates the padding for the input tensor. For example: %inserted_slice = tensor.insert_slice %arg0 into %cast[0, 0, 2, %c-1] [1, 1, 4, 7] [1, 1, 1, 1] : tensor<1x1x4x7xf32> into tensor<1x1x?x?xf32> The above works well for the case where the input padding is positive. For transposed convolution the input padding is defined by this formula: dilation * (kernel_size - 1) - padding (see https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html for details). Notice that if the padding argument is greater than the left-hand term, dilation * (kernel_size - 1), the resulting input padding is negative. For these cases PyTorch reduces the size of the input tensor. The torch to linalg lowering was not doing this, and therefore its result did not match what PyTorch gives (captured in e2e tests TransposedConv2dNegativePadding and TransposedConv3dNegativePadding). To fix this, a tensor.extract_slice operation is added just before the insert_slice operation to reduce the input tensor size as PyTorch does. For the example above we now get the code below, whose result matches the numerical values of PyTorch: %extracted_slice = tensor.extract_slice %arg0[0, 0, 0, 1] [1, 1, 4, 5] [1, 1, 1, 1] : tensor<1x1x4x7xf32> to tensor<1x1x4x5xf32> %inserted_slice = tensor.insert_slice %extracted_slice into %4[0, 0, 2, 0] [1, 1, 4, 5] [1, 1, 1, 1] : tensor<1x1x4x5xf32> into tensor<1x1x8x5xf32> For each dimension with a negative padding, we add a positive offset (the absolute value of the negative padding) in the corresponding dimension of the extract_slice operation, and the dimension size is reduced by twice that amount (elements are dropped on both sides of the dimension, as specified by PyTorch). Then, in the insert_slice, the dimension with negative padding has an offset of zero because the trimmed dimension fits exactly. For the case when the padding is positive the existing behavior is kept. 
@rsuderman @vivekkhandelwal1 @zjgarvey @penguin-wwy @ubfx @sahas3 @Hanumanth04 @dixinzhou @rafaelubalmw --------- Co-authored-by: Ivan Garcia <igarcia@vdi-ah2ddp-178.dhcp.mathworks.com>
diff --git a/lib/Conversion/TorchToLinalg/Linear.cpp b/lib/Conversion/TorchToLinalg/Linear.cpp
@@ -748,6 +748,31 @@ class ConvertAtenBmmOp : public OpConversionPattern<AtenBmmOp> {
 };
 } // namespace
 
+namespace {
+/// Returns true only when `value` is the result of an operation that folds to
+/// an integer attribute with a negative value.
+///
+/// Every other case yields false: a value with no defining op, an op that does
+/// not fold (or folds without producing results), or a fold result that is not
+/// an IntegerAttr. A `false` result therefore means "not known to be negative",
+/// not "known to be non-negative".
+bool isValueNegative(mlir::Value value) {
+  mlir::Operation *definingOp = value.getDefiningOp();
+  if (!definingOp)
+    return false;
+
+  // Ask the defining op to fold itself.
+  mlir::SmallVector<mlir::OpFoldResult, 1> results;
+  if (failed(definingOp->fold(results)) || results.empty())
+    return false;
+
+  // Only the first fold result is inspected. The free-function cast replaces
+  // the PointerUnion member `dyn_cast`, keeping its tolerance of a null result.
+  auto attr = llvm::dyn_cast_if_present<mlir::Attribute>(results.front());
+  if (auto intAttr = llvm::dyn_cast_if_present<mlir::IntegerAttr>(attr))
+    return intAttr.getInt() < 0;
+
+  return false;
+}
+} // namespace
+
 namespace {
 class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
 public:
@@ -1008,8 +1033,6 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
           rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(0));
       Value c1 =
           rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(1));
-      Value c2 =
-          rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(2));
 
       // Transpose and flip weight
       SmallVector<Value> weightInitDims = getTensorSizes(rewriter, loc, weight);
@@ -1060,45 +1083,10 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
                        })
                    .getResult(0);
 
-      // Calculate padded input size, allocate tensor
-      SmallVector<Value> outerSizes{inBatch, inChannels};
-      SmallVector<Value> innerSizes{inBatch, inChannels};
-      SmallVector<Value> offsets{c0, c0};
-      for (size_t i = 0; i < numSpatialDims; i++) {
-        Value innerSize = rewriter.create<arith::SubIOp>(loc, inDims[i], c1);
-        innerSize = rewriter.create<arith::MulIOp>(
-            loc, innerSize, castIntToIndex(rewriter, loc, strideIntValues[i]));
-        innerSize = rewriter.create<arith::AddIOp>(loc, innerSize, c1);
-
-        Value offset = rewriter.create<arith::SubIOp>(loc, weightDims[i], c1);
-        offset = rewriter.create<arith::MulIOp>(
-            loc, offset, castIntToIndex(rewriter, loc, dilationIntValues[i]));
-        offset = rewriter.create<arith::SubIOp>(
-            loc, offset, castIntToIndex(rewriter, loc, paddingIntValues[i]));
-
-        Value outerSize = rewriter.create<arith::MulIOp>(loc, offset, c2);
-        outerSize = rewriter.create<arith::AddIOp>(loc, outerSize, innerSize);
-        outerSize = rewriter.create<arith::AddIOp>(
-            loc, outerSize,
-            castIntToIndex(rewriter, loc, outputPaddingIntValues[i]));
-
-        outerSizes.push_back(outerSize);
-        offsets.push_back(offset);
-      }
-
-      // Allocate padded input tensor
-      Value initTensor =
-          createInitTensor(rewriter, loc, outerSizes, inputDTy, pad);
-
-      // Insert input into allocated tensor
-      SmallVector<Value> strideIndexValues{c1, c1};
-      for (auto stride : strideIntValues)
-        strideIndexValues.push_back(castIntToIndex(rewriter, loc, stride));
-      SmallVector<Value> insertSizes = getTensorSizes(rewriter, loc, input);
-
-      paddedInput = rewriter.create<tensor::InsertSliceOp>(
-          loc, torch_to_linalg::removeSizeInformation(rewriter, loc, input),
-          initTensor, offsets, insertSizes, strideIndexValues);
+      paddedInput = createTransposedInputPadding(
+          inBatch, inChannels, inDims, weightDims, paddingIntValues,
+          strideIntValues, dilationIntValues, outputPaddingIntValues, input,
+          inputDTy, pad, rewriter, loc, numSpatialDims, c0, c1);
 
       // Calculate output dims
       for (size_t i = 0; i < numSpatialDims; i++)
@@ -1482,9 +1470,107 @@ class ConvertAtenConvolutionOp : public OpConversionPattern<AtenConvolutionOp> {
     rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, conv);
     return success();
   }
+
+  /// Builds the padded input tensor for the transposed-convolution path and
+  /// returns it. The call site passes the batch/channel sizes, the spatial
+  /// input and weight dims, the padding/stride/dilation/output-padding values,
+  /// the input tensor with its element type, the pad value, and the index
+  /// constants `c0` and `c1`.
+  static Value createTransposedInputPadding(
+      Value inBatch, Value inChannels, SmallVector<Value> &inDims,
+      SmallVector<Value> &weightDims, SmallVector<Value> &paddingIntValues,
+      SmallVector<Value> &strideIntValues,
+      SmallVector<Value> &dilationIntValues,
+      SmallVector<Value> &outputPaddingIntValues, Value input, Type inputDTy,
+      Value pad, PatternRewriter &rewriter, Location loc, size_t numSpatialDims,
+      Value c0, Value c1);
 };
 } // namespace
 
+/// Builds the padded input tensor used to lower a transposed convolution.
+///
+/// For every spatial dimension i the following index values are computed:
+///   innerSize = stride * (inDims[i] - 1) + 1
+///   offset    = dilation * (weightDims[i] - 1) - padding
+///   outerSize = 2 * offset + innerSize + outputPadding
+/// A tensor of shape [inBatch, inChannels, outerSize...] is obtained from
+/// createInitTensor(..., inputDTy, pad) and `input` is written into it with
+/// tensor.insert_slice, using stride 1 for batch/channel and the convolution
+/// stride for each spatial dimension.
+///
+/// When isValueNegative(offset) holds for a dimension, `input` is first cropped
+/// with tensor.extract_slice (start -offset, size reduced by 2 * -offset) and
+/// the insert offset for that dimension becomes 0. Offsets that are negative
+/// but not recognised by isValueNegative take the non-cropping path.
+///
+/// NOTE(review): the extract_slice is created with the same stride list as the
+/// insert_slice. That looks right only for unit convolution strides; confirm
+/// the intended behaviour for stride > 1 combined with negative padding.
+///
+/// `c0` and `c1` are expected to be the index constants 0 and 1 supplied by the
+/// caller.
+///
+/// Returns the result of the tensor.insert_slice operation.
+Value ConvertAtenConvolutionOp::createTransposedInputPadding(
+    Value inBatch, Value inChannels, SmallVector<Value> &inDims,
+    SmallVector<Value> &weightDims, SmallVector<Value> &paddingIntValues,
+    SmallVector<Value> &strideIntValues, SmallVector<Value> &dilationIntValues,
+    SmallVector<Value> &outputPaddingIntValues, Value input, Type inputDTy,
+    Value pad, PatternRewriter &rewriter, Location loc, size_t numSpatialDims,
+    Value c0, Value c1) {
+  // Shape of the padded tensor and the position of `input` inside it.
+  SmallVector<Value> outerSizes{inBatch, inChannels};
+  SmallVector<Value> insertSliceOffsets{c0, c0};
+
+  // Sizes of the slice that is inserted; batch and channel are kept whole.
+  SmallVector<Value> inputSizes = getTensorSizes(rewriter, loc, input);
+  SmallVector<Value> sliceSizes{inputSizes[0], inputSizes[1]};
+
+  // For the case in which the padding dimension value is negative,
+  // we will need to shrink the dimension. Note in the PyTorch
+  // ConvTranspose2d operator documentation that the padding is
+  // defined by dilation * (kernel_size - 1) - padding. If the
+  // resulting padding is negative, PyTorch will extract elements
+  // from both sides of the dimension.
+  SmallVector<Value> extractSliceOffsets{c0, c0};
+  bool anyDimensionPaddingIsNegative = false;
+
+  Value c2 = rewriter.create<arith::ConstantOp>(loc, rewriter.getIndexAttr(2));
+
+  for (size_t i = 0; i < numSpatialDims; i++) {
+    // innerSize = stride * (inDim - 1) + 1
+    Value innerSize = rewriter.createOrFold<arith::SubIOp>(loc, inDims[i], c1);
+    innerSize = rewriter.createOrFold<arith::MulIOp>(
+        loc, innerSize, castIntToIndex(rewriter, loc, strideIntValues[i]));
+    innerSize = rewriter.createOrFold<arith::AddIOp>(loc, innerSize, c1);
+
+    // offset = dilation * (kernelSize - 1) - padding; this may be negative.
+    Value offset = rewriter.createOrFold<arith::SubIOp>(loc, weightDims[i], c1);
+    offset = rewriter.createOrFold<arith::MulIOp>(
+        loc, offset, castIntToIndex(rewriter, loc, dilationIntValues[i]));
+    offset = rewriter.createOrFold<arith::SubIOp>(
+        loc, offset, castIntToIndex(rewriter, loc, paddingIntValues[i]));
+
+    // outerSize = 2 * offset + innerSize + outputPadding
+    Value outerSize = rewriter.createOrFold<arith::MulIOp>(loc, offset, c2);
+    outerSize = rewriter.createOrFold<arith::AddIOp>(loc, outerSize, innerSize);
+    outerSize = rewriter.createOrFold<arith::AddIOp>(
+        loc, outerSize,
+        castIntToIndex(rewriter, loc, outputPaddingIntValues[i]));
+
+    outerSizes.push_back(outerSize);
+    if (isValueNegative(offset)) {
+      // Make the negative value positive by multiplying by -1.
+      anyDimensionPaddingIsNegative = true;
+      auto offsetType = offset.getType();
+      auto negOneConst = rewriter.createOrFold<arith::ConstantOp>(
+          loc, offsetType, rewriter.getIntegerAttr(offsetType, -1));
+      auto posOffset =
+          rewriter.createOrFold<arith::MulIOp>(loc, offset, negOneConst);
+
+      // Compute the reduced dimension size due to negative padding: the
+      // slice loses posOffset elements at each end of this dimension.
+      auto sizeReduction =
+          rewriter.createOrFold<arith::MulIOp>(loc, posOffset, c2);
+      sliceSizes.push_back(rewriter.createOrFold<arith::SubIOp>(
+          loc, inputSizes[i + 2], sizeReduction));
+
+      // The cropped slice is inserted at the start of the padded dimension.
+      extractSliceOffsets.push_back(posOffset);
+      insertSliceOffsets.push_back(c0);
+    } else {
+      sliceSizes.push_back(inputSizes[i + 2]);
+      extractSliceOffsets.push_back(c0);
+      insertSliceOffsets.push_back(offset);
+    }
+  }
+  Value initTensor = createInitTensor(rewriter, loc, outerSizes, inputDTy, pad);
+
+  // Insert input into allocated tensor: unit stride on batch/channel and the
+  // convolution stride on each spatial dimension.
+  SmallVector<Value> strideIndexValues{c1, c1};
+  for (auto stride : strideIntValues)
+    strideIndexValues.push_back(castIntToIndex(rewriter, loc, stride));
+
+  // Crop the input only if at least one dimension needs it.
+  auto insertSliceOpInput = input;
+  if (anyDimensionPaddingIsNegative) {
+    insertSliceOpInput = rewriter.create<tensor::ExtractSliceOp>(
+        loc, torch_to_linalg::removeSizeInformation(rewriter, loc, input),
+        extractSliceOffsets, sliceSizes, strideIndexValues);
+  }
+
+  auto paddedInput = rewriter.create<tensor::InsertSliceOp>(
+      loc,
+      torch_to_linalg::removeSizeInformation(rewriter, loc, insertSliceOpInput),
+      initTensor, insertSliceOffsets, sliceSizes, strideIndexValues);
+  return paddedInput;
+}
+
 namespace {
 
 /// Creates coefficients based on DFT definition, see
diff --git a/projects/pt1/e2e_testing/xfail_sets.py b/projects/pt1/e2e_testing/xfail_sets.py
@@ -3770,6 +3770,9 @@
     "TorchPrimLoopWhileLikeModule_basic",
     "TraceModule_empty",
     "TraceUnsignedIntModule_empty",
+    "TransposedConv1dNegativePadding_basic",
+    "TransposedConv2dNegativePadding_basic",
+    "TransposedConv3dNegativePadding_basic",
     "UnsafeViewCollapseDynamicWithAtenSizeIntModule_basic",
     "UpSampleNearest2dBackwardScalesNone_basic",
     "UpSampleNearest2dBackward_basic",
@@ -4758,6 +4761,9 @@
     "TraceSignedIntModule_basic",
     "TraceUnsignedIntModule_basic",
     "TraceUnsignedIntModule_empty",
+    "TransposedConv1dNegativePadding_basic",
+    "TransposedConv2dNegativePadding_basic",
+    "TransposedConv3dNegativePadding_basic",
     "TupleModule_basic",
     "TypeAsDifferentModule_basic",
     "TypeConversionF32ToF64Module_basic",
diff --git a/projects/pt1/python/torch_mlir_e2e_test/test_suite/conv.py b/projects/pt1/python/torch_mlir_e2e_test/test_suite/conv.py
@@ -1757,3 +1757,99 @@ def forward(self, inputVec, weight, bias):
 @register_test_case(module_factory=lambda: ConvolutionModule2DGroupedTranspose())
 def ConvolutionModule2DGroupedTranspose_basic(module, tu: TestUtils):
     module.forward(tu.rand(1, 2, 5, 7), tu.rand(2, 2, 3, 3), tu.rand(4))
+
+
+class TransposedConv1dNegativePadding(torch.nn.Module):
+    """1-D transposed convolution whose padding exceeds dilation * (kernel - 1).
+
+    With kernel size 3, dilation 1 and padding 3 the effective input padding
+    is 1 * (3 - 1) - 3 = -1.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args(
+        [
+            None,
+            ([1, 1, 7], torch.float32, True),
+            ([1, 2, 3], torch.float32, True),
+            ([2], torch.float32, True),
+        ]
+    )
+    def forward(self, inputVec, weight, bias):
+        return torch.ops.aten.convolution(
+            inputVec,
+            weight,
+            bias=bias,
+            stride=[1],
+            padding=[3],
+            dilation=[1],
+            transposed=True,
+            output_padding=[0],
+            groups=1,
+        )
+
+
+# Runs the module on tu.rand tensors whose shapes equal the annotated shapes
+# of input, weight and bias.
+@register_test_case(module_factory=lambda: TransposedConv1dNegativePadding())
+def TransposedConv1dNegativePadding_basic(module, tu: TestUtils):
+    module.forward(tu.rand(1, 1, 7), tu.rand(1, 2, 3), tu.rand(2))
+
+
+class TransposedConv2dNegativePadding(torch.nn.Module):
+    """2-D transposed convolution with negative input padding in the last dim.
+
+    The kernel is 3x3 with dilation 1, so padding [0, 3] gives effective input
+    paddings of 1 * (3 - 1) - 0 = 2 and 1 * (3 - 1) - 3 = -1.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args(
+        [
+            None,
+            ([1, 1, 4, 7], torch.float32, True),
+            ([1, 2, 3, 3], torch.float32, True),
+            ([2], torch.float32, True),
+        ]
+    )
+    def forward(self, inputVec, weight, bias):
+        return torch.ops.aten.convolution(
+            inputVec,
+            weight,
+            bias=bias,
+            stride=[1, 1],
+            padding=[0, 3],
+            dilation=[1, 1],
+            transposed=True,
+            output_padding=[0, 0],
+            groups=1,
+        )
+
+
+# Runs the module on tu.rand tensors whose shapes equal the annotated shapes
+# of input, weight and bias.
+@register_test_case(module_factory=lambda: TransposedConv2dNegativePadding())
+def TransposedConv2dNegativePadding_basic(module, tu: TestUtils):
+    module.forward(tu.rand(1, 1, 4, 7), tu.rand(1, 2, 3, 3), tu.rand(2))
+
+
+class TransposedConv3dNegativePadding(torch.nn.Module):
+    """3-D transposed convolution with negative input padding in the last dim.
+
+    The kernel is 3x7x3 with dilation 1, so padding [2, 1, 3] gives effective
+    input paddings of 0, 5 and -1 for the three spatial dims.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args(
+        [
+            None,
+            ([4, 1, 8, 13, 17], torch.float32, True),
+            ([1, 1, 3, 7, 3], torch.float32, True),
+            ([1], torch.float32, True),
+        ]
+    )
+    def forward(self, inputVec, weight, bias):
+        return torch.ops.aten.convolution(
+            inputVec,
+            weight,
+            bias=bias,
+            stride=[1, 1, 1],
+            padding=[2, 1, 3],
+            dilation=[1, 1, 1],
+            transposed=True,
+            output_padding=[0, 0, 0],
+            groups=1,
+        )
+
+
+# Runs the module on tu.rand tensors whose shapes equal the annotated shapes
+# of input, weight and bias.
+@register_test_case(module_factory=lambda: TransposedConv3dNegativePadding())
+def TransposedConv3dNegativePadding_basic(module, tu: TestUtils):
+    module.forward(tu.rand(4, 1, 8, 13, 17), tu.rand(1, 1, 3, 7, 3), tu.rand(1))
diff --git a/test/Conversion/TorchToLinalg/convolution.mlir b/test/Conversion/TorchToLinalg/convolution.mlir
@@ -150,3 +150,27 @@ func.func @transposedGroupedConvolution2D(%arg0: !torch.vtensor<[1,2,5,7],f32>)
   %6 = torch.aten.convolution %arg0, %0, %1, %2, %3, %4, %true, %5, %int2 : !torch.vtensor<[1,2,5,7],f32>, !torch.vtensor<[2,2,3,3],f32>, !torch.vtensor<[4],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1,4,10,14],f32>
   return %6 : !torch.vtensor<[1,4,10,14],f32>
 }
+
+// CHECK-LABEL:   func.func @tranConv2dNegativePadding(
+// CHECK-SAME:       %[[INPUT_VTENSOR:.*]]: !torch.vtensor<[1,1,4,7],f32>) -> !torch.vtensor<[1,2,6,3],f32>
+// CHECK:            %[[IN_TENSOR:.*]] = torch_c.to_builtin_tensor %[[INPUT_VTENSOR]] : !torch.vtensor<[1,1,4,7],f32> -> tensor<1x1x4x7xf32>
+// CHECK:            %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[IN_TENSOR]][0, 0, 0, 1] [1, 1, 4, 5] [1, 1, 1, 1] : tensor<1x1x4x7xf32> to tensor<1x1x4x5xf32>
+// CHECK:            %[[INSERTED_SLICE:.*]] = tensor.insert_slice %[[EXTRACTED_SLICE]] into %[[INIT_TENSOR:.*]][0, 0, 2, 0] [1, 1, 4, 5] [1, 1, 1, 1] : tensor<1x1x4x5xf32> into tensor<1x1x8x5xf32>
+// CHECK:            %[[OUT_TENSOR:.*]] = linalg.conv_2d_nchw_fchw {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[INSERTED_SLICE]], %[[WEIGHTS:.*]] : tensor<1x1x8x5xf32>, tensor<2x1x3x3xf32>) outs(%[[INIT_OUT_TENSOR:.*]] : tensor<1x2x6x3xf32>) -> tensor<1x2x6x3xf32>
+// CHECK:            %[[OUT_VTENSOR:.*]] = torch_c.from_builtin_tensor %[[OUT_TENSOR]] : tensor<1x2x6x3xf32> -> !torch.vtensor<[1,2,6,3],f32>
+// Transposed 2-D convolution (%true) with a 1x2x3x3 weight, stride [1, 1],
+// padding [0, 3], dilation [1, 1], output padding [0, 0] and one group.
+// For the last spatial dim the input padding is 1 * (3 - 1) - 3 = -1, so the
+// lowering is expected to crop the input before inserting it.
+func.func @tranConv2dNegativePadding(%arg0: !torch.vtensor<[1, 1, 4, 7],f32>) -> !torch.vtensor<[1, 2, 6, 3],f32> attributes {torch.assume_strict_symbolic_shapes} {
+  %int0 = torch.constant.int 0
+  %true = torch.constant.bool true
+  %int1 = torch.constant.int 1
+  %int2 = torch.constant.int 2
+  %int3 = torch.constant.int 3
+  %int4 = torch.constant.int 4
+  %0 = torch.vtensor.literal(dense_resource<torch_tensor_1_2_3_3_torch.float32> : tensor<1x2x3x3xf32>) : !torch.vtensor<[1,2,3,3],f32>
+  %1 = torch.vtensor.literal(dense_resource<torch_tensor_2_torch.float32> : tensor<2xf32>) : !torch.vtensor<[2],f32>
+  // %2 = stride, %3 = padding, %4 = dilation, %5 = output padding.
+  %2 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
+  %3 = torch.prim.ListConstruct %int0, %int3 : (!torch.int, !torch.int) -> !torch.list<int>
+  %4 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int>
+  %5 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int>
+  %6 = torch.aten.convolution %arg0, %0, %1, %2, %3, %4, %true, %5, %int1 : !torch.vtensor<[1, 1, 4, 7],f32>, !torch.vtensor<[1,2,3,3],f32>, !torch.vtensor<[2],f32>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[1, 2, 6, 3],f32>
+  return %6 : !torch.vtensor<[1, 2, 6, 3],f32>
+}