add lowering torch.aten.pixel_unshuffle op to linalg

alaa-ali · alaa-ali · commit 39a85409827f · 2025-08-25T15:25:09.000-04:00
diff --git a/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td b/include/torch-mlir/Dialect/Torch/IR/GeneratedTorchOps.td
@@ -8668,6 +8668,30 @@ def Torch_AtenPixelShuffleOp : Torch_Op<"aten.pixel_shuffle", [
   }];
 }
 
+def Torch_AtenPixelUnshuffleOp : Torch_Op<"aten.pixel_unshuffle", [ // ODS record for the "aten.pixel_unshuffle" op
+    AllowsTypeRefinement,
+    HasValueSemantics,
+    ReadOnly
+  ]> {
+  let summary = "Generated op for `aten::pixel_unshuffle : (Tensor, int) -> (Tensor)`";
+  let arguments = (ins
+    AnyTorchTensorType:$self,
+    Torch_IntType:$downscale_factor
+  );
+  let results = (outs
+    AnyTorchOptionalTensorType:$result
+  );
+  let hasCustomAssemblyFormat = 1; // parse/print are supplied in extraClassDefinition below
+  let extraClassDefinition = [{
+    ParseResult AtenPixelUnshuffleOp::parse(OpAsmParser &parser, OperationState &result) {
+      return parseDefaultTorchOp(parser, result, 2, 1); // NOTE(review): 2, 1 presumably are the operand/result counts (they match ins/outs above) - confirm
+    }
+    void AtenPixelUnshuffleOp::print(OpAsmPrinter &printer) {
+      printDefaultTorchOp(printer, *this, 2, 1); // same two trailing arguments as in parse()
+    }
+  }];
+}
+
 def Torch_AtenChannelShuffleOp : Torch_Op<"aten.channel_shuffle", [
     AllowsTypeRefinement,
     HasValueSemantics,
diff --git a/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp b/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp
@@ -3710,6 +3710,177 @@ class DecomposeAtenPixelShuffleOp
 };
 } // namespace
 
+// Decompose aten.pixel_unshuffle into: prims.split_dim, aten.permute, and
+// prims.collapse operations.
+//
+// We want to do the exact opposite of aten.pixel_shuffle
+//
+// If input is a tensor of shape
+//     (*leading_dims, C, H*r, W*r),
+//
+// where leading_dims is of size N, then
+//    X = pixel_unshuffle(input, downscale_factor)
+//
+// gets replaced with
+//    X = input.split_dim(...)  # shape (*leading_dims, C, H, r, W*r)
+//    X = X.split_dim(...)      # shape (*leading_dims, C, H, r, W, r)
+//    X = X.permute(0, ..., N, N+2, N+4, N+1, N+3)
+//                              # shape (*leading_dims, C, r, r, H, W)
+//    X = X.collapse(...)       # shape (*leading_dims, C, r*r, H, W)
+//    X = X.collapse(...)       # shape (*leading_dims, C*r*r, H, W)
+//
+// 'r' above is referred to as the 'downscale factor' or just 'factor' below.
+namespace {
+class DecomposeAtenPixelUnshuffleOp
+    : public OpRewritePattern<AtenPixelUnshuffleOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  LogicalResult matchAndRewrite(AtenPixelUnshuffleOp op,
+                                PatternRewriter &rewriter) const override {
+    // Rewrite the op as two split_dims, one permute, and two collapses.
+    Location loc = op.getLoc();
+    Value inValue = op.getSelf();
+    auto inType = cast<BaseTensorType>(inValue.getType());
+    auto maybeSizes = inType.getOptionalSizes();
+    if (!maybeSizes) {
+      return rewriter.notifyMatchFailure(
+          op, "Expected input tensor to have known rank.");
+    }
+    auto inShape = maybeSizes.value();
+    auto inRank = inShape.size();
+
+    // The input tensor must have at least 3 dimensions: (1) the channel
+    // dimension, which grows by 'factor*factor', (2) the H dimension, which
+    // shrinks by 'factor', and (3) the W dimension, which shrinks by
+    // 'factor'. The total number of dimensions is 3 + N, where N is the number
+    // of leading dimensions, and N >= 0 so the input must have rank at least 3.
+    if (inRank < 3)
+      return rewriter.notifyMatchFailure(
+          op, "Expected input tensor to have rank greater than 2.");
+
+    const auto inOptionalDType = inType.getOptionalDtype();
+
+    auto getTypeFromShape = [inOptionalDType](auto &&vals) {
+      // Convert size Values to ints; non-constant sizes become kUnknownSize.
+      auto getIntShape = [](auto &&vals) {
+        SmallVector<int64_t> shape;
+        shape.reserve(vals.size());
+        for (auto v : vals) {
+          int64_t cst_val;
+          if (matchPattern(v, m_TorchConstantInt(&cst_val))) {
+            shape.push_back(cst_val);
+          } else {
+            shape.push_back(kUnknownSize);
+          }
+        }
+        return shape;
+      };
+
+      const auto intShape = getIntShape(vals);
+      return ValueTensorType::get(vals[0].getContext(),
+                                  llvm::ArrayRef(intShape), inOptionalDType);
+    };
+
+    auto nLeadingDims = inRank - 3;
+
+    // Get the size of the dimension 'i'. Note the use of 'createOrFold' instead
+    // of 'create': if the dimension size is known, then the AtenSizeIntOp is
+    // folded to a ConstantOp.
+    auto getDimSize = [&](uint64_t i) -> Value {
+      Value dim =
+          rewriter.create<ConstantIntOp>(loc, rewriter.getI64IntegerAttr(i));
+      return rewriter.createOrFold<AtenSizeIntOp>(loc, inValue, dim);
+    };
+
+    auto inC = getDimSize(inRank - 3);
+    auto inH = getDimSize(inRank - 2);
+    auto inW = getDimSize(inRank - 1);
+
+    auto factor = op.getDownscaleFactor();
+
+    Value factorSquared =
+        rewriter.createOrFold<AtenMulIntOp>(loc, factor, factor);
+
+    Value outC = rewriter.createOrFold<AtenMulIntOp>(loc, inC, factorSquared);
+
+    Value outH = rewriter.createOrFold<AtenFloordivIntOp>(loc, inH, factor);
+    Value outW = rewriter.createOrFold<AtenFloordivIntOp>(loc, inW, factor);
+
+    SmallVector<Value> dimensionConstants;
+    dimensionConstants.reserve(inRank + 2);
+    for (unsigned i = 0; i < inRank + 2; ++i) {
+      dimensionConstants.push_back(
+          rewriter.create<ConstantIntOp>(loc, rewriter.getI64IntegerAttr(i)));
+    }
+
+    SmallVector<Value> leadingDims;
+    leadingDims.reserve(nLeadingDims);
+    for (unsigned i = 0; i < nLeadingDims; ++i) {
+      Value leadingDimSize = rewriter.createOrFold<AtenSizeIntOp>(
+          loc, inValue, dimensionConstants[i]);
+      leadingDims.push_back(leadingDimSize);
+    }
+
+    SmallVector<Value> partiallyExpandedShape = leadingDims;
+    partiallyExpandedShape.append({inC, outH, factor, inW});
+
+    SmallVector<Value> prePermuteShape = leadingDims;
+    prePermuteShape.append({inC, outH, factor, outW, factor});
+
+    SmallVector<Value> postPermuteShape = leadingDims;
+    postPermuteShape.append({inC, factor, factor, outH, outW});
+
+    SmallVector<Value> partiallyCollapsedShape = leadingDims;
+    partiallyCollapsedShape.append({inC, factorSquared, outH, outW});
+
+    SmallVector<Value> outShape = leadingDims;
+    outShape.append({outC, outH, outW});
+
+    SmallVector<Value> permutation{dimensionConstants.begin(),
+                                   dimensionConstants.begin() + nLeadingDims};
+    SmallVector<uint64_t> permutationTail{0, 2, 4, 1, 3};
+    for (uint64_t d : permutationTail) {
+      permutation.push_back(dimensionConstants[nLeadingDims + d]);
+    }
+
+    Value permuteDimsOrder = rewriter.create<PrimListConstructOp>(
+        loc, Torch::ListType::get(Torch::IntType::get(op->getContext())),
+        permutation);
+
+    // Split the height dimension: inH -> (outH, factor)
+    auto partiallyExpanded =
+        rewriter
+            .create<PrimsSplitDimOp>(
+                loc, getTypeFromShape(partiallyExpandedShape), inValue,
+                dimensionConstants[nLeadingDims + 1], outH)
+            .getResult();
+
+    // Split the width dimension: inW -> (outW, factor)
+    auto fullyExpanded = rewriter.create<PrimsSplitDimOp>(
+        loc, getTypeFromShape(prePermuteShape), partiallyExpanded,
+        dimensionConstants[nLeadingDims + 3], outW);
+
+    // Permute (C, outH, r, outW, r) -> (C, r, r, outH, outW)
+    auto permuted =
+        rewriter.create<AtenPermuteOp>(loc, getTypeFromShape(postPermuteShape),
+                                       fullyExpanded, permuteDimsOrder);
+
+    // Collapse the two factor dimensions: (r, r) -> r*r
+    auto partiallyCollapsed = rewriter.create<PrimsCollapseOp>(
+        loc, getTypeFromShape(partiallyCollapsedShape), permuted,
+        dimensionConstants[nLeadingDims + 1],
+        dimensionConstants[nLeadingDims + 2]);
+
+    // Collapse (C, r*r) -> C*r*r, which restores the original rank
+    rewriter.replaceOpWithNewOp<PrimsCollapseOp>(
+        op, op.getType(), partiallyCollapsed, dimensionConstants[nLeadingDims],
+        dimensionConstants[nLeadingDims + 1]);
+
+    return success();
+  }
+};
+} // namespace
+
 // Decompose aten.channel_shuffle into: prims.split_dim, aten.permute, and
 // prims.collapse operations.
 //
@@ -12859,6 +13030,7 @@ class DecomposeComplexOpsPass
     addPatternIfTargetOpIsIllegal<DecomposeAtenRenormOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenLinalgCrossOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenPixelShuffleOp>(patterns);
+    addPatternIfTargetOpIsIllegal<DecomposeAtenPixelUnshuffleOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenChannelShuffleOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAtenTOp>(patterns);
     addPatternIfTargetOpIsIllegal<DecomposeAten_LogSoftmaxBackwardDataOp>(
diff --git a/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp b/lib/Dialect/Torch/Transforms/LowerToBackendContract.cpp
@@ -421,6 +421,7 @@ static void markDecomposedOpsAsIllegal(MLIRContext *context,
   target.addIllegalOp<Aten_LinalgDetOp>();
   target.addIllegalOp<AtenLinalgSlogdetOp>();
   target.addIllegalOp<AtenPixelShuffleOp>();
+  target.addIllegalOp<AtenPixelUnshuffleOp>();
   target.addIllegalOp<AtenChannelShuffleOp>();
   target.addIllegalOp<AtenTOp>();
   target.addIllegalOp<Aten_LogSoftmaxBackwardDataOp>();
diff --git a/lib/Dialect/Torch/Utils/Utils.cpp b/lib/Dialect/Torch/Utils/Utils.cpp
@@ -327,7 +327,7 @@ bool Torch::isViewLikeOp(Operation *op) {
       AtenNarrowTensorOp, AtenToDeviceOp, PrimsSqueezeOp, AtenMovedimIntOp,
       PrimsViewOfOp, AtenRealOp, AtenImagOp, PrimsSplitDimOp,
       AtenViewAsComplexOp, AtenViewAsRealOp, AtenPixelShuffleOp,
-      AtenChannelShuffleOp, AtenDiagonalOp, AtenUnfoldOp>(op);
+      AtenPixelUnshuffleOp, AtenChannelShuffleOp, AtenDiagonalOp, AtenUnfoldOp>(op);
 }
 
 Value Torch::getConstantWithGivenDtypeAndValue(PatternRewriter &rewriter,
diff --git a/projects/pt1/e2e_testing/xfail_sets.py b/projects/pt1/e2e_testing/xfail_sets.py
@@ -819,6 +819,12 @@
     "PrimsSqueezeModule_basic",
     "PrimsViewOfModule_basic",
     "PrimsViewOfZeroRankModule_basic",
+    "PixelUnshuffleModuleFullDynamic_basic",
+    "PixelUnshuffleModuleSpatiallyDynamic_basic",
+    "PixelUnshuffleModuleSpatiallyStatic_basic",
+    "PixelUnshuffleModuleStaticRank3Int64_basic",
+    "PixelUnshuffleModuleStaticRank4Float32_basic",
+    "PixelUnshuffleModuleStaticRank5Float32_basic",
     "QuantizedBatchedInputSingleLayer_basic",
     "QuantizedMLP_basic",
     "QuantizedNoLayer_basic",
@@ -3127,6 +3133,11 @@
     "PixelShuffleModuleSpatiallyDynamic_basic",
     "PixelShuffleModuleSpatiallyStatic_basic",
     "PixelShuffleModuleStaticRank3Int64_basic",
+    "PixelUnshuffleModuleStaticRank5Float32_basic",
+    "PixelUnshuffleModuleStaticRank3Int64_basic",
+    "PixelUnshuffleModuleFullDynamic_basic",
+    "PixelUnshuffleModuleSpatiallyDynamic_basic",
+    "PixelUnshuffleModuleSpatiallyStatic_basic",
     "ChannelShuffleBasic_basic",
     "ChannelShuffleUnitaryGroup_basic",
     "ChannelShuffle1D_basic",
@@ -4738,6 +4749,11 @@
     "PixelShuffleModuleSpatiallyStatic_basic",
     "PixelShuffleModuleStaticRank3Int64_basic",
     "PixelShuffleModuleStaticRank4Float32_basic",
+    "PixelUnshuffleModuleStaticRank5Float32_basic",
+    "PixelUnshuffleModuleStaticRank3Int64_basic",
+    "PixelUnshuffleModuleFullDynamic_basic",
+    "PixelUnshuffleModuleSpatiallyDynamic_basic",
+    "PixelUnshuffleModuleSpatiallyStatic_basic",
     "ChannelShuffleBasic_basic",
     "ChannelShuffleUnitaryGroup_basic",
     "ChannelShuffle1D_basic",
diff --git a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/abstract_interp_lib_gen.py
@@ -843,6 +843,21 @@ def aten〇channel_shuffle〡shape(self: List[int], groups: int) -> List[int]:
     assert len(self) >= 3, "input must be at least rank-3 in channel_shuffle"
     return self
 
+def aten〇pixel_unshuffle〡shape(self: List[int], downscale_factor: int) -> List[int]:
+    # Maps (*leading, C, H, W) to (*leading, C*r*r, H//r, W//r), with r = downscale_factor.
+    assert len(self) >= 3, "input must be at least rank-3 in pixel_unshuffle"
+    downscale_factor_squared = downscale_factor * downscale_factor
+    assert self[-2] % (downscale_factor) == 0, "height  must be divisible by downscale_factor in pixel_unshuffle"
+    assert self[-1] % (downscale_factor) == 0, "width  must be divisible by downscale_factor in pixel_unshuffle"
+    # Leading dimensions are copied unchanged; only the last three sizes are recomputed.
+    out = self[0:-3]
+    out.append(self[-3] * downscale_factor_squared)
+    out.append(self[-2] // downscale_factor)
+    out.append(self[-1] // downscale_factor)
+    return out
+
+
+
 def aten〇permute〡shape(self: List[int], dims: List[int]) -> List[int]:
     return upstream_shape_functions.permute(self, dims)
 
@@ -3069,6 +3084,11 @@ def aten〇pixel_shuffle〡dtype(self_rank_dtype: Tuple[int, int], upscale_facto
     self_rank, self_dtype = self_rank_dtype
     return self_dtype
 
+@check_dtype_function(_check_tensors_with_the_same_dtype(tensor_shapes=[(1, 2, 2)], downscale_factor = 2))
+def aten〇pixel_unshuffle〡dtype(self_rank_dtype: Tuple[int, int], downscale_factor: int) -> int:
+    self_rank, self_dtype = self_rank_dtype  # the rank is unpacked but not used below
+    return self_dtype  # the result dtype is the input's dtype, unchanged
+
 @check_dtype_function(_check_tensors_with_the_same_dtype(tensor_shapes=[(1, 4, 4, 5)], groups = 2))
 def aten〇channel_shuffle〡dtype(self_rank_dtype: Tuple[int, int], groups: int) -> int:
     self_rank, self_dtype = self_rank_dtype
diff --git a/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py b/projects/pt1/python/torch_mlir/jit_ir_importer/build_tools/torch_ods_gen.py
@@ -719,6 +719,7 @@ def emit_with_mutating_variants(key, **kwargs):
     emit("aten::topk : (Tensor, int, int, bool, bool) -> (Tensor, Tensor)")
     emit("aten::transpose.int : (Tensor, int, int) -> (Tensor)", has_folder=True)
     emit("aten::pixel_shuffle : (Tensor, int) -> (Tensor)")
+    emit("aten::pixel_unshuffle : (Tensor, int) -> (Tensor)")
     emit("aten::channel_shuffle : (Tensor, int) -> (Tensor)")
     emit("aten::permute : (Tensor, int[]) -> (Tensor)", has_verifier=True)
     emit("aten::movedim.int : (Tensor, int, int) -> (Tensor)")
diff --git a/projects/pt1/python/torch_mlir_e2e_test/test_suite/basic.py b/projects/pt1/python/torch_mlir_e2e_test/test_suite/basic.py
diff --git a/test/Dialect/Torch/decompose-complex-ops.mlir b/test/Dialect/Torch/decompose-complex-ops.mlir

Original file line number	Diff line number	Diff line change
`@@ -327,7 +327,7 @@ bool Torch::isViewLikeOp(Operation *op) {`
`327`	`327`	`AtenNarrowTensorOp, AtenToDeviceOp, PrimsSqueezeOp, AtenMovedimIntOp,`
`328`	`328`	`PrimsViewOfOp, AtenRealOp, AtenImagOp, PrimsSplitDimOp,`
`329`	`329`	`AtenViewAsComplexOp, AtenViewAsRealOp, AtenPixelShuffleOp,`
`330`		`- AtenChannelShuffleOp, AtenDiagonalOp, AtenUnfoldOp>(op);`
	`330`	`+ AtenPixelUnshuffleOp, AtenChannelShuffleOp, AtenDiagonalOp, AtenUnfoldOp>(op);`
`331`	`331`	`}`
`332`	`332`
`333`	`333`	`Value Torch::getConstantWithGivenDtypeAndValue(PatternRewriter &rewriter,`