Skip to content

Commit 4b36a8a

Browse files
authored
[NFC] Use RankedTensorType's clone and cloneWithEncoding member functions (#7464)
Credit to Jeff for pointing out that these exist.
1 parent 6f99791 commit 4b36a8a

File tree

25 files changed

+80
-158
lines changed

25 files changed

+80
-158
lines changed

include/triton/Dialect/Triton/IR/TritonTypes.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def TT_TensorDescType : TritonTypeDef<"TensorDesc", "tensordesc", []> {
108108
if (auto intTy = llvm::dyn_cast<IntegerType>(blockType.getElementType())) {
109109
auto sem = isSigned ? IntegerType::Signed : IntegerType::Unsigned;
110110
auto elemTy = IntegerType::get($_ctxt, intTy.getWidth(), sem);
111-
blockType = RankedTensorType::get(blockType.getShape(), elemTy);
111+
blockType = blockType.clone(elemTy);
112112
}
113113
return Base::get($_ctxt, blockType);
114114
}]>,
@@ -119,7 +119,7 @@ def TT_TensorDescType : TritonTypeDef<"TensorDesc", "tensordesc", []> {
119119
if (auto intTy = llvm::dyn_cast<IntegerType>(resTy.getElementType())) {
120120
auto width = resTy.getElementTypeBitWidth();
121121
auto signlessTy = IntegerType::get(getContext(), width);
122-
resTy = RankedTensorType::get(resTy.getShape(), signlessTy);
122+
resTy = resTy.clone(signlessTy);
123123
}
124124
return resTy;
125125
}

lib/Analysis/Utility.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -724,8 +724,7 @@ bool matchMmaV3AndDotOperandLayout(RankedTensorType srcTy,
724724
return false;
725725
}
726726
int elementTypeSize = srcTy.getElementType().getIntOrFloatBitWidth();
727-
auto parentTy = RankedTensorType::get(
728-
srcTy.getShape(), srcTy.getElementType(), dotOperandLayout.getParent());
727+
auto parentTy = srcTy.cloneWithEncoding(dotOperandLayout.getParent());
729728
auto ans = mmaLayout.getVersionMajor() == 3 &&
730729
dotOperandLayout.getOpIdx() == 0 &&
731730
mmaLayout.getWarpsPerCTA()[1] == 1 &&

lib/Conversion/TritonToTritonGPU/RelayoutTritonGPU.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ RankedTensorType getTMEMTensorLayout(const TypeConverter *tc,
3131
encoding = ttng::getTmemCompatibleLayout(
3232
tmemEnc.getBlockM(), tmemEnc.getBlockN(), type, numWarps);
3333
}
34-
return RankedTensorType::get(type.getShape(), type.getElementType(),
35-
encoding);
34+
return type.cloneWithEncoding(encoding);
3635
}
3736

3837
struct TMEMLoadOpPattern : public OpConversionPattern<ttng::TMEMLoadOp> {

lib/Conversion/TritonToTritonGPU/TritonGPUConversion.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ TritonGPUTypeConverter::TritonGPUTypeConverter(MLIRContext *context,
3434
triton::gpu::BlockedEncodingAttr encoding =
3535
getDefaultBlockedEncoding(this->context, shape, this->numWarps,
3636
this->threadsPerWarp, this->numCTAs);
37-
return RankedTensorType::get(shape, tensorType.getElementType(), encoding);
37+
return tensorType.cloneWithEncoding(encoding);
3838
});
3939

4040
// Add encoding for tensor pointer
@@ -150,8 +150,7 @@ static RankedTensorType getNewIndicesType(RankedTensorType type,
150150
if (enc == newEncoding)
151151
return {};
152152

153-
return RankedTensorType::get(type.getShape(), type.getElementType(),
154-
newEncoding);
153+
return type.cloneWithEncoding(newEncoding);
155154
}
156155

157156
// Function for converting any gather or scatter op that requires a specific

lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,7 @@ struct TritonExpandDimsPattern
172172
// convert operand to slice of return type
173173
Attribute newArgEncoding = triton::gpu::SliceEncodingAttr::get(
174174
getContext(), op.getAxis(), retEncoding);
175-
RankedTensorType newArgType = RankedTensorType::get(
176-
argType.getShape(), argType.getElementType(), newArgEncoding);
175+
RankedTensorType newArgType = argType.cloneWithEncoding(newArgEncoding);
177176
// construct new op
178177
auto newSrc = rewriter.create<triton::gpu::ConvertLayoutOp>(
179178
op.getLoc(), newArgType, adaptor.getSrc());
@@ -238,8 +237,7 @@ struct TritonDotPattern : public OpConversionPattern<triton::DotOp> {
238237
Attribute dEncoding = triton::gpu::BlockedEncodingAttr::get(
239238
getContext(), origShape, retSizePerThread, retOrder, numWarps,
240239
threadsPerWarp, numCTAs);
241-
RankedTensorType retType =
242-
RankedTensorType::get(origShape, origType.getElementType(), dEncoding);
240+
RankedTensorType retType = origType.cloneWithEncoding(dEncoding);
243241
// a & b must be of smem layout
244242
auto aType = cast<RankedTensorType>(adaptor.getA().getType());
245243
auto bType = cast<RankedTensorType>(adaptor.getB().getType());
@@ -255,15 +253,13 @@ struct TritonDotPattern : public OpConversionPattern<triton::DotOp> {
255253
if (!mlir::isa<triton::gpu::DotOperandEncodingAttr>(aEncoding)) {
256254
Attribute encoding = triton::gpu::DotOperandEncodingAttr::get(
257255
getContext(), 0, dEncoding, aEltType);
258-
auto dstType =
259-
RankedTensorType::get(aType.getShape(), aEltType, encoding);
256+
auto dstType = aType.cloneWithEncoding(encoding);
260257
a = rewriter.create<triton::gpu::ConvertLayoutOp>(a.getLoc(), dstType, a);
261258
}
262259
if (!mlir::isa<triton::gpu::DotOperandEncodingAttr>(bEncoding)) {
263260
Attribute encoding = triton::gpu::DotOperandEncodingAttr::get(
264261
getContext(), 1, dEncoding, bEltType);
265-
auto dstType =
266-
RankedTensorType::get(bType.getShape(), bEltType, encoding);
262+
auto dstType = bType.cloneWithEncoding(encoding);
267263
b = rewriter.create<triton::gpu::ConvertLayoutOp>(b.getLoc(), dstType, b);
268264
}
269265
c = rewriter.create<triton::gpu::ConvertLayoutOp>(c.getLoc(), retType, c);
@@ -313,8 +309,7 @@ struct TritonCatPattern : public OpConversionPattern<triton::CatOp> {
313309
triton::gpu::BlockedEncodingAttr::get(
314310
getContext(), newRetSizePerThread, retThreadsPerWarp,
315311
retWarpsPerCTA, retOrder, retEncoding.getCTALayout());
316-
auto newRetType = RankedTensorType::get(retShape, retType.getElementType(),
317-
newRetEncoding);
312+
auto newRetType = retType.cloneWithEncoding(newRetEncoding);
318313
addNamedAttrs(rewriter.replaceOpWithNewOp<triton::CatOp>(
319314
op, newRetType, adaptor.getOperands()),
320315
adaptor.getAttributes());
@@ -387,8 +382,7 @@ struct TritonSplitOpPattern : public OpConversionPattern<triton::SplitOp> {
387382
append(defaultEnc.getCTAsPerCGA(), 1),
388383
append(defaultEnc.getCTASplitNum(), 1),
389384
prepend(defaultEnc.getCTAOrder(), rank - 1)));
390-
srcTy = RankedTensorType::get(srcTy.getShape(), srcTy.getElementType(),
391-
srcEnc);
385+
srcTy = srcTy.cloneWithEncoding(srcEnc);
392386
src = rewriter.create<ConvertLayoutOp>(op.getLoc(), srcTy, src);
393387
}
394388

@@ -427,8 +421,7 @@ struct TritonBroadcastPattern
427421
auto srcEncoding = srcType.getEncoding();
428422
if (!srcEncoding)
429423
return failure();
430-
Type retType = RankedTensorType::get(
431-
op.getType().getShape(), op.getType().getElementType(), srcEncoding);
424+
Type retType = op.getType().cloneWithEncoding(srcEncoding);
432425
// Type retType = this->getTypeConverter()->convertType(op.getType());
433426
addNamedAttrs(rewriter.replaceOpWithNewOp<triton::BroadcastOp>(
434427
op, retType, adaptor.getOperands()),

lib/Dialect/Triton/IR/Ops.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,9 +1244,7 @@ LogicalResult GatherOp::inferReturnTypes(
12441244
auto srcType = cast<RankedTensorType>(adaptor.getSrc().getType());
12451245

12461246
// Shape and encoding of the indices with the element type of the src.
1247-
inferredReturnTypes.push_back(
1248-
RankedTensorType::get(indicesType.getShape(), srcType.getElementType(),
1249-
indicesType.getEncoding()));
1247+
inferredReturnTypes.push_back(indicesType.clone(srcType.getElementType()));
12501248
return success();
12511249
}
12521250

lib/Dialect/Triton/IR/Types.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,16 @@ unsigned getPointeeBitWidth(Type type) {
6464
Type getI1SameShape(Type type) {
6565
auto i1Type = IntegerType::get(type.getContext(), 1);
6666
if (auto tensorTy = dyn_cast<RankedTensorType>(type))
67-
return RankedTensorType::get(tensorTy.getShape(), i1Type,
68-
tensorTy.getEncoding());
67+
return tensorTy.clone(i1Type);
6968
return i1Type;
7069
}
7170

7271
Type getPointeeType(Type type) {
7372
if (auto tensorTy = dyn_cast<RankedTensorType>(type)) {
7473
// Tensor of pointers
75-
auto shape = tensorTy.getShape();
7674
auto ptrType = dyn_cast<PointerType>(tensorTy.getElementType());
7775
Type pointeeType = ptrType.getPointeeType();
78-
return RankedTensorType::get(shape, pointeeType, tensorTy.getEncoding());
76+
return tensorTy.clone(pointeeType);
7977
} else if (auto ptrType = dyn_cast<PointerType>(type)) {
8078
// scalar pointer
8179
Type pointeeType = ptrType.getPointeeType();
@@ -87,17 +85,15 @@ Type getPointeeType(Type type) {
8785
Type getI32SameShape(Type type) {
8886
auto i32Type = IntegerType::get(type.getContext(), 32);
8987
if (auto tensorTy = dyn_cast<RankedTensorType>(type))
90-
return RankedTensorType::get(tensorTy.getShape(), i32Type,
91-
tensorTy.getEncoding());
88+
return tensorTy.clone(i32Type);
9289
return i32Type;
9390
}
9491

9592
Type getPointerTypeSameShape(Type type) {
9693
if (auto tensorTy = dyn_cast<RankedTensorType>(type)) {
9794
Type elementType = tensorTy.getElementType();
98-
auto shape = tensorTy.getShape();
9995
PointerType ptrType = PointerType::get(elementType, 1);
100-
return RankedTensorType::get(shape, ptrType, tensorTy.getEncoding());
96+
return tensorTy.clone(ptrType);
10197
} else {
10298
return PointerType::get(type, 1);
10399
}

lib/Dialect/Triton/Transforms/ReorderBroadcast.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,6 @@ struct MoveBroadcastAfterElementwisePattern
155155

156156
auto srcTy = broadcastOp.getSrc().getType();
157157
auto bcSrcShape = srcTy.getShape();
158-
auto srcEncoding = srcTy.getEncoding();
159158

160159
// Reshape operands to match srcShape
161160
llvm::SmallVector<Value, 4> newOperands;
@@ -167,7 +166,7 @@ struct MoveBroadcastAfterElementwisePattern
167166
}
168167
auto elemTy =
169168
dyn_cast<RankedTensorType>(operand.getType()).getElementType();
170-
auto newTy = RankedTensorType::get(bcSrcShape, elemTy, srcEncoding);
169+
auto newTy = srcTy.clone(bcSrcShape, elemTy);
171170
if (auto splatOp = llvm::dyn_cast<SplatOp>(definingOp)) {
172171
auto newSplat = rewriter.create<SplatOp>(loc, newTy, splatOp.getSrc());
173172
newOperands.push_back(newSplat);
@@ -191,8 +190,7 @@ struct MoveBroadcastAfterElementwisePattern
191190
auto resultTypes = op->getResultTypes();
192191
for (auto resultTy : resultTypes) {
193192
auto elemTy = dyn_cast<RankedTensorType>(resultTy).getElementType();
194-
newResultTypes.push_back(
195-
RankedTensorType::get(bcSrcShape, elemTy, srcEncoding));
193+
newResultTypes.push_back(srcTy.clone(bcSrcShape, elemTy));
196194
}
197195

198196
// Create new op and broadcast results

lib/Dialect/TritonGPU/IR/Ops.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,8 +761,7 @@ int32_t LocalAllocOp::getAlignmentOrDefault() {
761761

762762
static Type removeEncodingIfTensor(Type type) {
763763
if (auto tensorType = dyn_cast<RankedTensorType>(type)) {
764-
return RankedTensorType::get(tensorType.getShape(),
765-
tensorType.getElementType());
764+
return tensorType.cloneWithEncoding({});
766765
}
767766
return type;
768767
}

lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,7 @@ class BlockedToMMA : public mlir::OpRewritePattern<DotOp> {
355355
auto mmaEnc = NvidiaMmaEncodingAttr::get(
356356
oldRetType.getContext(), versionMajor, versionMinor, warpsPerTile,
357357
CTALayout, instrShape);
358-
auto newRetType = RankedTensorType::get(
359-
oldRetType.getShape(), oldRetType.getElementType(), mmaEnc);
358+
auto newRetType = oldRetType.cloneWithEncoding(mmaEnc);
360359
// convert accumulator
361360
auto oldAcc = dotOp.getOperand(2);
362361
auto newAcc =
@@ -368,8 +367,7 @@ class BlockedToMMA : public mlir::OpRewritePattern<DotOp> {
368367
auto vType = cast<RankedTensorType>(v.getType());
369368
auto newVEncoding = DotOperandEncodingAttr::get(
370369
v.getContext(), opIdx, newRetType.getEncoding(), minType);
371-
auto newVType = RankedTensorType::get(
372-
vType.getShape(), vType.getElementType(), newVEncoding);
370+
auto newVType = vType.cloneWithEncoding(newVEncoding);
373371
return rewriter.create<ConvertLayoutOp>(v.getLoc(), newVType, v);
374372
};
375373

@@ -476,14 +474,11 @@ static Value splitBOperand(Value b, mlir::PatternRewriter &rewriter) {
476474
if (!tensorType)
477475
continue;
478476
Value newOperand = rewriter.create<ConvertLayoutOp>(
479-
operand.get().getLoc(),
480-
RankedTensorType::get(tensorType.getShape(),
481-
tensorType.getElementType(), newLayout),
477+
operand.get().getLoc(), tensorType.cloneWithEncoding(newLayout),
482478
operand.get());
483479
loadOp->setOperand(operand.getOperandNumber(), newOperand);
484480
}
485-
loadOp->getResult(0).setType(RankedTensorType::get(
486-
bType.getShape(), bType.getElementType(), newLayout));
481+
loadOp->getResult(0).setType(bType.cloneWithEncoding(newLayout));
487482
Value newB = loadOp->getResult(0);
488483
rewriter.setInsertionPointAfter(loadOp);
489484
auto cvt = rewriter.create<ConvertLayoutOp>(b.getLoc(), bType, newB);
@@ -549,9 +544,7 @@ class BlockedToMMAv5 : public mlir::OpRewritePattern<DotOp> {
549544
/*mutableMemory=*/true);
550545
Attribute newDistributedEncoding = nvidia_gpu::getTmemCompatibleLayout(
551546
instrShape[0], instrShape[1], oldRetType, numWarps);
552-
auto newAccType = RankedTensorType::get(oldRetType.getShape(),
553-
oldRetType.getElementType(),
554-
newDistributedEncoding);
547+
auto newAccType = oldRetType.cloneWithEncoding(newDistributedEncoding);
555548
Value cvtAcc =
556549
rewriter.create<ConvertLayoutOp>(loc, newAccType, dotOp.getOperand(2));
557550
auto tokType = rewriter.getType<AsyncTokenType>();
@@ -704,9 +697,7 @@ class ScaledBlockedToMMAv5
704697
/*mutableMemory=*/true);
705698
Attribute newDistributedEncoding =
706699
nvidia_gpu::getTmemCompatibleLayout(m, n, oldRetType, numWarps);
707-
auto newAccType = RankedTensorType::get(oldRetType.getShape(),
708-
oldRetType.getElementType(),
709-
newDistributedEncoding);
700+
auto newAccType = oldRetType.cloneWithEncoding(newDistributedEncoding);
710701
Value cvtAcc =
711702
rewriter.create<ConvertLayoutOp>(loc, newAccType, dotOp.getOperand(2));
712703
auto tokType = rewriter.getType<AsyncTokenType>();
@@ -729,10 +720,10 @@ class ScaledBlockedToMMAv5
729720
/*mutableMemory=*/false);
730721
Attribute scaleALayout = getTmemScales(oldScaleAType, numWarps);
731722
Attribute scaleBLayout = getTmemScales(oldScaleBType, numWarps);
732-
RankedTensorType newScaleAType = RankedTensorType::get(
733-
oldScaleAType.getShape(), oldScaleAType.getElementType(), scaleALayout);
734-
RankedTensorType newScaleBType = RankedTensorType::get(
735-
oldScaleBType.getShape(), oldScaleBType.getElementType(), scaleBLayout);
723+
RankedTensorType newScaleAType =
724+
oldScaleAType.cloneWithEncoding(scaleALayout);
725+
RankedTensorType newScaleBType =
726+
oldScaleBType.cloneWithEncoding(scaleBLayout);
736727

737728
auto lhsScale = addSmemStageToScaleLoad(dotOp.getAScale(), rewriter);
738729
auto rhsScale = addSmemStageToScaleLoad(dotOp.getBScale(), rewriter);

0 commit comments

Comments (0)