llvm
diff --git a/‎llvm/lib/CodeGen/ModuloSchedule.cpp‎
Lines changed: 1 addition & 1 deletion b/‎llvm/lib/CodeGen/ModuloSchedule.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td‎
Lines changed: 174 additions & 6 deletions b/‎mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td‎
Lines changed: 174 additions & 6 deletions
diff --git a/‎mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc‎
Lines changed: 6 additions & 8 deletions b/‎mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc‎
Lines changed: 6 additions & 8 deletions
diff --git a/‎mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td‎
Lines changed: 2 additions & 9 deletions b/‎mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td‎
Lines changed: 2 additions & 9 deletions
diff --git a/‎mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp‎
Lines changed: 9 additions & 32 deletions b/‎mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp‎
Lines changed: 9 additions & 32 deletions
diff --git a/‎mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp‎
Lines changed: 0 additions & 2 deletions b/‎mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp‎
Lines changed: 0 additions & 2 deletions
@@ -412,7 +412,7 @@ void ModuloScheduleExpander::generateExistingPhis(
                             InitVal, NewReg);
       auto It = VRMap[CurStageNum].find(LoopVal);
       if (It != VRMap[CurStageNum].end()) {
-        llvm::Register Reg = It->second;
+        Register Reg = It->second;
         VRMap[CurStageNum][Def] = Reg;
       }
     }
 
@@ -652,6 +652,20 @@ def ROCDL_DPPUpdateOp : ROCDL_IntrOp<"update.dpp", [], [0],
   }];
 }
 
+def ROCDL_V2I16Type  // Fixed-length vector of two i16 elements.
+    : FixedVectorOfLengthAndType<[2], [I16]>,
+      BuildableType<"::mlir::VectorType::get({2},$_builder.getI16Type())">;
+
+def ROCDL_V2F16Type  // Fixed-length vector of two f16 elements.
+    : FixedVectorOfLengthAndType<[2], [F16]>,
+      BuildableType<"::mlir::VectorType::get({2},$_builder.getF16Type())">;
+
+def ROCDL_V2BF16Type  // Fixed-length vector of two bf16 elements.
+    : FixedVectorOfLengthAndType<[2], [BF16]>,
+      BuildableType<"::mlir::VectorType::get({2},$_builder.getBF16Type())">;
+
+// TODO: The word and byte selectors are immarg in LLVM;
+// update them to be attributes in MLIR.
 //===---------------------------------------------------------------------===//
 // 16-bit float intrinsics
 //===---------------------------------------------------------------------===//
@@ -667,10 +681,168 @@ def ROCDL_CvtPkRtz:
   }];
 }
 
+def ROCDL_CvtScaleF32PkFp8F16 :
+    ROCDL_IntrOp<"cvt.scalef32.pk.fp8.f16", [], [], [Pure], 1>,
+    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2F16Type: $src, F32: $scale, I1:$wordSel)> {
+  let summary = "Scale and convert packed f16 to packed fp8";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed fp8.
+    Store the result in the low/high word of `old` selected by `wordSel`, preserving the other word.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32PkFp8Bf16 :
+    ROCDL_IntrOp<"cvt.scalef32.pk.fp8.bf16", [], [], [Pure], 1>,
+    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2BF16Type: $src, F32: $scale, I1:$wordSel)> {
+  let summary = "Scale and convert packed bf16 to packed fp8";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed fp8.
+    Store the result in the low/high word of `old` selected by `wordSel`, preserving the other word.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
+  }];
+}
+
+
+def ROCDL_CvtScaleF32PkBf8F16 :
+    ROCDL_IntrOp<"cvt.scalef32.pk.bf8.f16", [], [], [Pure], 1>,
+    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2F16Type: $src, F32: $scale, I1:$wordSel)> {
+  let summary = "Scale and convert packed f16 to packed bf8";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed bf8.
+    Store the result in the low/high word of `old` selected by `wordSel`, preserving the other word.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
+  }];
+}
+
+
+def ROCDL_CvtScaleF32PkBf8Bf16 :
+    ROCDL_IntrOp<"cvt.scalef32.pk.bf8.bf16", [], [], [Pure], 1>,
+    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2BF16Type: $src, F32: $scale, I1:$wordSel)> {
+  let summary = "Scale and convert packed bf16 to packed bf8";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed bf8.
+    Store the result in the low/high word of `old` selected by `wordSel`, preserving the other word.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32SrFp8F16 :
+    ROCDL_IntrOp<"cvt.scalef32.sr.fp8.f16", [], [], [Pure], 1>,
+    Arguments<(ins I32:$old, F16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
+  let summary = "Scale and convert f16 to packed fp8 using stochastic rounding";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed fp8 with stochastic rounding
+    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
+
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32SrBf8F16 :
+    ROCDL_IntrOp<"cvt.scalef32.sr.bf8.f16", [], [], [Pure], 1>,
+    Arguments<(ins I32:$old, F16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
+  let summary = "Scale and convert f16 to packed bf8 using stochastic rounding";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed bf8 with stochastic rounding
+    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
+
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32SrFp8Bf16 :
+    ROCDL_IntrOp<"cvt.scalef32.sr.fp8.bf16", [], [], [Pure], 1>,
+    Arguments<(ins I32:$old, BF16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
+  let summary = "Scale and convert bf16 to packed fp8 using stochastic rounding";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed fp8 with stochastic rounding
+    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
+
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32SrBf8Bf16:
+    ROCDL_IntrOp<"cvt.scalef32.sr.bf8.bf16", [], [], [Pure], 1>,
+    Arguments<(ins I32:$old, BF16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
+  let summary = "Scale and convert bf16 to packed bf8 using stochastic rounding";
+  let description = [{
+    Scale `src` by the exponent in `scale` then convert to packed bf8 with stochastic rounding
+    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
+
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32PkF16Fp8 :
+    ROCDL_IntrOp<"cvt.scalef32.pk.f16.fp8", [], [], [Pure], 1>,
+    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
+  let summary = "Scale and convert packed fp8 to packed f16";
+  let description = [{ Scale the word of `src` selected by `wordSel` by the exponent in `scale`,
+    then convert to packed f16.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF32PkF16Bf8 :
+    ROCDL_IntrOp<"cvt.scalef32.pk.f16.bf8", [], [], [Pure], 1>,
+    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
+  let summary = "Scale and convert packed bf8 to packed f16";
+  let description = [{ Scale the word of `src` selected by `wordSel` by the exponent in `scale`,
+    then convert to packed f16.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF16Fp8 :
+    ROCDL_IntrOp<"cvt.scalef32.f16.fp8", [], [], [Pure], 1>,
+    Arguments<(ins ROCDL_V2F16Type:$old, I32:$src, F32: $scale, I32:$byteSel, I1:$wordSel)> {
+  let summary = "Scale and convert fp8 to f16";
+  let description = [{ Scale the `byteSel`th byte of `src` by the exponent in `scale`, then convert
+    to f16. Store into the low/high half of `old` selected by `wordSel`, preserving the other half.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `[` $wordSel `]` `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+  }];
+}
+
+def ROCDL_CvtScaleF16Bf8 :
+    ROCDL_IntrOp<"cvt.scalef32.f16.bf8", [], [], [Pure], 1>,
+    Arguments<(ins ROCDL_V2F16Type:$old, I32:$src, F32: $scale, I32:$byteSel, I1:$wordSel)> {
+  let summary = "Scale and convert bf8 to f16";
+  let description = [{ Scale the `byteSel`th byte of `src` by the exponent in `scale`, then convert
+    to f16. Store into the low/high half of `old` selected by `wordSel`, preserving the other half.
+  }];
+  let assemblyFormat = [{
+    attr-dict $src `[` $wordSel `]` `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+  }];
+}
+
 //===---------------------------------------------------------------------===//
 // 32-bit float intrinsics
 //===---------------------------------------------------------------------===//
-def ROCDL_CvtScalePkF32Fp8 :
+def ROCDL_CvtScale32PkF32Fp8 :
     ROCDL_IntrOp<"cvt.scalef32.pk.f32.fp8", [], [], [Pure], 1>,
     Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
   let summary = "Scale and convert packed fp8 to packed f32";
@@ -682,7 +854,7 @@ def ROCDL_CvtScalePkF32Fp8 :
     attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
   }];
 }
-def ROCDL_CvtScalePkF32Bf8 :
+def ROCDL_CvtScale32PkF32Bf8 :
     ROCDL_IntrOp<"cvt.scalef32.pk.f32.bf8", [], [], [Pure], 1>,
     Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
   let summary = "Scale and convert packed bf8 to packed f32";
@@ -697,10 +869,6 @@ def ROCDL_CvtScalePkF32Bf8 :
 //===---------------------------------------------------------------------===//
 // 8-bit float scale intrinsics
 //===---------------------------------------------------------------------===//
-def ROCDL_V2I16Type : FixedVectorOfLengthAndType<[2], [I16]>,
-                        BuildableType<"::mlir::VectorType::get("
-                          "{2},$_builder.getI16Type())">;
-
 def ROCDL_CvtScaleF32PkFp8F32:
     ROCDL_IntrOp<"cvt.scalef32.pk.fp8.f32", [], [], [Pure], 1>,
     Arguments<(ins ROCDL_V2I16Type:$old, F32:$srcA, F32:$srcB, F32:$scale, I1:$wordSel)> {
 
@@ -35,11 +35,9 @@ profileComplianceMap = {
         {fp16T, fp16T, fp32T, fp32T},
         {fp32T, fp32T, fp32T, fp32T}}}}},
     {"tosa.matmul",
-     {{{Profile::pro_int}, {{i8T, i8T, i8T, i8T, i32T}}},
+     {{{Profile::pro_int}, {{i8T, i8T, i32T}}},
       {{Profile::pro_fp},
-       {{fp16T, fp16T, fp16T, fp16T, fp16T},
-        {fp16T, fp16T, fp16T, fp16T, fp32T},
-        {fp32T, fp32T, fp32T, fp32T, fp32T}}}}},
+       {{fp16T, fp16T, fp16T}, {fp16T, fp16T, fp32T}, {fp32T, fp32T, fp32T}}}}},
     {"tosa.max_pool2d",
      {{{Profile::pro_int}, {{i8T, i8T}}},
       {{Profile::pro_fp}, {{fp16T, fp16T}, {fp32T, fp32T}}}}},
@@ -275,10 +273,10 @@ extensionComplianceMap = {
       {{Extension::int16}, {{i16T, i8T, i48T, i48T}}},
       {{Extension::bf16}, {{bf16T, bf16T, fp32T, fp32T}}}}},
     {"tosa.matmul",
-     {{{Extension::int16}, {{i16T, i16T, i16T, i16T, i48T}}},
-      {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T, fp8e4m3T, fp8e4m3T, fp16T}}},
-      {{Extension::fp8e5m2}, {{fp8e5m2T, fp8e5m2T, fp8e5m2T, fp8e5m2T, fp16T}}},
-      {{Extension::bf16}, {{bf16T, bf16T, bf16T, bf16T, fp32T}}}}},
+     {{{Extension::int16}, {{i16T, i16T, i48T}}},
+      {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T, fp16T}}},
+      {{Extension::fp8e5m2}, {{fp8e5m2T, fp8e5m2T, fp16T}}},
+      {{Extension::bf16}, {{bf16T, bf16T, fp32T}}}}},
     {"tosa.max_pool2d",
      {{{Extension::int16}, {{i16T, i16T}}},
       {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T}}},
 
@@ -311,8 +311,8 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
   let arguments = (ins
     Tosa_Tensor3D:$a,
     Tosa_Tensor3D:$b,
-    Tosa_ScalarIntOrFloatTensor:$a_zp,
-    Tosa_ScalarIntOrFloatTensor:$b_zp
+    OptionalAttr<I32Attr>:$a_zp,
+    OptionalAttr<I32Attr>:$b_zp
   );
 
   let results = (outs
@@ -324,13 +324,6 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
     Extension<[Tosa_EXT_INT16, Tosa_EXT_FP8E4M3, Tosa_EXT_FP8E5M2, Tosa_EXT_BF16]>,
   ];
 
-  let extraClassDeclaration = [{
-    FailureOr<int64_t> getAZeroPoint();
-    FailureOr<int64_t> getBZeroPoint();
-    LogicalResult verifyAZeroPoint(int64_t zp);
-    LogicalResult verifyBZeroPoint(int64_t zp);
-  }];
-
   let builders = [Tosa_MatMulOpQuantInfoBuilder];
   let hasVerifier = 1;
 }
 
@@ -270,8 +270,8 @@ class ConvConverter : public OpConversionPattern<TosaConvOp> {
       return rewriter.notifyMatchFailure(
           op, "weight zero point cannot be statically determined");
 
-    const int64_t inputZpVal = *maybeIZp;
-    const int64_t weightZpVal = *maybeWZp;
+    int64_t inputZpVal = *maybeIZp;
+    int64_t weightZpVal = *maybeWZp;
 
     if (op.verifyInputZeroPoint(inputZpVal).failed())
       return rewriter.notifyMatchFailure(
@@ -466,8 +466,8 @@ class DepthwiseConvConverter
       return rewriter.notifyMatchFailure(
           op, "weight zero point cannot be statically determined");
 
-    const int64_t inputZpVal = *maybeIZp;
-    const int64_t weightZpVal = *maybeWZp;
+    int64_t inputZpVal = *maybeIZp;
+    int64_t weightZpVal = *maybeWZp;
 
     if (op.verifyInputZeroPoint(inputZpVal).failed())
       return rewriter.notifyMatchFailure(
@@ -621,38 +621,15 @@ class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
                            .create<linalg::FillOp>(loc, ValueRange{zero},
                                                    ValueRange{emptyTensor})
                            .result();
-
-    FailureOr<int64_t> maybeAZp = op.getAZeroPoint();
-    FailureOr<int64_t> maybeBZp = op.getBZeroPoint();
-    if (failed(maybeAZp))
-      return rewriter.notifyMatchFailure(
-          op, "input a zero point cannot be statically determined");
-    if (failed(maybeBZp))
-      return rewriter.notifyMatchFailure(
-          op, "input b zero point cannot be statically determined");
-
-    const int64_t aZpVal = *maybeAZp;
-    const int64_t bZpVal = *maybeBZp;
-
-    if (op.verifyAZeroPoint(aZpVal).failed())
-      return rewriter.notifyMatchFailure(
-          op, "input a zero point must be zero for non-int8 integer types");
-
-    if (op.verifyBZeroPoint(bZpVal).failed())
-      return rewriter.notifyMatchFailure(
-          op, "input b zero point must be zero for non-int8 integer types");
-
-    if (aZpVal == 0 && bZpVal == 0) {
+    if (!op.getAZp() && !op.getBZp()) {
       rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
           op, TypeRange{op.getType()},
           ValueRange{adaptor.getA(), adaptor.getB()}, ValueRange{zeroTensor});
       return success();
     }
 
-    auto aZp = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getI32IntegerAttr(aZpVal));
-    auto bZp = rewriter.create<arith::ConstantOp>(
-        loc, rewriter.getI32IntegerAttr(bZpVal));
+    auto aZp = rewriter.create<arith::ConstantOp>(loc, op.getAZpAttr());
+    auto bZp = rewriter.create<arith::ConstantOp>(loc, op.getBZpAttr());
     rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
         op, TypeRange{op.getType()},
         ValueRange{adaptor.getA(), adaptor.getB(), aZp, bZp}, zeroTensor);
@@ -857,8 +834,8 @@ class AvgPool2dConverter : public OpRewritePattern<tosa::AvgPool2dOp> {
       return rewriter.notifyMatchFailure(
           op, "output zero point could not be statically determined");
 
-    const int64_t inputZpVal = *maybeIZp;
-    const int64_t outputZpVal = *maybeOZp;
+    int64_t inputZpVal = *maybeIZp;
+    int64_t outputZpVal = *maybeOZp;
 
     // Apply padding as necessary.
     llvm::SmallVector<int64_t> pad;
 
@@ -55,8 +55,6 @@ struct MatMulOpSharding
     SmallVector<AffineMap> maps;
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 3}, ctx));
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 3, 2}, ctx));
-    maps.push_back(AffineMap::get(0, 0, {}, ctx));
-    maps.push_back(AffineMap::get(0, 0, {}, ctx));
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 2}, ctx));
     return maps;
   }
Original file line number	Diff line number	Diff line change
`@@ -412,7 +412,7 @@ void ModuloScheduleExpander::generateExistingPhis(`
`412`	`412`	`InitVal, NewReg);`
`413`	`413`	`auto It = VRMap[CurStageNum].find(LoopVal);`
`414`	`414`	`if (It != VRMap[CurStageNum].end()) {`
`415`		`- llvm::Register Reg = It->second;`
	`415`	`+ Register Reg = It->second;`
`416`	`416`	`VRMap[CurStageNum][Def] = Reg;`
`417`	`417`	`}`
`418`	`418`	`}`
Original file line number	Diff line number	Diff line change
`@@ -55,8 +55,6 @@ struct MatMulOpSharding`
`55`	`55`	`SmallVector<AffineMap> maps;`
`56`	`56`	`maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 3}, ctx));`
`57`	`57`	`maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 3, 2}, ctx));`
`58`		`- maps.push_back(AffineMap::get(0, 0, {}, ctx));`
`59`		`- maps.push_back(AffineMap::get(0, 0, {}, ctx));`
`60`	`58`	`maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 2}, ctx));`
`61`	`59`	`return maps;`
`62`	`60`	`}`