[mlir][amdgpu] Add scaled_ext_packed{8,16} operations #159830

amd-eochoalo · 2025-09-19T18:36:05Z

No description provided.

llvmbot · 2025-09-19T18:49:48Z

@llvm/pr-subscribers-mlir-core
@llvm/pr-subscribers-mlir-amdgpu
@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-backend-amdgpu

Author: Erick Ochoa Lopez (amd-eochoalo)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/159830.diff

2 Files Affected:

(modified) mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td (+42)
(modified) mlir/test/Dialect/AMDGPU/ops.mlir (+55)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index a24a918357f2d..d5ea737e229ff 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -112,6 +112,48 @@ def AMDGPU_ExtPackedFp8Op :
   }];
 }
 
+def AMDGPU_ScaledExtPacked8Op
+    : AMDGPU_Op<"scaled_ext_packed8", [Pure]>,
+      Arguments<(
+          ins VectorOfLengthAndType<[8], [F4E2M1FN,F8E4M3FN,F8E5M2]>:$source,
+          F32:$scale,
+          ConfinedAttr<I32Attr, [IntNonNegative, IntMaxValue<7>]>:$index)>,
+      Results<(
+          outs AnyTypeOf<[FixedVectorOfLengthAndType<[8], [F32]>,
+                          FixedVectorOfLengthAndType<[8], [F16]>,
+                          FixedVectorOfLengthAndType<[8], [BF16]>]>:$res)> {
+  let summary = "Extend a vector of packed floating point values";
+
+  let description = [{
+    Extend and scale eight packed floats in to eight floats and return them.
+  }];
+
+  let assemblyFormat = [{
+    attr-dict $source `,` $scale `[` $index `]` `:` type($source) `to` type($res)
+  }];
+}
+
+def AMDGPU_ScaledExtPacked16Op
+    : AMDGPU_Op<"scaled_ext_packed16", [Pure]>,
+      Arguments<(
+          ins VectorOfLengthAndType<[16], [F6E2M3FN, F6E3M2FN]>:$source,
+          F32:$scale,
+          ConfinedAttr<I32Attr, [IntNonNegative, IntMaxValue<7>]>:$index)>,
+      Results<(
+          outs AnyTypeOf<[FixedVectorOfLengthAndType<[16], [F32]>,
+                          FixedVectorOfLengthAndType<[16], [F16]>,
+                          FixedVectorOfLengthAndType<[16], [BF16]>]>:$res)> {
+  let summary = "Extend a vector of packed floating point values";
+
+  let description = [{
+    Extend and scale 16 packed floats to 16 floats and return them.
+  }];
+
+  let assemblyFormat = [{
+    attr-dict $source `,` $scale `[` $index `]` `:` type($source) `to` type($res)
+  }];
+}
+
 def AMDGPU_ScaledExtPackedOp
     : AMDGPU_Op<"scaled_ext_packed", [Pure]>,
       Arguments<(
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 369e0fff538e1..1841c0815b435 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -221,6 +221,61 @@ func.func @scaled_ext_scalar_f4e2m1_bf16(%v: vector<2xf4E2M1FN>, %scale: f32) ->
   func.return %ret : vector<2xbf16>
 }
 
+// CHECK-LABEL: func.func @scaled_ext_packed8_fp4
+func.func @scaled_ext_packed8_fp4(%v: vector<8xf4E2M1FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed8_fp8
+func.func @scaled_ext_packed8_fp8(%v: vector<8xf8E4M3FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed8_bf8
+func.func @scaled_ext_packed8_bf8(%v: vector<8xf8E5M2>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed16_fp6
+func.func @scaled_ext_packed16_fp6(%v: vector<16xf6E2M3FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret0 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret1 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xbf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret2 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf32>
+  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed16_bf16
+func.func @scaled_ext_packed16_bf16(%v: vector<16xf6E3M2FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret0 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret1 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xbf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret2 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf32>
+  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
 // CHECK-LABEL: func.func @packed_scaled_trunc_f8e4m3_f32
 // CHECK: amdgpu.packed_scaled_trunc
 func.func @packed_scaled_trunc_f8e4m3_f32(%v: vector<2xf32>, %scale: f32) -> vector<4xf8E4M3FN> {

llvmbot · 2025-09-19T18:49:49Z

@llvm/pr-subscribers-mlir

Author: Erick Ochoa Lopez (amd-eochoalo)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/159830.diff

2 Files Affected:

(modified) mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td (+42)
(modified) mlir/test/Dialect/AMDGPU/ops.mlir (+55)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index a24a918357f2d..d5ea737e229ff 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -112,6 +112,48 @@ def AMDGPU_ExtPackedFp8Op :
   }];
 }
 
+def AMDGPU_ScaledExtPacked8Op
+    : AMDGPU_Op<"scaled_ext_packed8", [Pure]>,
+      Arguments<(
+          ins VectorOfLengthAndType<[8], [F4E2M1FN,F8E4M3FN,F8E5M2]>:$source,
+          F32:$scale,
+          ConfinedAttr<I32Attr, [IntNonNegative, IntMaxValue<7>]>:$index)>,
+      Results<(
+          outs AnyTypeOf<[FixedVectorOfLengthAndType<[8], [F32]>,
+                          FixedVectorOfLengthAndType<[8], [F16]>,
+                          FixedVectorOfLengthAndType<[8], [BF16]>]>:$res)> {
+  let summary = "Extend a vector of packed floating point values";
+
+  let description = [{
+    Extend and scale eight packed floats in to eight floats and return them.
+  }];
+
+  let assemblyFormat = [{
+    attr-dict $source `,` $scale `[` $index `]` `:` type($source) `to` type($res)
+  }];
+}
+
+def AMDGPU_ScaledExtPacked16Op
+    : AMDGPU_Op<"scaled_ext_packed16", [Pure]>,
+      Arguments<(
+          ins VectorOfLengthAndType<[16], [F6E2M3FN, F6E3M2FN]>:$source,
+          F32:$scale,
+          ConfinedAttr<I32Attr, [IntNonNegative, IntMaxValue<7>]>:$index)>,
+      Results<(
+          outs AnyTypeOf<[FixedVectorOfLengthAndType<[16], [F32]>,
+                          FixedVectorOfLengthAndType<[16], [F16]>,
+                          FixedVectorOfLengthAndType<[16], [BF16]>]>:$res)> {
+  let summary = "Extend a vector of packed floating point values";
+
+  let description = [{
+    Extend and scale 16 packed floats to 16 floats and return them.
+  }];
+
+  let assemblyFormat = [{
+    attr-dict $source `,` $scale `[` $index `]` `:` type($source) `to` type($res)
+  }];
+}
+
 def AMDGPU_ScaledExtPackedOp
     : AMDGPU_Op<"scaled_ext_packed", [Pure]>,
       Arguments<(
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 369e0fff538e1..1841c0815b435 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -221,6 +221,61 @@ func.func @scaled_ext_scalar_f4e2m1_bf16(%v: vector<2xf4E2M1FN>, %scale: f32) ->
   func.return %ret : vector<2xbf16>
 }
 
+// CHECK-LABEL: func.func @scaled_ext_packed8_fp4
+func.func @scaled_ext_packed8_fp4(%v: vector<8xf4E2M1FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed8_fp8
+func.func @scaled_ext_packed8_fp8(%v: vector<8xf8E4M3FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed8_bf8
+func.func @scaled_ext_packed8_bf8(%v: vector<8xf8E5M2>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret0 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret1 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed8
+  %ret2 = amdgpu.scaled_ext_packed8 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed16_fp6
+func.func @scaled_ext_packed16_fp6(%v: vector<16xf6E2M3FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret0 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret1 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xbf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret2 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf32>
+  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed16_bf16
+func.func @scaled_ext_packed16_bf16(%v: vector<16xf6E3M2FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret0 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret1 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xbf16>
+  // CHECK: amdgpu.scaled_ext_packed16
+  %ret2 = amdgpu.scaled_ext_packed16 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf32>
+  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
 // CHECK-LABEL: func.func @packed_scaled_trunc_f8e4m3_f32
 // CHECK: amdgpu.packed_scaled_trunc
 func.func @packed_scaled_trunc_f8e4m3_f32(%v: vector<2xf32>, %scale: f32) -> vector<4xf8E4M3FN> {

krzysz00 · 2025-09-19T18:54:37Z

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

+  }];
+}
+
+def AMDGPU_ScaledExtPacked16Op


Don't make distinct operations here. Instead, loosen the definition of scaled_ext_packed and add checks for chip compatibility to the lowering.

If that's not feasible, get back to me.

I did it here: c3832b0. Is the assembly format acceptable to you?

krzysz00 · 2025-09-19T19:48:08Z

On further offline discussion, I'll need more context / we'll want to see if two separate ops are actually the better design here.

amd-eochoalo · 2025-09-19T20:01:59Z

Between f92db34 and c3832b0, I prefer f92db34. With two optional attributes that are in an XOR relationship, the constructors for this operation will always require a nullptr, and getting the attributes will always return a std::optional<int32_t>.

Between f92db34 and merging these two operations into their own distinct operation, one thing to note is that we will need a verifier to make sure the types are correctly matched (not a big deal). We would also need to choose a name for this operation, since scaled_ext_packed is already taken. Maybe scaled_ext_packed_8_or_16?

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp

kuhar

LGTM but wait for @krzysz00 before merging

krzysz00

LGTM after one comment

krzysz00 · 2025-10-16T17:00:55Z

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

+  }];
+
+  let extraClassDeclaration = [{
+    static Type getScaleType(MLIRContext *ctx) {


What's this for?

I would like the assembly format to only have type($source) to type($res).

Without using

TypesMatchWith<"scale type is fixed", "source", "scale", "ScaledExtPacked816Op::getScaleType($_self.getContext())">] // (which requires the definition of this extra class declaration)

the assembly format parser generator gives an error stating:

error: type of operand #1, named 'scale', is not buildable and a buildable type cannot be inferred attr-dict $source

I can inline this function like this:

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 05525d3a061d..210097138807 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -119,7 +119,7 @@ def IsValidBlockSize: AttrConstraint<
 def AMDGPU_ScaledExtPacked816Op
     : AMDGPU_Op<"scaled_ext_packed816", [Pure, TypesMatchWith<"scale type is fixed", "source", "scale",
-      "ScaledExtPacked816Op::getScaleType($_self.getContext())">]>,
+      "VectorType::get(4, Float8E8M0FNUType::get($_self.getContext()))">]>,
       Arguments<(
           ins AnyTypeOf<[VectorOfLengthAndType<[8], [F4E2M1FN,F8E4M3FN,F8E5M2]>,
                          VectorOfLengthAndType<[16], [F6E2M3FN, F6E3M2FN]>]>:$source,
@@ -170,12 +170,6 @@ def AMDGPU_ScaledExtPacked816Op
     `:` type($source) `to` type($res)
   }];
 
-  let extraClassDeclaration = [{
-    static Type getScaleType(MLIRContext *ctx) {
-      return VectorType::get(4, Float8E8M0FNUType::get(ctx));
-    }
-  }];
-
 }
 
 def AMDGPU_ScaledExtPackedOp

Or, if you prefer, I can add the type declaration, or maybe there is another solution?

I don't think you want TypesMatchWith here - that implies a relationship between two types.

What you're looking for goes something like this

def Vector4Scales : AllOfType<[IsVectorOfShape<[4]>, IsVectorOfType<[F8E8M0FNU]>, "vector of 4 F8E8M0FNU scales", "::mlir::VectorType">, BuildableType<"::mlir::VectorType::get($_builder.getType<::mlir::Float8E8M0FNUType>, {4});">;

and then use that in the op definition

(See mlir/include/mlir/IR/CommonTypeConstraints.td for where all those tablegen bits came from.)

(The top half of that is defining the constraint the type has to obey. The BuildableType bit is a tag that tells the tablegen bits "Hey, this is one very specific type, you can just ... create it yourself and don't have to parse it".)

Thank you! I was looking for exactly this!

Thanks for pointing me to BuildableType. I had some issues using it exactly as you described above, but in the end I learned a little bit more about Predicates and Types. I ended up using the following:

def Vector4Scales : AllOfType<[FixedVectorOfLengthAndType<[4], [F8E8M0FNU]>], "vector of 4 F8E8M0FNU scales", "::mlir::VectorType">, BuildableType<"::mlir::VectorType::get({4}, $_builder.getType<::mlir::Float8E8M0FNUType>());">;

If I understand correctly, FixedVectorOfLengthAndType should already take care of the constraints we need (instead of using/defining the constraints [IsVectorOfShape<[4]>, IsVectorOfType<[F8E8M0FNU]>]), and this definition adds the necessary BuildableType, which I was missing.

krzysz00 · 2025-10-16T17:01:24Z

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

+    `firstScaleByte` `(` $firstScaleByte `)`
+    `:` type($source) `to` type($res)
+  }];
+


Can you add a verifier that errors out on invalid block size / firstScaleByte combinations?

Thanks for the review! 4f83cd9

krzysz00 · 2025-10-16T22:34:36Z

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

+
+
+def Vector4Scales :
+  AllOfType<[FixedVectorOfLengthAndType<[4], [F8E8M0FNU]>],


This technically allows vector<2x2xf8E8M0FNU>

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

kuhar

LGTM % IR example in the description

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp

amd-eochoalo marked this pull request as ready for review September 19, 2025 18:49

llvmbot added backend:AMDGPU mlir:gpu mlir mlir:amdgpu labels Sep 19, 2025

amd-eochoalo requested review from krzysz00 and kuhar September 19, 2025 18:51

krzysz00 requested changes Sep 19, 2025

View reviewed changes

amd-eochoalo requested a review from krzysz00 September 19, 2025 19:30

[mlir][amdgpu] Add scaled_ext_packed{8,16} operations

9c09c35

amd-eochoalo force-pushed the eochoa/2025-09-19/cvt-amd-gpu branch from 0d09fc6 to 9c09c35 Compare October 16, 2025 13:03

kuhar reviewed Oct 16, 2025

View reviewed changes

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Show resolved Hide resolved

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp Outdated Show resolved Hide resolved

amd-eochoalo added 2 commits October 16, 2025 10:32

Use TypesMatchWith and make the scale a constant type

f8b11c4

Add note about availability on gfx1250+

e71f8d8

kuhar approved these changes Oct 16, 2025

View reviewed changes

krzysz00 approved these changes Oct 16, 2025

View reviewed changes

amd-eochoalo added 3 commits October 16, 2025 14:16

Add verifier for blockSize and firstScaleByte

4f83cd9

Use ConfinedType

b7763ef

Only use AllOfType

d50b6fe

krzysz00 reviewed Oct 16, 2025

View reviewed changes

kuhar requested changes Oct 17, 2025

View reviewed changes

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Show resolved Hide resolved

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

Verify shape matches and better type constraint

3cdb174

llvmbot added mlir:core MLIR Core Infrastructure mlir:ods labels Oct 17, 2025

Added scale type to the assembly format

30dcfea

kuhar reviewed Oct 17, 2025

View reviewed changes

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

amd-eochoalo added 2 commits October 17, 2025 11:11

Use functional-type

67261e5

Use : source_ty, scale_ty -> res_ty

fadf035

kuhar approved these changes Oct 17, 2025

View reviewed changes

Adds examples and better verification

0383100

kuhar reviewed Oct 17, 2025

View reviewed changes

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

kuhar reviewed Oct 17, 2025

View reviewed changes

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td Outdated Show resolved Hide resolved

kuhar approved these changes Oct 17, 2025

View reviewed changes

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp Outdated Show resolved Hide resolved

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp Outdated Show resolved Hide resolved

amd-eochoalo added 2 commits October 17, 2025 11:53

no else after return and remove global resolution

7aa6169

indentation and syntax highlighting

7a5fea5

amd-eochoalo merged commit a76c71b into llvm:main Oct 17, 2025
10 checks passed



		def Vector4Scales :
		AllOfType<[FixedVectorOfLengthAndType<[4], [F8E8M0FNU]>],

[mlir][amdgpu] Add scaled_ext_packed{8,16} operations #159830

[mlir][amdgpu] Add scaled_ext_packed{8,16} operations #159830

Uh oh!

Conversation

amd-eochoalo commented Sep 19, 2025

Uh oh!

llvmbot commented Sep 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 19, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

krzysz00 commented Sep 19, 2025

Uh oh!

amd-eochoalo commented Sep 19, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

kuhar left a comment

Choose a reason for hiding this comment

Uh oh!

krzysz00 left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

kuhar left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

llvmbot commented Sep 19, 2025 •

edited

Loading