Skip to content

Commit cc0bbb7

Browse files
amd-eochoalo and github-actions[bot]
authored and committed
Automerge: [mlir][rocdl] Adds cvt.scalef32.pk16.* and cvt.scalef32.sr.pk16.* operations (#164014)
2 parents 2c317f6 + a3557c3 commit cc0bbb7

File tree

3 files changed

+134
-0
lines changed

3 files changed

+134
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,6 +1167,7 @@ foreach smallT = [
11671167
ScaleArgInfo<ROCDL_V16BF16Type, "Bf16">,
11681168
ScaleArgInfo<ROCDL_V16F32Type, "F32">,
11691169
] in {
1170+
// Up-scaling
11701171
def ROCDL_CvtPkScalePk16 # largeT.nameForOp # smallT.nameForOp # Op :
11711172
ROCDL_ConcreteNonMemIntrOp<"cvt.scale.pk16." # largeT.name # "." # smallT.name,
11721173
[Pure], 1, [2], ["scaleSel"]>,
@@ -1182,6 +1183,42 @@ foreach smallT = [
11821183
}];
11831184

11841185
}
1186+
1187+
// Down-scaling
1188+
def ROCDL_CvtScaleF32Pk16 # smallT.nameForOp # largeT.nameForOp # Op :
1189+
ROCDL_ConcreteNonMemIntrOp<"cvt.scalef32.pk16." # smallT.name # "." # largeT.name,
1190+
[Pure], 1>,
1191+
Arguments<(ins largeT.type:$src, F32:$scale)> {
1192+
let results = (outs smallT.type:$res);
1193+
let summary = "Scale and convert packed "
1194+
# largeT.name # " to packed " # smallT.name ;
1195+
let description = [{
1196+
Convert 16 packed }] # largeT.name # [{ values to packed }]
1197+
# smallT.name # [{, multiplying by the exponent part of `scale`
1198+
before doing so. This op is for gfx1250+ arch.
1199+
}];
1200+
let assemblyFormat = [{
1201+
attr-dict $src `,` $scale `:` type($res)
1202+
}];
1203+
}
1204+
1205+
def ROCDL_CvtScaleF32SrPk16 # smallT.nameForOp # largeT.nameForOp # Op :
1206+
ROCDL_ConcreteNonMemIntrOp<"cvt.scalef32.sr.pk16." # smallT.name # "." # largeT.name,
1207+
[Pure], 1>,
1208+
Arguments<(ins largeT.type:$src, I32:$seed, F32:$scale)> {
1209+
let results = (outs smallT.type:$res);
1210+
let summary = "Scale and convert packed "
1211+
# largeT.name # " to packed " # smallT.name # " with stochastic rounding";
1212+
let description = [{
1213+
Convert 16 packed }] # largeT.name # [{ values to packed }]
1214+
# smallT.name # [{, multiplying by the exponent part of `scale`
1215+
before doing so and apply stochastic rounding. This op is for gfx1250+ arch.
1216+
}];
1217+
let assemblyFormat = [{
1218+
attr-dict $src `,` $seed `,` $scale `:` type($res)
1219+
}];
1220+
}
1221+
11851222
} // foreach largeT
11861223
} // foreach smallT
11871224

mlir/test/Dialect/LLVMIR/rocdl.mlir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,6 +1211,57 @@ llvm.func @rocdl.cvt.scale.pk16(%v3xi32: vector<3xi32>, %scale:i32) {
12111211

12121212
// -----
12131213

1214+
// CHECK-LABEL: rocdl.cvt.scalef32.pk16
1215+
llvm.func @rocdl.cvt.scalef32.pk16(%v16xf32: vector<16xf32>,
1216+
%v16xf16: vector<16xf16>,
1217+
%v16xbf16: vector<16xbf16>,
1218+
%scale: f32) {
1219+
1220+
// CHECK: rocdl.cvt.scalef32.pk16.fp6.f16
1221+
%0 = rocdl.cvt.scalef32.pk16.fp6.f16 %v16xf16, %scale : vector<3xi32>
1222+
// CHECK: rocdl.cvt.scalef32.pk16.fp6.bf16
1223+
%1 = rocdl.cvt.scalef32.pk16.fp6.bf16 %v16xbf16, %scale : vector<3xi32>
1224+
// CHECK: rocdl.cvt.scalef32.pk16.fp6.f32
1225+
%2 = rocdl.cvt.scalef32.pk16.fp6.f32 %v16xf32, %scale : vector<3xi32>
1226+
1227+
// CHECK: rocdl.cvt.scalef32.pk16.bf6.f16
1228+
%3 = rocdl.cvt.scalef32.pk16.bf6.f16 %v16xf16, %scale : vector<3xi32>
1229+
// CHECK: rocdl.cvt.scalef32.pk16.bf6.bf16
1230+
%4 = rocdl.cvt.scalef32.pk16.bf6.bf16 %v16xbf16, %scale : vector<3xi32>
1231+
// CHECK: rocdl.cvt.scalef32.pk16.bf6.f32
1232+
%5 = rocdl.cvt.scalef32.pk16.bf6.f32 %v16xf32, %scale : vector<3xi32>
1233+
1234+
llvm.return
1235+
}
1236+
1237+
// -----
1238+
1239+
// CHECK-LABEL: rocdl.cvt.scalef32.sr.pk16
1240+
llvm.func @rocdl.cvt.scalef32.sr.pk16(%v16xf32: vector<16xf32>,
1241+
%v16xf16: vector<16xf16>,
1242+
%v16xbf16: vector<16xbf16>,
1243+
%seed: i32,
1244+
%scale: f32) {
1245+
1246+
// CHECK: rocdl.cvt.scalef32.sr.pk16.fp6.f16
1247+
%0 = rocdl.cvt.scalef32.sr.pk16.fp6.f16 %v16xf16, %seed, %scale : vector<3xi32>
1248+
// CHECK: rocdl.cvt.scalef32.sr.pk16.fp6.bf16
1249+
%1 = rocdl.cvt.scalef32.sr.pk16.fp6.bf16 %v16xbf16, %seed, %scale : vector<3xi32>
1250+
// CHECK: rocdl.cvt.scalef32.sr.pk16.fp6.f32
1251+
%2 = rocdl.cvt.scalef32.sr.pk16.fp6.f32 %v16xf32, %seed, %scale : vector<3xi32>
1252+
1253+
// CHECK: rocdl.cvt.scalef32.sr.pk16.bf6.f16
1254+
%3 = rocdl.cvt.scalef32.sr.pk16.bf6.f16 %v16xf16, %seed, %scale : vector<3xi32>
1255+
// CHECK: rocdl.cvt.scalef32.sr.pk16.bf6.bf16
1256+
%4 = rocdl.cvt.scalef32.sr.pk16.bf6.bf16 %v16xbf16, %seed, %scale : vector<3xi32>
1257+
// CHECK: rocdl.cvt.scalef32.sr.pk16.bf6.f32
1258+
%5 = rocdl.cvt.scalef32.sr.pk16.bf6.f32 %v16xf32, %seed, %scale : vector<3xi32>
1259+
1260+
llvm.return
1261+
}
1262+
1263+
// -----
1264+
12141265
// expected-error@below {{attribute attached to unexpected op}}
12151266
func.func private @expected_llvm_func() attributes { rocdl.kernel }
12161267

mlir/test/Target/LLVMIR/rocdl.mlir

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,6 +1556,52 @@ llvm.func @rocdl.cvt.scale.pk16(%v3xi32: vector<3xi32>, %scale:i32) {
15561556
llvm.return
15571557
}
15581558

1559+
// CHECK-LABEL: rocdl.cvt.scalef32.pk16
1560+
// CHECK-SAME:(<16 x float> %[[V16F32:.+]], <16 x half> %[[V16F16:.+]], <16 x bfloat> %[[V16BF16:.+]], float %[[SCALE:.+]])
1561+
llvm.func @rocdl.cvt.scalef32.pk16(%v16xf32: vector<16xf32>, %v16xf16: vector<16xf16>, %v16xbf16: vector<16xbf16>, %scale: f32) {
1562+
1563+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f16(<16 x half> %[[V16F16]], float %[[SCALE]])
1564+
%0 = rocdl.cvt.scalef32.pk16.fp6.f16 %v16xf16, %scale : vector<3xi32>
1565+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.bf16(<16 x bfloat> %[[V16BF16]], float %[[SCALE]])
1566+
%1 = rocdl.cvt.scalef32.pk16.fp6.bf16 %v16xbf16, %scale : vector<3xi32>
1567+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.fp6.f32(<16 x float> %[[V16F32]], float %[[SCALE]])
1568+
%2 = rocdl.cvt.scalef32.pk16.fp6.f32 %v16xf32, %scale : vector<3xi32>
1569+
1570+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f16(<16 x half> %[[V16F16]], float %[[SCALE]])
1571+
%3 = rocdl.cvt.scalef32.pk16.bf6.f16 %v16xf16, %scale : vector<3xi32>
1572+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.bf16(<16 x bfloat> %[[V16BF16]], float %[[SCALE]])
1573+
%4 = rocdl.cvt.scalef32.pk16.bf6.bf16 %v16xbf16, %scale : vector<3xi32>
1574+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.pk16.bf6.f32(<16 x float> %[[V16F32]], float %[[SCALE]])
1575+
%5 = rocdl.cvt.scalef32.pk16.bf6.f32 %v16xf32, %scale : vector<3xi32>
1576+
1577+
llvm.return
1578+
}
1579+
1580+
// CHECK-LABEL: rocdl.cvt.scalef32.sr.pk16
1581+
// CHECK-SAME:(<16 x float> %[[V16F32:.+]], <16 x half> %[[V16F16:.+]], <16 x bfloat> %[[V16BF16:.+]], i32 %[[SEED:.+]], float %[[SCALE:.+]])
1582+
llvm.func @rocdl.cvt.scalef32.sr.pk16(%v16xf32: vector<16xf32>,
1583+
%v16xf16: vector<16xf16>,
1584+
%v16xbf16: vector<16xbf16>,
1585+
%seed: i32,
1586+
%scale: f32) {
1587+
1588+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f16(<16 x half> %[[V16F16]], i32 %[[SEED]], float %[[SCALE]])
1589+
%0 = rocdl.cvt.scalef32.sr.pk16.fp6.f16 %v16xf16, %seed, %scale : vector<3xi32>
1590+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.bf16(<16 x bfloat> %[[V16BF16]], i32 %[[SEED]], float %[[SCALE]])
1591+
%1 = rocdl.cvt.scalef32.sr.pk16.fp6.bf16 %v16xbf16, %seed, %scale : vector<3xi32>
1592+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f32(<16 x float> %[[V16F32]], i32 %[[SEED]], float %[[SCALE]])
1593+
%2 = rocdl.cvt.scalef32.sr.pk16.fp6.f32 %v16xf32, %seed, %scale : vector<3xi32>
1594+
1595+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f16(<16 x half> %[[V16F16]], i32 %[[SEED]], float %[[SCALE]])
1596+
%3 = rocdl.cvt.scalef32.sr.pk16.bf6.f16 %v16xf16, %seed, %scale : vector<3xi32>
1597+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.bf16(<16 x bfloat> %[[V16BF16]], i32 %[[SEED]], float %[[SCALE]])
1598+
%4 = rocdl.cvt.scalef32.sr.pk16.bf6.bf16 %v16xbf16, %seed, %scale : vector<3xi32>
1599+
// CHECK: call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f32(<16 x float> %[[V16F32]], i32 %[[SEED]], float %[[SCALE]])
1600+
%5 = rocdl.cvt.scalef32.sr.pk16.bf6.f32 %v16xf32, %seed, %scale : vector<3xi32>
1601+
1602+
llvm.return
1603+
}
1604+
15591605
// CHECK-DAG: attributes #[[$KERNEL_ATTRS]] = { "amdgpu-flat-work-group-size"="1,256" "uniform-work-group-size"="true" }
15601606
// CHECK-DAG: attributes #[[$KERNEL_WORKGROUP_ATTRS]] = { "amdgpu-flat-work-group-size"="1,1024"
15611607
// CHECK-DAG: attributes #[[$KNOWN_BLOCK_SIZE_ATTRS]] = { "amdgpu-flat-work-group-size"="128,128"

0 commit comments

Comments
 (0)