@@ -1368,6 +1368,39 @@ llvm.func @rocdl.cvt.scalef32.pk8(%v8xf32: vector<8xf32>, %v8xf16: vector<8xf16>
1368
1368
llvm.return
1369
1369
}
1370
1370
1371
// Translation test for the rocdl.cvt.scalef32.sr.pk8.* ops. Every op below is
// expected to become a call to the identically named
// @llvm.amdgcn.cvt.scalef32.sr.pk8.* intrinsic, forwarding its
// (source vector, seed, scale) operands unchanged. The fp8 and bf8 forms are
// checked to produce <2 x i32>; the fp4 forms are checked to produce i32.
// NOTE(review): "sr" presumably stands for stochastic rounding (hence the
// i32 seed operand) - confirm against the ROCDL dialect documentation.
// CHECK-LABEL: @rocdl.cvt.scalef32.sr.pk8
// CHECK-SAME:(<8 x float> %[[V8F32:.+]], <8 x half> %[[V8F16:.+]], <8 x bfloat> %[[V8BF16:.+]], i32 %[[SEED:.+]], float %[[SCALE:.+]])
llvm.func @rocdl.cvt.scalef32.sr.pk8(%v8xf32: vector<8xf32>,
                                     %v8xf16: vector<8xf16>,
                                     %v8xbf16: vector<8xbf16>,
                                     %seed: i32,
                                     %scale: f32) {

  // Source type: 8 x f32.
  // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f32(<8 x float> %[[V8F32]], i32 %[[SEED]], float %[[SCALE]])
  %0 = rocdl.cvt.scalef32.sr.pk8.fp8.f32 %v8xf32, %seed, %scale : vector<2xi32>
  // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f32(<8 x float> %[[V8F32]], i32 %[[SEED]], float %[[SCALE]])
  %1 = rocdl.cvt.scalef32.sr.pk8.bf8.f32 %v8xf32, %seed, %scale : vector<2xi32>
  // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f32(<8 x float> %[[V8F32]], i32 %[[SEED]], float %[[SCALE]])
  %2 = rocdl.cvt.scalef32.sr.pk8.fp4.f32 %v8xf32, %seed, %scale : i32

  // Source type: 8 x f16.
  // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.f16(<8 x half> %[[V8F16]], i32 %[[SEED]], float %[[SCALE]])
  %3 = rocdl.cvt.scalef32.sr.pk8.fp8.f16 %v8xf16, %seed, %scale : vector<2xi32>
  // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.f16(<8 x half> %[[V8F16]], i32 %[[SEED]], float %[[SCALE]])
  %4 = rocdl.cvt.scalef32.sr.pk8.bf8.f16 %v8xf16, %seed, %scale : vector<2xi32>
  // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.f16(<8 x half> %[[V8F16]], i32 %[[SEED]], float %[[SCALE]])
  %5 = rocdl.cvt.scalef32.sr.pk8.fp4.f16 %v8xf16, %seed, %scale : i32

  // Source type: 8 x bf16.
  // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.fp8.bf16(<8 x bfloat> %[[V8BF16]], i32 %[[SEED]], float %[[SCALE]])
  %6 = rocdl.cvt.scalef32.sr.pk8.fp8.bf16 %v8xbf16, %seed, %scale : vector<2xi32>
  // CHECK: call <2 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk8.bf8.bf16(<8 x bfloat> %[[V8BF16]], i32 %[[SEED]], float %[[SCALE]])
  %7 = rocdl.cvt.scalef32.sr.pk8.bf8.bf16 %v8xbf16, %seed, %scale : vector<2xi32>
  // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.bf16(<8 x bfloat> %[[V8BF16]], i32 %[[SEED]], float %[[SCALE]])
  %8 = rocdl.cvt.scalef32.sr.pk8.fp4.bf16 %v8xbf16, %seed, %scale : i32

  llvm.return
}
1402
+
1403
+
1371
1404
// CHECK-LABEL: @rocdl.cvt.scale.pk16
1372
1405
// CHECK-SAME:(<3 x i32> %[[SRC0:.+]], i32 %[[SCALE:.+]])
1373
1406
llvm.func @rocdl.cvt.scale.pk16 (%v3xi32: vector <3 xi32 >, %scale:i32 ) {
0 commit comments