@@ -1302,26 +1302,26 @@ llvm.func @rocdl_last_use(%ptr: !llvm.ptr<1>) -> i32 {
13021302// CHECK-SAME:(i32 %[[I32:.+]], <2 x i32> %[[V2I32:.+]], i32 %[[SCALE:.+]])
13031303llvm.func @rocdl.cvt.scale.pk8 (%i32: i32 , %v2xi32: vector <2 xi32 >, %scale: i32 ) {
13041304
1305- // CHECK: call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp4(i32 %[[I32]], i32 %[[SCALE]], i32 0)
1306- %0 = rocdl.cvt.scale.pk8.f16.fp4 %i32 , %scale [0 ] : vector <8 xf16 >
1305+ // CHECK: call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp4(i32 %[[I32]], i32 %[[SCALE]], i32 0)
1306+ %0 = rocdl.cvt.scale.pk8.f16.fp4 %i32 , %scale [0 ] : vector <8 xf16 >
13071307 // CHECK: call <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.fp4(i32 %[[I32]], i32 %[[SCALE]], i32 0)
1308- %1 = rocdl.cvt.scale.pk8.bf16.fp4 %i32 , %scale [0 ] : vector <8 xbf16 >
1309- // CHECK: call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp4(i32 %[[I32]], i32 %[[SCALE]], i32 0)
1310- %2 = rocdl.cvt.scale.pk8.f32.fp4 %i32 , %scale [0 ] : vector <8 xf32 >
1308+ %1 = rocdl.cvt.scale.pk8.bf16.fp4 %i32 , %scale [0 ] : vector <8 xbf16 >
1309+ // CHECK: call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp4(i32 %[[I32]], i32 %[[SCALE]], i32 0)
1310+ %2 = rocdl.cvt.scale.pk8.f32.fp4 %i32 , %scale [0 ] : vector <8 xf32 >
13111311
1312- // CHECK: call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1313- %3 = rocdl.cvt.scale.pk8.f16.fp8 %v2xi32 , %scale [0 ] : vector <8 xf16 >
1312+ // CHECK: call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1313+ %3 = rocdl.cvt.scale.pk8.f16.fp8 %v2xi32 , %scale [0 ] : vector <8 xf16 >
13141314 // CHECK: call <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.fp8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1315- %4 = rocdl.cvt.scale.pk8.bf16.fp8 %v2xi32 , %scale [0 ] : vector <8 xbf16 >
1316- // CHECK: call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1317- %5 = rocdl.cvt.scale.pk8.f32.fp8 %v2xi32 , %scale [0 ] : vector <8 xf32 >
1315+ %4 = rocdl.cvt.scale.pk8.bf16.fp8 %v2xi32 , %scale [0 ] : vector <8 xbf16 >
1316+ // CHECK: call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1317+ %5 = rocdl.cvt.scale.pk8.f32.fp8 %v2xi32 , %scale [0 ] : vector <8 xf32 >
13181318
1319- // CHECK: call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.bf8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1320- %6 = rocdl.cvt.scale.pk8.f16.bf8 %v2xi32 , %scale [0 ] : vector <8 xf16 >
1319+ // CHECK: call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.bf8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1320+ %6 = rocdl.cvt.scale.pk8.f16.bf8 %v2xi32 , %scale [0 ] : vector <8 xf16 >
13211321 // CHECK: call <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.bf8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1322- %7 = rocdl.cvt.scale.pk8.bf16.bf8 %v2xi32 , %scale [0 ] : vector <8 xbf16 >
1323- // CHECK: call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.bf8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1324- %8 = rocdl.cvt.scale.pk8.f32.bf8 %v2xi32 , %scale [0 ] : vector <8 xf32 >
1322+ %7 = rocdl.cvt.scale.pk8.bf16.bf8 %v2xi32 , %scale [0 ] : vector <8 xbf16 >
1323+ // CHECK: call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.bf8(<2 x i32> %[[V2I32]], i32 %[[SCALE]], i32 0)
1324+ %8 = rocdl.cvt.scale.pk8.f32.bf8 %v2xi32 , %scale [0 ] : vector <8 xf32 >
13251325
13261326 llvm.return
13271327}
@@ -1330,18 +1330,18 @@ llvm.func @rocdl.cvt.scale.pk8(%i32: i32, %v2xi32: vector<2xi32>, %scale: i32) {
13301330// CHECK-SAME:(<3 x i32> %[[SRC0:.+]], i32 %[[SCALE:.+]])
13311331llvm.func @rocdl.cvt.scale.pk16 (%v3xi32: vector <3 xi32 >, %scale:i32 ) {
13321332
1333- // CHECK: call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1334- %0 = rocdl.cvt.scale.pk16.f16.fp6 %v3xi32 , %scale [0 ] : vector <16 xf16 >
1333+ // CHECK: call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.fp6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1334+ %0 = rocdl.cvt.scale.pk16.f16.fp6 %v3xi32 , %scale [0 ] : vector <16 xf16 >
13351335 // CHECK: call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.fp6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1336- %1 = rocdl.cvt.scale.pk16.bf16.fp6 %v3xi32 , %scale [0 ] : vector <16 xbf16 >
1337- // CHECK: call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1338- %2 = rocdl.cvt.scale.pk16.f32.fp6 %v3xi32 , %scale [0 ] : vector <16 xf32 >
1339- // CHECK: call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1340- %3 = rocdl.cvt.scale.pk16.f16.bf6 %v3xi32 , %scale [0 ] : vector <16 xf16 >
1336+ %1 = rocdl.cvt.scale.pk16.bf16.fp6 %v3xi32 , %scale [0 ] : vector <16 xbf16 >
1337+ // CHECK: call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.fp6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1338+ %2 = rocdl.cvt.scale.pk16.f32.fp6 %v3xi32 , %scale [0 ] : vector <16 xf32 >
1339+ // CHECK: call <16 x half> @llvm.amdgcn.cvt.scale.pk16.f16.bf6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1340+ %3 = rocdl.cvt.scale.pk16.f16.bf6 %v3xi32 , %scale [0 ] : vector <16 xf16 >
13411341 // CHECK: call <16 x bfloat> @llvm.amdgcn.cvt.scale.pk16.bf16.bf6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1342- %4 = rocdl.cvt.scale.pk16.bf16.bf6 %v3xi32 , %scale [0 ] : vector <16 xbf16 >
1342+ %4 = rocdl.cvt.scale.pk16.bf16.bf6 %v3xi32 , %scale [0 ] : vector <16 xbf16 >
13431343 // CHECK: call <16 x float> @llvm.amdgcn.cvt.scale.pk16.f32.bf6(<3 x i32> %[[SRC0]], i32 %[[SCALE]], i32 0)
1344- %5 = rocdl.cvt.scale.pk16.f32.bf6 %v3xi32 , %scale [0 ] : vector <16 xf32 >
1344+ %5 = rocdl.cvt.scale.pk16.f32.bf6 %v3xi32 , %scale [0 ] : vector <16 xf32 >
13451345
13461346 llvm.return
13471347}
0 commit comments