@@ -1153,6 +1153,9 @@ llvm.func @rocdl_4bit_packed_floats(%old: i32, %source0: f32, %source1: f32, %so
11531153 // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f32(i32 %10, <2 x float> %3, i32 %6, float 1.000000e+00, i32 0)
11541154 // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.f16(i32 %11, <2 x half> %4, i32 %6, float 1.000000e+00, i32 0)
11551155 // CHECK: call i32 @llvm.amdgcn.cvt.scalef32.sr.pk.fp4.bf16(i32 %12, <2 x bfloat> %5, i32 %6, float 1.000000e+00, i32 0)
1156+ // CHECK: call <2 x float> @llvm.amdgcn.cvt.scalef32.pk.f32.fp4(i32 %0, float 1.000000e+00, i32 0)
1157+ // CHECK: call <2 x half> @llvm.amdgcn.cvt.scalef32.pk.f16.fp4(i32 %0, float 1.000000e+00, i32 0)
1158+ // CHECK: call <2 x bfloat> @llvm.amdgcn.cvt.scalef32.pk.bf16.fp4(i32 %0, float 1.000000e+00, i32 0)
11561159 %c0 = llvm.mlir.constant (0 : i32 ) : i32
11571160 %scale = llvm.mlir.constant (1.0 : f32 ) : f32
11581161 %pk1 = rocdl.cvt.scalef32.pk.fp4.f32 %source0 , %source1 , %scale -> %old [%c0 ] : i32
@@ -1161,6 +1164,9 @@ llvm.func @rocdl_4bit_packed_floats(%old: i32, %source0: f32, %source1: f32, %so
11611164 %sr1 = rocdl.cvt.scalef32.sr.pk.fp4.f32 %source , %stoch , %scale -> %pk3 [%c0 ] : i32
11621165 %sr2 = rocdl.cvt.scalef32.sr.pk.fp4.f16 %source_half , %stoch , %scale -> %sr1 [%c0 ] : i32
11631166 %sr3 = rocdl.cvt.scalef32.sr.pk.fp4.bf16 %source_bfloat , %stoch , %scale -> %sr2 [%c0 ] : i32
1167+ %pk4 = rocdl.cvt.scalef32.pk.f32.fp4 %old [%c0 ], %scale : vector <2 xf32 >
1168+ %pk5 = rocdl.cvt.scalef32.pk.f16.fp4 %old [%c0 ], %scale : vector <2 xf16 >
1169+ %pk6 = rocdl.cvt.scalef32.pk.bf16.fp4 %old [%c0 ], %scale : vector <2 xbf16 >
11641170 llvm.return %sr3 : i32
11651171}
11661172
0 commit comments