@@ -601,6 +601,34 @@ define <2 x i16> @test_cvt_scalef32_pk_fp8_f16_word1(<2 x i16> %old, <2 x half>
601601 ret <2 x i16 > %ret
602602}
603603
; Immediate-source case for the packed f16 -> fp8 scaled convert: both lanes
; of the constant <2 x half> source are 4.0. The expected code passes it to
; the convert as the inline operand 4.0, with no separate move, and the same
; output is checked for both selectors through the shared GCN prefix.
; The trailing i1 false presumably selects the destination word as in the
; _word0/_word1 tests (confirm against the intrinsic definition).
604+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_f16_imm1 (<2 x i16 > %old , float %scale ) {
605+ ; GCN-LABEL: test_cvt_scalef32_pk_fp8_f16_imm1:
606+ ; GCN: ; %bb.0:
607+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608+ ; GCN-NEXT: v_cvt_scalef32_pk_fp8_f16 v0, 4.0, v1
609+ ; GCN-NEXT: s_setpc_b64 s[30:31]
610+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 4 .0 >, float %scale , i1 false )
611+ ret <2 x i16 > %ret
612+ }
613+
; Non-splat immediate source for the packed f16 -> fp8 scaled convert: the
; constant lanes are 4.0 and 2.0. The expected code first materializes the
; 32-bit literal 0x40004400 (half 4.0 = 0x4400 in the low 16 bits, half
; 2.0 = 0x4000 in the high 16 bits) in a register and then converts from it.
; The two selectors are checked separately: the SDAG checks move the literal
; into s0 with s_mov_b32, the GISEL checks move it into v2 with v_mov_b32_e32.
614+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_f16_imm2 (<2 x i16 > %old , float %scale ) {
615+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk_fp8_f16_imm2:
616+ ; GFX950-SDAG: ; %bb.0:
617+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618+ ; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x40004400
619+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk_fp8_f16 v0, s0, v1
620+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
621+ ;
622+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk_fp8_f16_imm2:
623+ ; GFX950-GISEL: ; %bb.0:
624+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004400
626+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk_fp8_f16 v0, v2, v1
627+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
628+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 2 .0 >, float %scale , i1 false )
629+ ret <2 x i16 > %ret
630+ }
631+
604632define <2 x i16 > @test_cvt_scalef32_pk_fp8_bf16_word0 (<2 x i16 > %old , <2 x bfloat> %src , float %scale ) {
605633; GCN-LABEL: test_cvt_scalef32_pk_fp8_bf16_word0:
606634; GCN: ; %bb.0:
@@ -621,6 +649,27 @@ define <2 x i16> @test_cvt_scalef32_pk_fp8_bf16_word1(<2 x i16> %old, <2 x bfloa
621649 ret <2 x i16 > %ret
622650}
623651
; Immediate-source case for the packed bf16 -> fp8 scaled convert: both lanes
; of the constant <2 x bfloat> source are 4.0. The expected code passes the
; source as the inline operand 4.0 with no separate move, checked for both
; selectors through the shared GCN prefix.
; NOTE(review): the operand prints as 4.0, the same text the f16 variant of
; this test expects, while bfloat 4.0 is 0x4080 and half 4.0 is 0x4400.
; Confirm that this instruction reads the inline constant as bf16 per lane.
652+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_bf16_imm1 (<2 x i16 > %old , float %scale ) {
653+ ; GCN-LABEL: test_cvt_scalef32_pk_fp8_bf16_imm1:
654+ ; GCN: ; %bb.0:
655+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656+ ; GCN-NEXT: v_cvt_scalef32_pk_fp8_bf16 v0, 4.0, v1
657+ ; GCN-NEXT: s_setpc_b64 s[30:31]
658+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 4 .0 >, float %scale , i1 false )
659+ ret <2 x i16 > %ret
660+ }
661+
; Non-splat immediate source for the packed bf16 -> fp8 scaled convert: the
; constant lanes are 4.0 and 2.0. The expected code moves the 32-bit literal
; 0x40004080 (bfloat 4.0 = 0x4080 in the low 16 bits, bfloat 2.0 = 0x4000 in
; the high 16 bits) into s0 with s_mov_b32 and converts from s0. Unlike the
; f16 _imm2 test, one set of checks under the shared GCN prefix covers both
; selectors.
662+ define <2 x i16 > @test_cvt_scalef32_pk_fp8_bf16_imm2 (<2 x i16 > %old , float %scale ) {
663+ ; GCN-LABEL: test_cvt_scalef32_pk_fp8_bf16_imm2:
664+ ; GCN: ; %bb.0:
665+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666+ ; GCN-NEXT: s_mov_b32 s0, 0x40004080
667+ ; GCN-NEXT: v_cvt_scalef32_pk_fp8_bf16 v0, s0, v1
668+ ; GCN-NEXT: s_setpc_b64 s[30:31]
669+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.fp8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 2 .0 >, float %scale , i1 false )
670+ ret <2 x i16 > %ret
671+ }
672+
624673define <2 x i16 > @test_cvt_scalef32_pk_bf8_f16_word0 (<2 x i16 > %old , <2 x half > %src , float %scale ) {
625674; GCN-LABEL: test_cvt_scalef32_pk_bf8_f16_word0:
626675; GCN: ; %bb.0:
@@ -641,6 +690,34 @@ define <2 x i16> @test_cvt_scalef32_pk_bf8_f16_word1(<2 x i16> %old, <2 x half>
641690 ret <2 x i16 > %ret
642691}
643692
; Immediate-source case for the packed f16 -> bf8 scaled convert: both lanes
; of the constant <2 x half> source are 4.0. The expected code passes it to
; the convert as the inline operand 4.0, with no separate move, and the same
; output is checked for both selectors through the shared GCN prefix.
; The trailing i1 false presumably selects the destination word as in the
; _word0/_word1 tests (confirm against the intrinsic definition).
693+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_f16_imm1 (<2 x i16 > %old , float %scale ) {
694+ ; GCN-LABEL: test_cvt_scalef32_pk_bf8_f16_imm1:
695+ ; GCN: ; %bb.0:
696+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
697+ ; GCN-NEXT: v_cvt_scalef32_pk_bf8_f16 v0, 4.0, v1
698+ ; GCN-NEXT: s_setpc_b64 s[30:31]
699+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 4 .0 >, float %scale , i1 false )
700+ ret <2 x i16 > %ret
701+ }
702+
; Non-splat immediate source for the packed f16 -> bf8 scaled convert: the
; constant lanes are 4.0 and 2.0. The expected code first materializes the
; 32-bit literal 0x40004400 (half 4.0 = 0x4400 in the low 16 bits, half
; 2.0 = 0x4000 in the high 16 bits) in a register and then converts from it.
; The two selectors are checked separately: the SDAG checks move the literal
; into s0 with s_mov_b32, the GISEL checks move it into v2 with v_mov_b32_e32.
703+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_f16_imm2 (<2 x i16 > %old , float %scale ) {
704+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_pk_bf8_f16_imm2:
705+ ; GFX950-SDAG: ; %bb.0:
706+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707+ ; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x40004400
708+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk_bf8_f16 v0, s0, v1
709+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
710+ ;
711+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_pk_bf8_f16_imm2:
712+ ; GFX950-GISEL: ; %bb.0:
713+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004400
715+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk_bf8_f16 v0, v2, v1
716+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
717+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.f16 (<2 x i16 > %old , <2 x half > <half 4 .0 , half 2 .0 >, float %scale , i1 false )
718+ ret <2 x i16 > %ret
719+ }
720+
644721define <2 x i16 > @test_cvt_scalef32_pk_bf8_bf16_word0 (<2 x i16 > %old , <2 x bfloat> %src , float %scale ) {
645722; GCN-LABEL: test_cvt_scalef32_pk_bf8_bf16_word0:
646723; GCN: ; %bb.0:
@@ -661,6 +738,27 @@ define <2 x i16> @test_cvt_scalef32_pk_bf8_bf16_word1(<2 x i16> %old, <2 x bfloa
661738 ret <2 x i16 > %ret
662739}
663740
; Immediate-source case for the packed bf16 -> bf8 scaled convert: both lanes
; of the constant <2 x bfloat> source are 4.0. The expected code passes the
; source as the inline operand 4.0 with no separate move, checked for both
; selectors through the shared GCN prefix.
; NOTE(review): the operand prints as 4.0, the same text the f16 variant of
; this test expects, while bfloat 4.0 is 0x4080 and half 4.0 is 0x4400.
; Confirm that this instruction reads the inline constant as bf16 per lane.
741+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_bf16_imm1 (<2 x i16 > %old , float %scale ) {
742+ ; GCN-LABEL: test_cvt_scalef32_pk_bf8_bf16_imm1:
743+ ; GCN: ; %bb.0:
744+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745+ ; GCN-NEXT: v_cvt_scalef32_pk_bf8_bf16 v0, 4.0, v1
746+ ; GCN-NEXT: s_setpc_b64 s[30:31]
747+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 4 .0 >, float %scale , i1 false )
748+ ret <2 x i16 > %ret
749+ }
750+
; Non-splat immediate source for the packed bf16 -> bf8 scaled convert: the
; constant lanes are 4.0 and 2.0. The expected code moves the 32-bit literal
; 0x40004080 (bfloat 4.0 = 0x4080 in the low 16 bits, bfloat 2.0 = 0x4000 in
; the high 16 bits) into s0 with s_mov_b32 and converts from s0. Unlike the
; f16 _imm2 test, one set of checks under the shared GCN prefix covers both
; selectors.
751+ define <2 x i16 > @test_cvt_scalef32_pk_bf8_bf16_imm2 (<2 x i16 > %old , float %scale ) {
752+ ; GCN-LABEL: test_cvt_scalef32_pk_bf8_bf16_imm2:
753+ ; GCN: ; %bb.0:
754+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755+ ; GCN-NEXT: s_mov_b32 s0, 0x40004080
756+ ; GCN-NEXT: v_cvt_scalef32_pk_bf8_bf16 v0, s0, v1
757+ ; GCN-NEXT: s_setpc_b64 s[30:31]
758+ %ret = tail call <2 x i16 > @llvm.amdgcn.cvt.scalef32.pk.bf8.bf16 (<2 x i16 > %old , <2 x bfloat> <bfloat 4 .0 , bfloat 2 .0 >, float %scale , i1 false )
759+ ret <2 x i16 > %ret
760+ }
761+
664762define <2 x float > @test_cvt_scale_f32_fp4_byte0 (i32 %src , float %scale ) {
665763; GCN-LABEL: test_cvt_scale_f32_fp4_byte0:
666764; GCN: ; %bb.0:
@@ -1236,6 +1334,37 @@ define i32 @test_cvt_scalef32_fp4_f16_byte3(<2 x half> %src0, float %scale, i32
12361334 ret i32 %ret
12371335}
12381336
; Immediate-source case for the packed f16 -> fp4 scaled convert: both lanes
; of the constant <2 x half> source are 4.0, and the expected code passes it
; as the inline operand 4.0. The checks show the convert writing v1 with v0
; as the scale operand, followed by a v_mov_b32 that copies v1 into v0 before
; the return. The same output is checked for both selectors through the
; shared GCN prefix.
; The trailing i32 0 presumably selects the destination byte as in the
; _byte0.._byte3 tests (confirm against the intrinsic definition).
1337+ define i32 @test_cvt_scalef32_fp4_f16_imm1 (float %scale , i32 %old ) {
1338+ ; GCN-LABEL: test_cvt_scalef32_fp4_f16_imm1:
1339+ ; GCN: ; %bb.0:
1340+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341+ ; GCN-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, 4.0, v0
1342+ ; GCN-NEXT: v_mov_b32_e32 v0, v1
1343+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1344+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16 (i32 %old , <2 x half > <half 4 .0 , half 4 .0 >, float %scale , i32 0 )
1345+ ret i32 %ret
1346+ }
1347+
; Non-splat immediate source for the packed f16 -> fp4 scaled convert: the
; constant lanes are 4.0 and 2.0. The expected code first materializes the
; 32-bit literal 0x40004400 (half 4.0 = 0x4400 in the low 16 bits, half
; 2.0 = 0x4000 in the high 16 bits), converts into v1, and copies v1 to v0.
; The two selectors are checked separately: the SDAG checks move the literal
; into s0 with s_mov_b32, the GISEL checks move it into v2 with v_mov_b32_e32.
1348+ define i32 @test_cvt_scalef32_fp4_f16_imm2 (float %scale , i32 %old ) {
1349+ ; GFX950-SDAG-LABEL: test_cvt_scalef32_fp4_f16_imm2:
1350+ ; GFX950-SDAG: ; %bb.0:
1351+ ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352+ ; GFX950-SDAG-NEXT: s_mov_b32 s0, 0x40004400
1353+ ; GFX950-SDAG-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, s0, v0
1354+ ; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, v1
1355+ ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
1356+ ;
1357+ ; GFX950-GISEL-LABEL: test_cvt_scalef32_fp4_f16_imm2:
1358+ ; GFX950-GISEL: ; %bb.0:
1359+ ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004400
1361+ ; GFX950-GISEL-NEXT: v_cvt_scalef32_pk_fp4_f16 v1, v2, v0
1362+ ; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, v1
1363+ ; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
1364+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.f16 (i32 %old , <2 x half > <half 4 .0 , half 2 .0 >, float %scale , i32 0 )
1365+ ret i32 %ret
1366+ }
1367+
12391368define i32 @test_cvt_scalef32_fp4_bf16_byte0 (<2 x bfloat> %src0 , float %scale , i32 %old ) {
12401369; GCN-LABEL: test_cvt_scalef32_fp4_bf16_byte0:
12411370; GCN: ; %bb.0:
@@ -1283,6 +1412,29 @@ define i32 @test_cvt_scalef32_fp4_bf16_byte3(<2 x bfloat> %src0, float %scale, i
12831412 ret i32 %ret
12841413}
12851414
; Immediate-source case for the packed bf16 -> fp4 scaled convert: both lanes
; of the constant <2 x bfloat> source are 4.0, and the expected code passes
; it as the inline operand 4.0. The checks show the convert writing v1 with
; v0 as the scale operand, followed by a v_mov_b32 that copies v1 into v0.
; NOTE(review): the operand prints as 4.0, the same text the f16 variant of
; this test expects, while bfloat 4.0 is 0x4080 and half 4.0 is 0x4400.
; Confirm that this instruction reads the inline constant as bf16 per lane.
1415+ define i32 @test_cvt_scalef32_fp4_bf16_imm1 (float %scale , i32 %old ) {
1416+ ; GCN-LABEL: test_cvt_scalef32_fp4_bf16_imm1:
1417+ ; GCN: ; %bb.0:
1418+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419+ ; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, 4.0, v0
1420+ ; GCN-NEXT: v_mov_b32_e32 v0, v1
1421+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1422+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.bf16 (i32 %old , <2 x bfloat> <bfloat 4 .0 , bfloat 4 .0 >, float %scale , i32 0 )
1423+ ret i32 %ret
1424+ }
1425+
; Non-splat immediate source for the packed bf16 -> fp4 scaled convert: the
; constant lanes are 4.0 and 2.0. The expected code moves the 32-bit literal
; 0x40004080 (bfloat 4.0 = 0x4080 in the low 16 bits, bfloat 2.0 = 0x4000 in
; the high 16 bits) into s0 with s_mov_b32, converts from s0 into v1, and
; copies v1 to v0. Unlike the f16 _imm2 test, one set of checks under the
; shared GCN prefix covers both selectors.
1426+ define i32 @test_cvt_scalef32_fp4_bf16_imm2 (float %scale , i32 %old ) {
1427+ ; GCN-LABEL: test_cvt_scalef32_fp4_bf16_imm2:
1428+ ; GCN: ; %bb.0:
1429+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430+ ; GCN-NEXT: s_mov_b32 s0, 0x40004080
1431+ ; GCN-NEXT: v_cvt_scalef32_pk_fp4_bf16 v1, s0, v0
1432+ ; GCN-NEXT: v_mov_b32_e32 v0, v1
1433+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1434+ %ret = tail call i32 @llvm.amdgcn.cvt.scalef32.pk.fp4.bf16 (i32 %old , <2 x bfloat> <bfloat 4 .0 , bfloat 2 .0 >, float %scale , i32 0 )
1435+ ret i32 %ret
1436+ }
1437+
12861438define amdgpu_ps void @test_scalef32_pk32_fp6_f32_vv_inreg_src (<16 x float > inreg %src , float %scale , ptr addrspace (1 ) %out ) {
12871439; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f32_vv_inreg_src:
12881440; GFX950-SDAG: ; %bb.0:
0 commit comments