@@ -103,7 +103,7 @@ define float @v_uitofp_to_f32_multi_use_lshr8_mask255(i32 %arg0) nounwind {
103
103
; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0
104
104
; VI-NEXT: flat_store_dword v[0:1], v0
105
105
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
106
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
106
+ ; VI-NEXT: s_waitcnt vmcnt(0)
107
107
; VI-NEXT: s_setpc_b64 s[30:31]
108
108
%lshr.8 = lshr i32 %arg0 , 8
109
109
store i32 %lshr.8 , i32 addrspace (1 )* undef
@@ -527,7 +527,7 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a
527
527
; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
528
528
; VI-NEXT: v_addc_u32_e32 v1, vcc, v2, v3, vcc
529
529
; VI-NEXT: flat_load_ubyte v0, v[0:1]
530
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
530
+ ; VI-NEXT: s_waitcnt vmcnt(0)
531
531
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
532
532
; VI-NEXT: v_mov_b32_e32 v0, s2
533
533
; VI-NEXT: v_mov_b32_e32 v1, s3
@@ -628,13 +628,13 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)
628
628
; VI-NEXT: flat_load_ubyte v3, v[6:7]
629
629
; VI-NEXT: v_mov_b32_e32 v5, s3
630
630
; VI-NEXT: v_mov_b32_e32 v4, s2
631
- ; VI-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
631
+ ; VI-NEXT: s_waitcnt vmcnt(3)
632
632
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
633
- ; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
633
+ ; VI-NEXT: s_waitcnt vmcnt(2)
634
634
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
635
- ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
635
+ ; VI-NEXT: s_waitcnt vmcnt(1)
636
636
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
637
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
637
+ ; VI-NEXT: s_waitcnt vmcnt(0)
638
638
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
639
639
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
640
640
; VI-NEXT: s_endpgm
@@ -711,7 +711,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias
711
711
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
712
712
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
713
713
; VI-NEXT: flat_load_dword v0, v[0:1]
714
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
714
+ ; VI-NEXT: s_waitcnt vmcnt(0)
715
715
; VI-NEXT: v_add_u32_e32 v0, vcc, 2, v0
716
716
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
717
717
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -758,7 +758,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias
758
758
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
759
759
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
760
760
; VI-NEXT: flat_load_dword v0, v[0:1]
761
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
761
+ ; VI-NEXT: s_waitcnt vmcnt(0)
762
762
; VI-NEXT: v_and_b32_e32 v0, 0xff00, v0
763
763
; VI-NEXT: v_cvt_f32_ubyte1_e32 v2, v0
764
764
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -805,7 +805,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out,
805
805
; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
806
806
; VI-NEXT: v_addc_u32_e32 v1, vcc, v2, v3, vcc
807
807
; VI-NEXT: flat_load_ubyte v0, v[0:1]
808
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
808
+ ; VI-NEXT: s_waitcnt vmcnt(0)
809
809
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
810
810
; VI-NEXT: v_mov_b32_e32 v0, s2
811
811
; VI-NEXT: v_mov_b32_e32 v1, s3
@@ -874,13 +874,13 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no
874
874
; VI-NEXT: flat_load_ubyte v3, v[6:7]
875
875
; VI-NEXT: v_mov_b32_e32 v5, s3
876
876
; VI-NEXT: v_mov_b32_e32 v4, s2
877
- ; VI-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3)
877
+ ; VI-NEXT: s_waitcnt vmcnt(3)
878
878
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
879
- ; VI-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2)
879
+ ; VI-NEXT: s_waitcnt vmcnt(2)
880
880
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
881
- ; VI-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1)
881
+ ; VI-NEXT: s_waitcnt vmcnt(1)
882
882
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
883
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
883
+ ; VI-NEXT: s_waitcnt vmcnt(0)
884
884
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
885
885
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
886
886
; VI-NEXT: s_endpgm
@@ -923,7 +923,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out
923
923
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
924
924
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
925
925
; VI-NEXT: flat_load_dword v0, v[0:1]
926
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
926
+ ; VI-NEXT: s_waitcnt vmcnt(0)
927
927
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
928
928
; VI-NEXT: v_mov_b32_e32 v0, s2
929
929
; VI-NEXT: v_mov_b32_e32 v1, s3
@@ -969,7 +969,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out
969
969
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
970
970
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
971
971
; VI-NEXT: flat_load_dword v0, v[0:1]
972
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
972
+ ; VI-NEXT: s_waitcnt vmcnt(0)
973
973
; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0
974
974
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
975
975
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1018,7 +1018,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out
1018
1018
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1019
1019
; VI-NEXT: flat_load_dword v0, v[0:1]
1020
1020
; VI-NEXT: v_mov_b32_e32 v1, 0xff
1021
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1021
+ ; VI-NEXT: s_waitcnt vmcnt(0)
1022
1022
; VI-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1023
1023
; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0
1024
1024
; VI-NEXT: v_mov_b32_e32 v0, s2
@@ -1064,7 +1064,7 @@ define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out
1064
1064
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1065
1065
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1066
1066
; VI-NEXT: flat_load_dword v0, v[0:1]
1067
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1067
+ ; VI-NEXT: s_waitcnt vmcnt(0)
1068
1068
; VI-NEXT: v_cvt_f32_ubyte3_e32 v2, v0
1069
1069
; VI-NEXT: v_mov_b32_e32 v0, s2
1070
1070
; VI-NEXT: v_mov_b32_e32 v1, s3
@@ -1111,7 +1111,7 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float a
1111
1111
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1112
1112
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
1113
1113
; VI-NEXT: flat_load_dword v0, v[0:1]
1114
- ; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1114
+ ; VI-NEXT: s_waitcnt vmcnt(0)
1115
1115
; VI-NEXT: v_or_b32_e32 v0, 0x80000001, v0
1116
1116
; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
1117
1117
; VI-NEXT: v_add_f32_e32 v2, v0, v1
0 commit comments