@@ -5,16 +5,12 @@ define amdgpu_kernel void @v_ashr_pk_i8_i32(ptr addrspace(1) %out, i32 %src0, i3
55; GFX950: ; %bb.0:
66; GFX950-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
77; GFX950-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
8- ; GFX950-NEXT: v_mov_b32_e32 v1, 0xffffff80
9- ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f
108; GFX950-NEXT: v_mov_b32_e32 v0, 0
119; GFX950-NEXT: s_waitcnt lgkmcnt(0)
12- ; GFX950-NEXT: s_ashr_i32 s1, s1, s2
13- ; GFX950-NEXT: s_ashr_i32 s0, s0, s2
14- ; GFX950-NEXT: v_med3_i32 v3, s0, v1, v2
15- ; GFX950-NEXT: v_med3_i32 v1, s1, v1, v2
16- ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 8, v1
17- ; GFX950-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
10+ ; GFX950-NEXT: s_and_b32 s2, s2, 31
11+ ; GFX950-NEXT: v_mov_b32_e32 v1, s1
12+ ; GFX950-NEXT: v_mov_b32_e32 v2, s2
13+ ; GFX950-NEXT: v_ashr_pk_i8_i32 v1, s0, v1, v2
1814; GFX950-NEXT: global_store_short v0, v1, s[6:7]
1915; GFX950-NEXT: s_endpgm
2016 %insert.0 = insertelement <2 x i32 > poison, i32 %src0 , i64 0
@@ -36,15 +32,12 @@ define amdgpu_kernel void @v_ashr_pk_u8_i32(ptr addrspace(1) %out, i32 %src0, i3
3632; GFX950: ; %bb.0:
3733; GFX950-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
3834; GFX950-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
39- ; GFX950-NEXT: v_mov_b32_e32 v1, 0xff
4035; GFX950-NEXT: v_mov_b32_e32 v0, 0
4136; GFX950-NEXT: s_waitcnt lgkmcnt(0)
42- ; GFX950-NEXT: s_ashr_i32 s1, s1, s2
43- ; GFX950-NEXT: s_ashr_i32 s0, s0, s2
44- ; GFX950-NEXT: v_med3_i32 v2, s0, 0, v1
45- ; GFX950-NEXT: v_med3_i32 v1, s1, 0, v1
46- ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 8, v1
47- ; GFX950-NEXT: v_or_b32_e32 v1, v2, v1
37+ ; GFX950-NEXT: s_and_b32 s2, s2, 31
38+ ; GFX950-NEXT: v_mov_b32_e32 v1, s1
39+ ; GFX950-NEXT: v_mov_b32_e32 v2, s2
40+ ; GFX950-NEXT: v_ashr_pk_u8_i32 v1, s0, v1, v2
4841; GFX950-NEXT: global_store_short v0, v1, s[6:7]
4942; GFX950-NEXT: s_endpgm
5043 %insert.0 = insertelement <2 x i32 > poison, i32 %src0 , i64 0
0 commit comments