@@ -6508,50 +6508,39 @@ entry:
65086508}
65096509
65106510; Found by fuzzer, reduced with llvm-reduce.
6511- define amdgpu_kernel void @insert_very_small_from_very_large (<32 x i16 > %L3 , ptr %ptr ) {
6511+ define void @insert_very_small_from_very_large (<32 x i16 > %L3 , ptr %ptr ) {
65126512; GPRIDX-LABEL: insert_very_small_from_very_large:
65136513; GPRIDX: ; %bb.0: ; %bb
6514- ; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
6515- ; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
6516- ; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
6517- ; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1
6518- ; GPRIDX-NEXT: s_and_b32 s2, s2, 1
6519- ; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1
6520- ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
6521- ; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
6522- ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
6523- ; GPRIDX-NEXT: flat_store_byte v[0:1], v2
6524- ; GPRIDX-NEXT: s_endpgm
6514+ ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515+ ; GPRIDX-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6516+ ; GPRIDX-NEXT: v_and_b32_e32 v0, 1, v0
6517+ ; GPRIDX-NEXT: v_lshlrev_b16_e32 v0, 1, v0
6518+ ; GPRIDX-NEXT: v_and_b32_e32 v0, 3, v0
6519+ ; GPRIDX-NEXT: flat_store_byte v[16:17], v0
6520+ ; GPRIDX-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
6521+ ; GPRIDX-NEXT: s_setpc_b64 s[30:31]
65256522;
65266523; GFX10-LABEL: insert_very_small_from_very_large:
65276524; GFX10: ; %bb.0: ; %bb
6528- ; GFX10-NEXT: s_clause 0x1
6529- ; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
6530- ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
6525+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6526+ ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6527+ ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
6528+ ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
6529+ ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
6530+ ; GFX10-NEXT: flat_store_byte v[16:17], v0
65316531; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6532- ; GFX10-NEXT: s_lshr_b32 s2, s12, 1
6533- ; GFX10-NEXT: v_mov_b32_e32 v0, s0
6534- ; GFX10-NEXT: s_and_b32 s2, s2, 1
6535- ; GFX10-NEXT: v_mov_b32_e32 v1, s1
6536- ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
6537- ; GFX10-NEXT: v_mov_b32_e32 v2, s2
6538- ; GFX10-NEXT: flat_store_byte v[0:1], v2
6539- ; GFX10-NEXT: s_endpgm
6532+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
65406533;
65416534; GFX11-LABEL: insert_very_small_from_very_large:
65426535; GFX11: ; %bb.0: ; %bb
6543- ; GFX11-NEXT: s_clause 0x1
6544- ; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0
6545- ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40
6536+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6537+ ; GFX11-NEXT: v_lshrrev_b16 v0.l, 1, v0.l
6538+ ; GFX11-NEXT: v_and_b16 v0.l, v0.l, 1
6539+ ; GFX11-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
6540+ ; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
6541+ ; GFX11-NEXT: flat_store_b8 v[16:17], v0
65466542; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6547- ; GFX11-NEXT: s_lshr_b32 s2, s8, 1
6548- ; GFX11-NEXT: v_mov_b32_e32 v0, s0
6549- ; GFX11-NEXT: s_and_b32 s2, s2, 1
6550- ; GFX11-NEXT: v_mov_b32_e32 v1, s1
6551- ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
6552- ; GFX11-NEXT: v_mov_b32_e32 v2, s2
6553- ; GFX11-NEXT: flat_store_b8 v[0:1], v2
6554- ; GFX11-NEXT: s_endpgm
6543+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
65556544bb:
65566545 %a = bitcast <32 x i16 > %L3 to i512
65576546 %b = trunc i512 %a to i8
0 commit comments