Skip to content

Commit d7198c4

Browse files
committed
fix after rebase
1 parent a295d00 commit d7198c4

File tree

2 files changed

+28
-39
lines changed

2 files changed

+28
-39
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6508,50 +6508,39 @@ entry:
65086508
}
65096509

65106510
; Found by fuzzer, reduced with llvm-reduce.
6511-
define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
6511+
define void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
65126512
; GPRIDX-LABEL: insert_very_small_from_very_large:
65136513
; GPRIDX: ; %bb.0: ; %bb
6514-
; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
6515-
; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
6516-
; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
6517-
; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1
6518-
; GPRIDX-NEXT: s_and_b32 s2, s2, 1
6519-
; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1
6520-
; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
6521-
; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
6522-
; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
6523-
; GPRIDX-NEXT: flat_store_byte v[0:1], v2
6524-
; GPRIDX-NEXT: s_endpgm
6514+
; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515+
; GPRIDX-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6516+
; GPRIDX-NEXT: v_and_b32_e32 v0, 1, v0
6517+
; GPRIDX-NEXT: v_lshlrev_b16_e32 v0, 1, v0
6518+
; GPRIDX-NEXT: v_and_b32_e32 v0, 3, v0
6519+
; GPRIDX-NEXT: flat_store_byte v[16:17], v0
6520+
; GPRIDX-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
6521+
; GPRIDX-NEXT: s_setpc_b64 s[30:31]
65256522
;
65266523
; GFX10-LABEL: insert_very_small_from_very_large:
65276524
; GFX10: ; %bb.0: ; %bb
6528-
; GFX10-NEXT: s_clause 0x1
6529-
; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
6530-
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
6525+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6526+
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
6527+
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
6528+
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
6529+
; GFX10-NEXT: v_and_b32_e32 v0, 3, v0
6530+
; GFX10-NEXT: flat_store_byte v[16:17], v0
65316531
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6532-
; GFX10-NEXT: s_lshr_b32 s2, s12, 1
6533-
; GFX10-NEXT: v_mov_b32_e32 v0, s0
6534-
; GFX10-NEXT: s_and_b32 s2, s2, 1
6535-
; GFX10-NEXT: v_mov_b32_e32 v1, s1
6536-
; GFX10-NEXT: s_lshl_b32 s2, s2, 1
6537-
; GFX10-NEXT: v_mov_b32_e32 v2, s2
6538-
; GFX10-NEXT: flat_store_byte v[0:1], v2
6539-
; GFX10-NEXT: s_endpgm
6532+
; GFX10-NEXT: s_setpc_b64 s[30:31]
65406533
;
65416534
; GFX11-LABEL: insert_very_small_from_very_large:
65426535
; GFX11: ; %bb.0: ; %bb
6543-
; GFX11-NEXT: s_clause 0x1
6544-
; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0
6545-
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40
6536+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6537+
; GFX11-NEXT: v_lshrrev_b16 v0.l, 1, v0.l
6538+
; GFX11-NEXT: v_and_b16 v0.l, v0.l, 1
6539+
; GFX11-NEXT: v_lshlrev_b16 v0.l, 1, v0.l
6540+
; GFX11-NEXT: v_and_b32_e32 v0, 3, v0
6541+
; GFX11-NEXT: flat_store_b8 v[16:17], v0
65466542
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6547-
; GFX11-NEXT: s_lshr_b32 s2, s8, 1
6548-
; GFX11-NEXT: v_mov_b32_e32 v0, s0
6549-
; GFX11-NEXT: s_and_b32 s2, s2, 1
6550-
; GFX11-NEXT: v_mov_b32_e32 v1, s1
6551-
; GFX11-NEXT: s_lshl_b32 s2, s2, 1
6552-
; GFX11-NEXT: v_mov_b32_e32 v2, s2
6553-
; GFX11-NEXT: flat_store_b8 v[0:1], v2
6554-
; GFX11-NEXT: s_endpgm
6543+
; GFX11-NEXT: s_setpc_b64 s[30:31]
65556544
bb:
65566545
%a = bitcast <32 x i16> %L3 to i512
65576546
%b = trunc i512 %a to i8

llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,14 +2166,14 @@ define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) {
21662166
; GISEL-LABEL: load_v6i8:
21672167
; GISEL: ; %bb.0:
21682168
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169-
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
21702169
; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0
2170+
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4
21712171
; GISEL-NEXT: s_waitcnt vmcnt(1)
2172-
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
2173-
; GISEL-NEXT: s_waitcnt vmcnt(0)
21742172
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
21752173
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
21762174
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
2175+
; GISEL-NEXT: s_waitcnt vmcnt(0)
2176+
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
21772177
; GISEL-NEXT: s_setpc_b64 s[30:31]
21782178
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
21792179
%ret = load <6 x i8>, ptr addrspace(7) %p
@@ -3630,10 +3630,10 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) {
36303630
; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc
36313631
; GISEL-NEXT: s_waitcnt vmcnt(1)
36323632
; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0
3633-
; GISEL-NEXT: s_waitcnt vmcnt(0)
3634-
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
36353633
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
36363634
; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0
3635+
; GISEL-NEXT: s_waitcnt vmcnt(0)
3636+
; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4
36373637
; GISEL-NEXT: s_setpc_b64 s[30:31]
36383638
%p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
36393639
%ret = load volatile <6 x i8>, ptr addrspace(7) %p

0 commit comments

Comments
 (0)