Skip to content

Commit 53af2e6

Browse files
authored
[AMDGPU][GISel] Add inbounds flag to FLAT GISel tests (#153000)
This is in preparation for a patch that disables folding offsets into FLAT instructions when the corresponding address computation is not inbounds; folding such offsets can cause miscompilations because the aperture check may then produce a wrong result. With the inbounds flags added here to the GEPs and G_PTR_ADDs that feed FLAT instructions, the outputs of these tests won't change when that patch lands. For SWDEV-516125.
1 parent 92f97cb commit 53af2e6

13 files changed: 94 additions and 94 deletions.

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1119,7 +1119,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #1
11191119
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
11201120
; GFX11-NEXT: flat_store_b32 v[0:1], v2
11211121
; GFX11-NEXT: s_endpgm
1122-
%gep = getelementptr i32, ptr %ptr, i32 4
1122+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
11231123
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
11241124
store i32 %result, ptr %out, align 4
11251125
ret void
@@ -1218,7 +1218,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_system(ptr %out, ptr %
12181218
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
12191219
; GFX11-NEXT: flat_store_b32 v[0:1], v2
12201220
; GFX11-NEXT: s_endpgm
1221-
%gep = getelementptr i32, ptr %ptr, i32 4
1221+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
12221222
%result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
12231223
store i32 %result, ptr %out, align 4
12241224
ret void
@@ -1384,7 +1384,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) #1 {
13841384
; GFX11-NEXT: buffer_gl1_inv
13851385
; GFX11-NEXT: buffer_gl0_inv
13861386
; GFX11-NEXT: s_endpgm
1387-
%gep = getelementptr i32, ptr %ptr, i32 4
1387+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
13881388
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
13891389
ret void
13901390
}
@@ -1470,7 +1470,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_system(ptr %ptr) #1
14701470
; GFX11-NEXT: buffer_gl1_inv
14711471
; GFX11-NEXT: buffer_gl0_inv
14721472
; GFX11-NEXT: s_endpgm
1473-
%gep = getelementptr i32, ptr %ptr, i32 4
1473+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
14741474
%result = atomicrmw udec_wrap ptr %gep, i32 42 seq_cst, align 4
14751475
ret void
14761476
}
@@ -1599,7 +1599,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %
15991599
%id = call i32 @llvm.amdgcn.workitem.id.x()
16001600
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
16011601
%out.gep = getelementptr i32, ptr %out, i32 %id
1602-
%gep = getelementptr i32, ptr %gep.tid, i32 5
1602+
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
16031603
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
16041604
store i32 %result, ptr %out.gep, align 4
16051605
ret void
@@ -1706,7 +1706,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #1
17061706
; GFX11-NEXT: s_endpgm
17071707
%id = call i32 @llvm.amdgcn.workitem.id.x()
17081708
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
1709-
%gep = getelementptr i32, ptr %gep.tid, i32 5
1709+
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
17101710
%result = atomicrmw udec_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
17111711
ret void
17121712
}
@@ -1926,7 +1926,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #1
19261926
; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
19271927
; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1]
19281928
; GFX11-NEXT: s_endpgm
1929-
%gep = getelementptr i64, ptr %ptr, i32 4
1929+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
19301930
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
19311931
store i64 %result, ptr %out, align 4
19321932
ret void
@@ -2102,7 +2102,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) #1 {
21022102
; GFX11-NEXT: buffer_gl1_inv
21032103
; GFX11-NEXT: buffer_gl0_inv
21042104
; GFX11-NEXT: s_endpgm
2105-
%gep = getelementptr i64, ptr %ptr, i32 4
2105+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
21062106
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
21072107
ret void
21082108
}
@@ -2193,7 +2193,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_system(ptr %ptr) #1
21932193
; GFX11-NEXT: buffer_gl1_inv
21942194
; GFX11-NEXT: buffer_gl0_inv
21952195
; GFX11-NEXT: s_endpgm
2196-
%gep = getelementptr i64, ptr %ptr, i32 4
2196+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
21972197
%result = atomicrmw udec_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
21982198
ret void
21992199
}
@@ -2333,7 +2333,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %
23332333
%id = call i32 @llvm.amdgcn.workitem.id.x()
23342334
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
23352335
%out.gep = getelementptr i64, ptr %out, i32 %id
2336-
%gep = getelementptr i64, ptr %gep.tid, i32 5
2336+
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
23372337
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
23382338
store i64 %result, ptr %out.gep, align 4
23392339
ret void
@@ -2444,7 +2444,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #1
24442444
; GFX11-NEXT: s_endpgm
24452445
%id = call i32 @llvm.amdgcn.workitem.id.x()
24462446
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
2447-
%gep = getelementptr i64, ptr %gep.tid, i32 5
2447+
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
24482448
%result = atomicrmw udec_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
24492449
ret void
24502450
}

llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2525,7 +2525,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #1
25252525
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
25262526
; GFX12-NEXT: flat_store_b32 v[0:1], v2
25272527
; GFX12-NEXT: s_endpgm
2528-
%gep = getelementptr i32, ptr %ptr, i32 4
2528+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
25292529
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
25302530
store i32 %result, ptr %out, align 4
25312531
ret void
@@ -2639,7 +2639,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_system(ptr %out, ptr %
26392639
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
26402640
; GFX12-NEXT: flat_store_b32 v[0:1], v2
26412641
; GFX12-NEXT: s_endpgm
2642-
%gep = getelementptr i32, ptr %ptr, i32 4
2642+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
26432643
%result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
26442644
store i32 %result, ptr %out, align 4
26452645
ret void
@@ -2827,7 +2827,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) #1 {
28272827
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
28282828
; GFX12-NEXT: global_inv scope:SCOPE_DEV
28292829
; GFX12-NEXT: s_endpgm
2830-
%gep = getelementptr i32, ptr %ptr, i32 4
2830+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
28312831
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
28322832
ret void
28332833
}
@@ -2926,7 +2926,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_system(ptr %ptr) #1
29262926
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
29272927
; GFX12-NEXT: global_inv scope:SCOPE_SYS
29282928
; GFX12-NEXT: s_endpgm
2929-
%gep = getelementptr i32, ptr %ptr, i32 4
2929+
%gep = getelementptr inbounds i32, ptr %ptr, i32 4
29302930
%result = atomicrmw uinc_wrap ptr %gep, i32 42 seq_cst, align 4
29312931
ret void
29322932
}
@@ -3077,7 +3077,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %
30773077
%id = call i32 @llvm.amdgcn.workitem.id.x()
30783078
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
30793079
%out.gep = getelementptr i32, ptr %out, i32 %id
3080-
%gep = getelementptr i32, ptr %gep.tid, i32 5
3080+
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
30813081
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
30823082
store i32 %result, ptr %out.gep, align 4
30833083
ret void
@@ -3201,7 +3201,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #1
32013201
; GFX12-NEXT: s_endpgm
32023202
%id = call i32 @llvm.amdgcn.workitem.id.x()
32033203
%gep.tid = getelementptr i32, ptr %ptr, i32 %id
3204-
%gep = getelementptr i32, ptr %gep.tid, i32 5
3204+
%gep = getelementptr inbounds i32, ptr %gep.tid, i32 5
32053205
%result = atomicrmw uinc_wrap ptr %gep, i32 42 syncscope("agent") seq_cst, align 4
32063206
ret void
32073207
}
@@ -3571,7 +3571,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #1
35713571
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
35723572
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
35733573
; GFX12-NEXT: s_endpgm
3574-
%gep = getelementptr i64, ptr %ptr, i32 4
3574+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
35753575
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
35763576
store i64 %result, ptr %out, align 4
35773577
ret void
@@ -3701,7 +3701,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_system(ptr %out, ptr %
37013701
; GFX12-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
37023702
; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1]
37033703
; GFX12-NEXT: s_endpgm
3704-
%gep = getelementptr i64, ptr %ptr, i32 4
3704+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
37053705
%result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
37063706
store i64 %result, ptr %out, align 4
37073707
ret void
@@ -3901,7 +3901,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) #1 {
39013901
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
39023902
; GFX12-NEXT: global_inv scope:SCOPE_DEV
39033903
; GFX12-NEXT: s_endpgm
3904-
%gep = getelementptr i64, ptr %ptr, i32 4
3904+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
39053905
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
39063906
ret void
39073907
}
@@ -4006,7 +4006,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_system(ptr %ptr) #1
40064006
; GFX12-NEXT: s_wait_storecnt_dscnt 0x0
40074007
; GFX12-NEXT: global_inv scope:SCOPE_SYS
40084008
; GFX12-NEXT: s_endpgm
4009-
%gep = getelementptr i64, ptr %ptr, i32 4
4009+
%gep = getelementptr inbounds i64, ptr %ptr, i32 4
40104010
%result = atomicrmw uinc_wrap ptr %gep, i64 42 seq_cst, align 8, !noalias.addrspace !0
40114011
ret void
40124012
}
@@ -4169,7 +4169,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %
41694169
%id = call i32 @llvm.amdgcn.workitem.id.x()
41704170
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
41714171
%out.gep = getelementptr i64, ptr %out, i32 %id
4172-
%gep = getelementptr i64, ptr %gep.tid, i32 5
4172+
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
41734173
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
41744174
store i64 %result, ptr %out.gep, align 4
41754175
ret void
@@ -4297,7 +4297,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #1
42974297
; GFX12-NEXT: s_endpgm
42984298
%id = call i32 @llvm.amdgcn.workitem.id.x()
42994299
%gep.tid = getelementptr i64, ptr %ptr, i32 %id
4300-
%gep = getelementptr i64, ptr %gep.tid, i32 5
4300+
%gep = getelementptr inbounds i64, ptr %gep.tid, i32 5
43014301
%result = atomicrmw uinc_wrap ptr %gep, i64 42 syncscope("agent") seq_cst, align 8, !noalias.addrspace !0
43024302
ret void
43034303
}

llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx942.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half>
108108
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
109109
; GFX942-NEXT: buffer_inv sc1
110110
; GFX942-NEXT: s_setpc_b64 s[30:31]
111-
%gep = getelementptr <2 x half>, ptr %ptr, i32 256
111+
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
112112
%result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
113113
ret <2 x half> %result
114114
}
@@ -122,7 +122,7 @@ define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val
122122
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
123123
; GFX942-NEXT: buffer_inv sc1
124124
; GFX942-NEXT: s_setpc_b64 s[30:31]
125-
%gep = getelementptr <2 x half>, ptr %ptr, i32 256
125+
%gep = getelementptr inbounds <2 x half>, ptr %ptr, i32 256
126126
%unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
127127
ret void
128128
}

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ body: |
153153
%2:vgpr(s32) = COPY $vgpr3
154154
%3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
155155
%4:vgpr(s64) = G_CONSTANT i64 4
156-
%5:vgpr(p0) = G_PTR_ADD %0, %4
156+
%5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
157157
%6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
158158
$vgpr0 = COPY %6
159159
@@ -305,7 +305,7 @@ body: |
305305
%2:vgpr(s64) = COPY $vgpr4_vgpr5
306306
%3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
307307
%4:vgpr(s64) = G_CONSTANT i64 4
308-
%5:vgpr(p0) = G_PTR_ADD %0, %4
308+
%5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
309309
%6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0)
310310
$vgpr0_vgpr1 = COPY %6
311311
@@ -406,7 +406,7 @@ body: |
406406
%2:vgpr(s32) = COPY $vgpr3
407407
%3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
408408
%4:vgpr(s64) = G_CONSTANT i64 -4
409-
%5:vgpr(p0) = G_PTR_ADD %0, %4
409+
%5:vgpr(p0) = inbounds G_PTR_ADD %0, %4
410410
%6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
411411
$vgpr0 = COPY %6
412412

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ body: |
101101
%0:vgpr(p0) = COPY $vgpr0_vgpr1
102102
%1:vgpr(s32) = COPY $vgpr2
103103
%2:vgpr(s64) = G_CONSTANT i64 2047
104-
%3:vgpr(p0) = G_PTR_ADD %0, %2
104+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
105105
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
106106
$vgpr0 = COPY %4
107107
@@ -155,7 +155,7 @@ body: |
155155
%0:vgpr(p0) = COPY $vgpr0_vgpr1
156156
%1:vgpr(s32) = COPY $vgpr2
157157
%2:vgpr(s64) = G_CONSTANT i64 2047
158-
%3:vgpr(p0) = G_PTR_ADD %0, %2
158+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
159159
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
160160
161161
...
@@ -211,7 +211,7 @@ body: |
211211
%0:vgpr(p0) = COPY $vgpr0_vgpr1
212212
%1:vgpr(s32) = COPY $vgpr2
213213
%2:vgpr(s64) = G_CONSTANT i64 2048
214-
%3:vgpr(p0) = G_PTR_ADD %0, %2
214+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
215215
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
216216
$vgpr0 = COPY %4
217217
@@ -265,7 +265,7 @@ body: |
265265
%0:vgpr(p0) = COPY $vgpr0_vgpr1
266266
%1:vgpr(s32) = COPY $vgpr2
267267
%2:vgpr(s64) = G_CONSTANT i64 2048
268-
%3:vgpr(p0) = G_PTR_ADD %0, %2
268+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
269269
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
270270
271271
...
@@ -321,7 +321,7 @@ body: |
321321
%0:vgpr(p0) = COPY $vgpr0_vgpr1
322322
%1:vgpr(s32) = COPY $vgpr2
323323
%2:vgpr(s64) = G_CONSTANT i64 4095
324-
%3:vgpr(p0) = G_PTR_ADD %0, %2
324+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
325325
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
326326
$vgpr0 = COPY %4
327327
@@ -375,7 +375,7 @@ body: |
375375
%0:vgpr(p0) = COPY $vgpr0_vgpr1
376376
%1:vgpr(s32) = COPY $vgpr2
377377
%2:vgpr(s64) = G_CONSTANT i64 4095
378-
%3:vgpr(p0) = G_PTR_ADD %0, %2
378+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
379379
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
380380
381381
...
@@ -463,7 +463,7 @@ body: |
463463
%0:vgpr(p0) = COPY $vgpr0_vgpr1
464464
%1:vgpr(s32) = COPY $vgpr2
465465
%2:vgpr(s64) = G_CONSTANT i64 4097
466-
%3:vgpr(p0) = G_PTR_ADD %0, %2
466+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
467467
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
468468
$vgpr0 = COPY %4
469469
@@ -547,7 +547,7 @@ body: |
547547
%0:vgpr(p0) = COPY $vgpr0_vgpr1
548548
%1:vgpr(s32) = COPY $vgpr2
549549
%2:vgpr(s64) = G_CONSTANT i64 4097
550-
%3:vgpr(p0) = G_PTR_ADD %0, %2
550+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
551551
%4:vgpr(s32) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s32), addrspace 0)
552552
553553
...
@@ -647,7 +647,7 @@ body: |
647647
%0:vgpr(p0) = COPY $vgpr0_vgpr1
648648
%1:vgpr(s64) = COPY $vgpr2_vgpr3
649649
%2:vgpr(s64) = G_CONSTANT i64 4095
650-
%3:vgpr(p0) = G_PTR_ADD %0, %2
650+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
651651
%4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
652652
$vgpr0_vgpr1 = COPY %4
653653
@@ -701,7 +701,7 @@ body: |
701701
%0:vgpr(p0) = COPY $vgpr0_vgpr1
702702
%1:vgpr(s64) = COPY $vgpr2_vgpr3
703703
%2:vgpr(s64) = G_CONSTANT i64 4095
704-
%3:vgpr(p0) = G_PTR_ADD %0, %2
704+
%3:vgpr(p0) = inbounds G_PTR_ADD %0, %2
705705
%4:vgpr(s64) = G_ATOMICRMW_ADD %3, %1 :: (load store seq_cst (s64), addrspace 0)
706706
707707
...

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ body: |
492492
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
493493
%0:vgpr(p0) = COPY $vgpr0_vgpr1
494494
%1:vgpr(s64) = G_CONSTANT i64 -2048
495-
%2:vgpr(p0) = G_PTR_ADD %0, %1
495+
%2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
496496
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
497497
$vgpr0 = COPY %3
498498
@@ -561,7 +561,7 @@ body: |
561561
; GFX12-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
562562
%0:vgpr(p0) = COPY $vgpr0_vgpr1
563563
%1:vgpr(s64) = G_CONSTANT i64 4095
564-
%2:vgpr(p0) = G_PTR_ADD %0, %1
564+
%2:vgpr(p0) = inbounds G_PTR_ADD %0, %1
565565
%3:vgpr(s32) = G_LOAD %2 :: (load seq_cst (s32), align 4, addrspace 0)
566566
$vgpr0 = COPY %3
567567

0 commit comments

Comments
 (0)