Skip to content

Commit c0637c6

Browse files
committed
factor out intrinsics update change to another PR
1 parent 1d29c84 commit c0637c6

File tree

1 file changed

+25
-75
lines changed

1 file changed

+25
-75
lines changed

llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll

Lines changed: 25 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -4,56 +4,6 @@
44

55
target triple = "nvptx64-nvidia-cuda"
66

7-
declare ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3), i32)
8-
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr)
9-
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
10-
declare ptr @llvm.nvvm.mapa(ptr, i32)
11-
12-
; Common setup for distributed shared memory cluster addressing
13-
define i32 @test_distributed_shared_cluster_common(ptr %ptr, ptr addrspace(3) %smem_ptr) local_unnamed_addr {
14-
; CHECK-LABEL: test_distributed_shared_cluster_common(
15-
; CHECK: {
16-
; CHECK-NEXT: .reg .pred %p<3>;
17-
; CHECK-NEXT: .reg .b32 %r<8>;
18-
; CHECK-NEXT: .reg .b64 %rd<5>;
19-
; CHECK-EMPTY:
20-
; CHECK-NEXT: // %bb.0: // %entry
21-
; CHECK-NEXT: ld.param.u64 %rd1, [test_distributed_shared_cluster_common_param_0];
22-
; CHECK-NEXT: ld.param.u64 %rd2, [test_distributed_shared_cluster_common_param_1];
23-
; CHECK-NEXT: mov.u32 %r1, %ctaid.x;
24-
; CHECK-NEXT: xor.b32 %r2, %r1, 1;
25-
; CHECK-NEXT: isspacep.shared::cluster %p1, %rd1;
26-
; CHECK-NEXT: mapa.u64 %rd3, %rd1, %r2;
27-
; CHECK-NEXT: isspacep.shared::cluster %p2, %rd3;
28-
; CHECK-NEXT: mapa.shared::cluster.u64 %rd4, %rd2, %r2;
29-
; CHECK-NEXT: ld.shared::cluster.u32 %r3, [%rd4];
30-
; CHECK-NEXT: add.s32 %r4, %r3, 42;
31-
; CHECK-NEXT: st.shared::cluster.u32 [%rd4], %r4;
32-
; CHECK-NEXT: selp.b32 %r5, 1, 0, %p1;
33-
; CHECK-NEXT: selp.b32 %r6, 1, 0, %p2;
34-
; CHECK-NEXT: add.s32 %r7, %r5, %r6;
35-
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
36-
; CHECK-NEXT: ret;
37-
entry:
38-
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
39-
%1 = xor i32 %0, 1
40-
%2 = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %ptr)
41-
%3 = tail call ptr @llvm.nvvm.mapa(ptr %ptr, i32 %1)
42-
%4 = tail call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %3)
43-
%dsmem_ptr = call ptr addrspace(7) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %smem_ptr, i32 %1)
44-
45-
; Add load and store to the distributed shared memory cluster
46-
%loaded_val = load i32, ptr addrspace(7) %dsmem_ptr
47-
%updated_val = add i32 %loaded_val, 42
48-
store i32 %updated_val, ptr addrspace(7) %dsmem_ptr
49-
50-
; Return value preserves the isspacep test results plus the value operation
51-
%5 = zext i1 %2 to i32
52-
%6 = zext i1 %4 to i32
53-
%ret = add i32 %5, %6
54-
ret i32 %ret
55-
}
56-
577
; Floating point atomic operations tests
588
define void @test_distributed_shared_cluster_float_atomic(ptr addrspace(7) %dsmem_ptr) local_unnamed_addr {
599
; CHECK-LABEL: test_distributed_shared_cluster_float_atomic(
@@ -207,84 +157,84 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
207157
; CHECK-NEXT: shl.b32 %r3, %r37, %r1;
208158
; CHECK-NEXT: ld.shared::cluster.u32 %r38, [%rd1];
209159
; CHECK-NEXT: and.b32 %r48, %r38, %r2;
210-
; CHECK-NEXT: $L__BB4_1: // %partword.cmpxchg.loop33
160+
; CHECK-NEXT: $L__BB3_1: // %partword.cmpxchg.loop33
211161
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
212162
; CHECK-NEXT: or.b32 %r39, %r48, %r3;
213163
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r6, [%rd1], %r39, %r48;
214164
; CHECK-NEXT: setp.eq.s32 %p1, %r6, %r39;
215-
; CHECK-NEXT: @%p1 bra $L__BB4_3;
165+
; CHECK-NEXT: @%p1 bra $L__BB3_3;
216166
; CHECK-NEXT: // %bb.2: // %partword.cmpxchg.failure32
217-
; CHECK-NEXT: // in Loop: Header=BB4_1 Depth=1
167+
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
218168
; CHECK-NEXT: and.b32 %r7, %r6, %r2;
219169
; CHECK-NEXT: setp.ne.s32 %p2, %r48, %r7;
220170
; CHECK-NEXT: mov.b32 %r48, %r7;
221-
; CHECK-NEXT: @%p2 bra $L__BB4_1;
222-
; CHECK-NEXT: $L__BB4_3: // %partword.cmpxchg.end31
171+
; CHECK-NEXT: @%p2 bra $L__BB3_1;
172+
; CHECK-NEXT: $L__BB3_3: // %partword.cmpxchg.end31
223173
; CHECK-NEXT: ld.shared::cluster.u32 %r40, [%rd1];
224174
; CHECK-NEXT: and.b32 %r49, %r40, %r2;
225-
; CHECK-NEXT: $L__BB4_4: // %partword.cmpxchg.loop23
175+
; CHECK-NEXT: $L__BB3_4: // %partword.cmpxchg.loop23
226176
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
227177
; CHECK-NEXT: or.b32 %r41, %r49, %r3;
228178
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r10, [%rd1], %r41, %r49;
229179
; CHECK-NEXT: setp.eq.s32 %p3, %r10, %r41;
230-
; CHECK-NEXT: @%p3 bra $L__BB4_6;
180+
; CHECK-NEXT: @%p3 bra $L__BB3_6;
231181
; CHECK-NEXT: // %bb.5: // %partword.cmpxchg.failure22
232-
; CHECK-NEXT: // in Loop: Header=BB4_4 Depth=1
182+
; CHECK-NEXT: // in Loop: Header=BB3_4 Depth=1
233183
; CHECK-NEXT: and.b32 %r11, %r10, %r2;
234184
; CHECK-NEXT: setp.ne.s32 %p4, %r49, %r11;
235185
; CHECK-NEXT: mov.b32 %r49, %r11;
236-
; CHECK-NEXT: @%p4 bra $L__BB4_4;
237-
; CHECK-NEXT: $L__BB4_6: // %partword.cmpxchg.end21
186+
; CHECK-NEXT: @%p4 bra $L__BB3_4;
187+
; CHECK-NEXT: $L__BB3_6: // %partword.cmpxchg.end21
238188
; CHECK-NEXT: fence.acq_rel.sys;
239189
; CHECK-NEXT: fence.acq_rel.sys;
240190
; CHECK-NEXT: ld.shared::cluster.u32 %r42, [%rd1];
241191
; CHECK-NEXT: and.b32 %r50, %r42, %r2;
242-
; CHECK-NEXT: $L__BB4_7: // %partword.cmpxchg.loop13
192+
; CHECK-NEXT: $L__BB3_7: // %partword.cmpxchg.loop13
243193
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
244194
; CHECK-NEXT: or.b32 %r43, %r50, %r3;
245195
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r14, [%rd1], %r43, %r50;
246196
; CHECK-NEXT: setp.eq.s32 %p5, %r14, %r43;
247-
; CHECK-NEXT: @%p5 bra $L__BB4_9;
197+
; CHECK-NEXT: @%p5 bra $L__BB3_9;
248198
; CHECK-NEXT: // %bb.8: // %partword.cmpxchg.failure12
249-
; CHECK-NEXT: // in Loop: Header=BB4_7 Depth=1
199+
; CHECK-NEXT: // in Loop: Header=BB3_7 Depth=1
250200
; CHECK-NEXT: and.b32 %r15, %r14, %r2;
251201
; CHECK-NEXT: setp.ne.s32 %p6, %r50, %r15;
252202
; CHECK-NEXT: mov.b32 %r50, %r15;
253-
; CHECK-NEXT: @%p6 bra $L__BB4_7;
254-
; CHECK-NEXT: $L__BB4_9: // %partword.cmpxchg.end11
203+
; CHECK-NEXT: @%p6 bra $L__BB3_7;
204+
; CHECK-NEXT: $L__BB3_9: // %partword.cmpxchg.end11
255205
; CHECK-NEXT: fence.acq_rel.sys;
256206
; CHECK-NEXT: ld.shared::cluster.u32 %r44, [%rd1];
257207
; CHECK-NEXT: and.b32 %r51, %r44, %r2;
258-
; CHECK-NEXT: $L__BB4_10: // %partword.cmpxchg.loop3
208+
; CHECK-NEXT: $L__BB3_10: // %partword.cmpxchg.loop3
259209
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
260210
; CHECK-NEXT: or.b32 %r45, %r51, %r3;
261211
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r18, [%rd1], %r45, %r51;
262212
; CHECK-NEXT: setp.eq.s32 %p7, %r18, %r45;
263-
; CHECK-NEXT: @%p7 bra $L__BB4_12;
213+
; CHECK-NEXT: @%p7 bra $L__BB3_12;
264214
; CHECK-NEXT: // %bb.11: // %partword.cmpxchg.failure2
265-
; CHECK-NEXT: // in Loop: Header=BB4_10 Depth=1
215+
; CHECK-NEXT: // in Loop: Header=BB3_10 Depth=1
266216
; CHECK-NEXT: and.b32 %r19, %r18, %r2;
267217
; CHECK-NEXT: setp.ne.s32 %p8, %r51, %r19;
268218
; CHECK-NEXT: mov.b32 %r51, %r19;
269-
; CHECK-NEXT: @%p8 bra $L__BB4_10;
270-
; CHECK-NEXT: $L__BB4_12: // %partword.cmpxchg.end1
219+
; CHECK-NEXT: @%p8 bra $L__BB3_10;
220+
; CHECK-NEXT: $L__BB3_12: // %partword.cmpxchg.end1
271221
; CHECK-NEXT: fence.acq_rel.sys;
272222
; CHECK-NEXT: fence.sc.sys;
273223
; CHECK-NEXT: ld.shared::cluster.u32 %r46, [%rd1];
274224
; CHECK-NEXT: and.b32 %r52, %r46, %r2;
275-
; CHECK-NEXT: $L__BB4_13: // %partword.cmpxchg.loop
225+
; CHECK-NEXT: $L__BB3_13: // %partword.cmpxchg.loop
276226
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
277227
; CHECK-NEXT: or.b32 %r47, %r52, %r3;
278228
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r22, [%rd1], %r47, %r52;
279229
; CHECK-NEXT: setp.eq.s32 %p9, %r22, %r47;
280-
; CHECK-NEXT: @%p9 bra $L__BB4_15;
230+
; CHECK-NEXT: @%p9 bra $L__BB3_15;
281231
; CHECK-NEXT: // %bb.14: // %partword.cmpxchg.failure
282-
; CHECK-NEXT: // in Loop: Header=BB4_13 Depth=1
232+
; CHECK-NEXT: // in Loop: Header=BB3_13 Depth=1
283233
; CHECK-NEXT: and.b32 %r23, %r22, %r2;
284234
; CHECK-NEXT: setp.ne.s32 %p10, %r52, %r23;
285235
; CHECK-NEXT: mov.b32 %r52, %r23;
286-
; CHECK-NEXT: @%p10 bra $L__BB4_13;
287-
; CHECK-NEXT: $L__BB4_15: // %partword.cmpxchg.end
236+
; CHECK-NEXT: @%p10 bra $L__BB3_13;
237+
; CHECK-NEXT: $L__BB3_15: // %partword.cmpxchg.end
288238
; CHECK-NEXT: fence.acq_rel.sys;
289239
; CHECK-NEXT: ret;
290240
entry:

0 commit comments

Comments
 (0)