Skip to content

Commit 30a980e

Browse files
committed
Reorganize tests for adds and subs
1 parent f07c87c commit 30a980e

9 files changed

+709
-700
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
6666
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
6767
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
6868
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
69-
; IR-NEXT: br i1 [[TMP8]], label %[[TMP9:.*]], label %[[BB11:.*]]
70-
; IR: [[TMP9]]:
69+
; IR-NEXT: br i1 [[TMP8]], label %[[BB9:.*]], label %[[BB11:.*]]
70+
; IR: [[BB9]]:
7171
; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 [[TMP7]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
7272
; IR-NEXT: br label %[[BB11]]
7373
; IR: [[BB11]]:
74-
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[TMP9]] ]
74+
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[BB9]] ]
7575
; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP12]])
7676
; IR-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP5]]
7777
; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -162,12 +162,12 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
162162
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
163163
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
164164
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
165-
; IR-NEXT: br i1 [[TMP8]], label %[[TMP9:.*]], label %[[BB11:.*]]
166-
; IR: [[TMP9]]:
165+
; IR-NEXT: br i1 [[TMP8]], label %[[BB9:.*]], label %[[BB11:.*]]
166+
; IR: [[BB9]]:
167167
; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 [[TMP7]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
168168
; IR-NEXT: br label %[[BB11]]
169169
; IR: [[BB11]]:
170-
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[TMP9]] ]
170+
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[BB9]] ]
171171
; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP12]])
172172
; IR-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], [[TMP5]]
173173
; IR-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32> [[ARG]], <4 x i32> [[ARG]], i32 [[TMP14]], i32 0, i32 0, i32 0)
@@ -261,12 +261,12 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
261261
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
262262
; IR-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 1
263263
; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], 0
264-
; IR-NEXT: br i1 [[TMP9]], label %[[TMP10:.*]], label %[[BB12:.*]]
265-
; IR: [[TMP10]]:
264+
; IR-NEXT: br i1 [[TMP9]], label %[[BB10:.*]], label %[[BB12:.*]]
265+
; IR: [[BB10]]:
266266
; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 [[TMP8]], <4 x i32> [[ARG]], i32 0, i32 0, i32 0, i32 0)
267267
; IR-NEXT: br label %[[BB12]]
268268
; IR: [[BB12]]:
269-
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], %[[TMP10]] ]
269+
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], %[[BB10]] ]
270270
; IR-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP13]])
271271
; IR-NEXT: [[TMP15:%.*]] = and i32 [[TMP5]], 1
272272
; IR-NEXT: [[TMP16:%.*]] = xor i32 [[TMP14]], [[TMP15]]
@@ -360,12 +360,12 @@ define amdgpu_cs void @atomic_ptr_add_and_format(ptr addrspace(8) inreg %arg) {
360360
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
361361
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
362362
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
363-
; IR-NEXT: br i1 [[TMP8]], label %[[TMP9:.*]], label %[[BB11:.*]]
364-
; IR: [[TMP9]]:
363+
; IR-NEXT: br i1 [[TMP8]], label %[[BB9:.*]], label %[[BB11:.*]]
364+
; IR: [[BB9]]:
365365
; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 [[TMP7]], ptr addrspace(8) [[ARG]], i32 0, i32 0, i32 0, i32 0)
366366
; IR-NEXT: br label %[[BB11]]
367367
; IR: [[BB11]]:
368-
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[TMP9]] ]
368+
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[BB9]] ]
369369
; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP12]])
370370
; IR-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[TMP5]]
371371
; IR-NEXT: [[ARG_INT:%.*]] = ptrtoint ptr addrspace(8) [[ARG]] to i128
@@ -460,12 +460,12 @@ define amdgpu_cs void @atomic_ptr_sub_and_format(ptr addrspace(8) inreg %arg) {
460460
; IR-NEXT: [[TMP6:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0]])
461461
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
462462
; IR-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP5]], 0
463-
; IR-NEXT: br i1 [[TMP8]], label %[[TMP9:.*]], label %[[BB11:.*]]
464-
; IR: [[TMP9]]:
463+
; IR-NEXT: br i1 [[TMP8]], label %[[BB9:.*]], label %[[BB11:.*]]
464+
; IR: [[BB9]]:
465465
; IR-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32 [[TMP7]], ptr addrspace(8) [[ARG]], i32 0, i32 0, i32 0, i32 0)
466466
; IR-NEXT: br label %[[BB11]]
467467
; IR: [[BB11]]:
468-
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[TMP9]] ]
468+
; IR-NEXT: [[TMP12:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP10]], %[[BB9]] ]
469469
; IR-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP12]])
470470
; IR-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], [[TMP5]]
471471
; IR-NEXT: [[ARG_INT:%.*]] = ptrtoint ptr addrspace(8) [[ARG]] to i128
@@ -563,12 +563,12 @@ define amdgpu_cs void @atomic_ptr_xor_and_format(ptr addrspace(8) inreg %arg) {
563563
; IR-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
564564
; IR-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 1
565565
; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], 0
566-
; IR-NEXT: br i1 [[TMP9]], label %[[TMP10:.*]], label %[[BB12:.*]]
567-
; IR: [[TMP10]]:
566+
; IR-NEXT: br i1 [[TMP9]], label %[[BB10:.*]], label %[[BB12:.*]]
567+
; IR: [[BB10]]:
568568
; IR-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32 [[TMP8]], ptr addrspace(8) [[ARG]], i32 0, i32 0, i32 0, i32 0)
569569
; IR-NEXT: br label %[[BB12]]
570570
; IR: [[BB12]]:
571-
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], %[[TMP10]] ]
571+
; IR-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[DOTENTRY:%.*]] ], [ [[TMP11]], %[[BB10]] ]
572572
; IR-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TMP13]])
573573
; IR-NEXT: [[TMP15:%.*]] = and i32 [[TMP5]], 1
574574
; IR-NEXT: [[TMP16:%.*]] = xor i32 [[TMP14]], [[TMP15]]
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
2+
3+
---
4+
name: uaddo_s32_ss
5+
legalized: true
6+
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0, $sgpr1
10+
; CHECK-LABEL: name: uaddo_s32_ss
11+
; CHECK: liveins: $sgpr0, $sgpr1
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
15+
; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s1) = G_UADDO [[COPY]], [[COPY1]]
16+
%0:_(s32) = COPY $sgpr0
17+
%1:_(s32) = COPY $sgpr1
18+
%2:_(s32), %3:_(s1) = G_UADDO %0, %1
19+
...
20+
21+
---
22+
name: uaddo_s32_sv
23+
legalized: true
24+
25+
body: |
26+
bb.0:
27+
liveins: $sgpr0, $vgpr1
28+
; CHECK-LABEL: name: uaddo_s32_sv
29+
; CHECK: liveins: $sgpr0, $vgpr1
30+
; CHECK-NEXT: {{ $}}
31+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
32+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
33+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
34+
; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]]
35+
%0:_(s32) = COPY $sgpr0
36+
%1:_(s32) = COPY $vgpr1
37+
%2:_(s32), %3:_(s1) = G_UADDO %0, %1
38+
...
39+
40+
---
41+
name: uaddo_s32_vs
42+
legalized: true
43+
44+
body: |
45+
bb.0:
46+
liveins: $vgpr0, $sgpr1
47+
; CHECK-LABEL: name: uaddo_s32_vs
48+
; CHECK: liveins: $vgpr0, $sgpr1
49+
; CHECK-NEXT: {{ $}}
50+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
51+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
52+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
53+
; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]]
54+
%0:_(s32) = COPY $vgpr0
55+
%1:_(s32) = COPY $sgpr1
56+
%2:_(s32), %3:_(s1) = G_UADDO %0, %1
57+
...
58+
59+
---
60+
name: uaddo_s32_vv
61+
legalized: true
62+
63+
body: |
64+
bb.0:
65+
liveins: $vgpr0, $vgpr1
66+
; CHECK-LABEL: name: uaddo_s32_vv
67+
; CHECK: liveins: $vgpr0, $vgpr1
68+
; CHECK-NEXT: {{ $}}
69+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
70+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
71+
; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]]
72+
%0:_(s32) = COPY $vgpr0
73+
%1:_(s32) = COPY $vgpr1
74+
%2:_(s32), %3:_(s1) = G_UADDO %0, %1
75+
...
76+
77+
---
78+
name: uadde_s32_ss
79+
legalized: true
80+
81+
body: |
82+
bb.0:
83+
liveins: $sgpr0, $sgpr1, $sgpr2
84+
; CHECK-LABEL: name: uadde_s32_ss
85+
; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
86+
; CHECK-NEXT: {{ $}}
87+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
88+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
89+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
90+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY2]](s32)
91+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s1) = G_UADDE [[COPY]], [[COPY1]], [[TRUNC]]
92+
%0:_(s32) = COPY $sgpr0
93+
%1:_(s32) = COPY $sgpr1
94+
%2:_(s32) = COPY $sgpr2
95+
%3:_(s1) = G_TRUNC %2
96+
%4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
97+
...
98+
99+
---
100+
name: uadde_s32_sv
101+
legalized: true
102+
103+
body: |
104+
bb.0:
105+
liveins: $sgpr0, $vgpr1, $sgpr2
106+
; CHECK-LABEL: name: uadde_s32_sv
107+
; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
108+
; CHECK-NEXT: {{ $}}
109+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
110+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
111+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
112+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
113+
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
114+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
115+
%0:_(s32) = COPY $sgpr0
116+
%1:_(s32) = COPY $vgpr1
117+
%2:_(s32) = COPY $sgpr2
118+
%3:_(s1) = G_TRUNC %2
119+
%4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
120+
...
121+
122+
---
123+
name: uadde_s32_vs
124+
legalized: true
125+
126+
body: |
127+
bb.0:
128+
liveins: $vgpr0, $sgpr1, $sgpr2
129+
; CHECK-LABEL: name: uadde_s32_vs
130+
; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
131+
; CHECK-NEXT: {{ $}}
132+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
133+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
134+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
135+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
136+
; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
137+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
138+
%0:_(s32) = COPY $vgpr0
139+
%1:_(s32) = COPY $sgpr1
140+
%2:_(s32) = COPY $sgpr2
141+
%3:_(s1) = G_TRUNC %2
142+
%4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
143+
...
144+
145+
---
146+
name: uadde_s32_vv
147+
legalized: true
148+
149+
body: |
150+
bb.0:
151+
liveins: $vgpr0, $vgpr1, $vgpr2
152+
; CHECK-LABEL: name: uadde_s32_vv
153+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
154+
; CHECK-NEXT: {{ $}}
155+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
156+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
157+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
158+
; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
159+
; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
160+
; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
161+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
162+
; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]]
163+
%0:_(s32) = COPY $vgpr0
164+
%1:_(s32) = COPY $vgpr1
165+
%2:_(s32) = COPY $vgpr2
166+
%3:_(s1) = G_TRUNC %2
167+
%4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
168+
...

0 commit comments

Comments
 (0)