Skip to content

Commit 63e7766

Browse files
authored
[SystemZ] Allow forming overflow op for i128 (#153557)
Allow matching the i128 overflow pattern into UADDO, which in turn allows the use of vaccq.
1 parent cba5f1b commit 63e7766

File tree

3 files changed

+225
-49
lines changed

3 files changed

+225
-49
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -523,7 +523,7 @@ class SystemZTargetLowering : public TargetLowering {
523523
bool MathUsed) const override {
524524
// Form add and sub with overflow intrinsics regardless of any extra
525525
// users of the math result.
526-
return VT == MVT::i32 || VT == MVT::i64;
526+
return VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i128;
527527
}
528528

529529
bool shouldConsiderGEPOffsetSplit() const override { return true; }

llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -363,10 +363,11 @@ define i128 @atomicrmw_uinc_wrap(ptr %src, i128 %b) {
363363
define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
364364
; CHECK-LABEL: atomicrmw_udec_wrap:
365365
; CHECK: # %bb.0:
366+
; CHECK-NEXT: larl %r1, .LCPI12_0
366367
; CHECK-NEXT: vl %v0, 0(%r4), 3
367368
; CHECK-NEXT: vl %v3, 0(%r3), 4
368-
; CHECK-NEXT: vgbm %v1, 65535
369-
; CHECK-NEXT: vgbm %v2, 0
369+
; CHECK-NEXT: vl %v1, 0(%r1), 3
370+
; CHECK-NEXT: vgbm %v2, 65535
370371
; CHECK-NEXT: j .LBB12_2
371372
; CHECK-NEXT: .LBB12_1: # %atomicrmw.start
372373
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
@@ -379,6 +380,9 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
379380
; CHECK-NEXT: je .LBB12_8
380381
; CHECK-NEXT: .LBB12_2: # %atomicrmw.start
381382
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
383+
; CHECK-NEXT: vscbiq %v4, %v3, %v1
384+
; CHECK-NEXT: vlgvf %r0, %v4, 3
385+
; CHECK-NEXT: xilf %r0, 1
382386
; CHECK-NEXT: veclg %v0, %v3
383387
; CHECK-NEXT: jlh .LBB12_4
384388
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
@@ -390,12 +394,11 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) {
390394
; CHECK-NEXT: jl .LBB12_6
391395
; CHECK-NEXT: # %bb.5: # %atomicrmw.start
392396
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
393-
; CHECK-NEXT: vaq %v4, %v3, %v1
397+
; CHECK-NEXT: vaq %v4, %v3, %v2
394398
; CHECK-NEXT: .LBB12_6: # %atomicrmw.start
395399
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
396-
; CHECK-NEXT: vceqgs %v5, %v3, %v2
397400
; CHECK-NEXT: vlr %v5, %v0
398-
; CHECK-NEXT: je .LBB12_1
401+
; CHECK-NEXT: cijlh %r0, 0, .LBB12_1
399402
; CHECK-NEXT: # %bb.7: # %atomicrmw.start
400403
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
401404
; CHECK-NEXT: vlr %v5, %v4
Lines changed: 216 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,87 +1,260 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; Test usage of VACC/VSCBI.
33
;
4-
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
4+
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=BASELINE
5+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s --check-prefix=Z13
56

67
define i128 @i128_subc_1(i128 %a, i128 %b) unnamed_addr {
7-
; CHECK-LABEL: i128_subc_1:
8-
; CHECK: # %bb.0:
9-
; CHECK-NEXT: vl %v0, 0(%r4), 3
10-
; CHECK-NEXT: vl %v1, 0(%r3), 3
11-
; CHECK-NEXT: vscbiq %v0, %v1, %v0
12-
; CHECK-NEXT: vst %v0, 0(%r2), 3
13-
; CHECK-NEXT: br %r14
8+
; BASELINE-LABEL: i128_subc_1:
9+
; BASELINE: # %bb.0:
10+
; BASELINE-NEXT: stmg %r14, %r15, 112(%r15)
11+
; BASELINE-NEXT: .cfi_offset %r14, -48
12+
; BASELINE-NEXT: .cfi_offset %r15, -40
13+
; BASELINE-NEXT: lg %r5, 0(%r4)
14+
; BASELINE-NEXT: lg %r14, 0(%r3)
15+
; BASELINE-NEXT: lg %r1, 8(%r3)
16+
; BASELINE-NEXT: clgr %r14, %r5
17+
; BASELINE-NEXT: ipm %r0
18+
; BASELINE-NEXT: clg %r1, 8(%r4)
19+
; BASELINE-NEXT: ipm %r1
20+
; BASELINE-NEXT: cgrjlh %r14, %r5, .LBB0_2
21+
; BASELINE-NEXT: # %bb.1:
22+
; BASELINE-NEXT: xilf %r1, 4294967295
23+
; BASELINE-NEXT: risbg %r0, %r1, 63, 191, 36
24+
; BASELINE-NEXT: j .LBB0_3
25+
; BASELINE-NEXT: .LBB0_2:
26+
; BASELINE-NEXT: xilf %r0, 4294967295
27+
; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 36
28+
; BASELINE-NEXT: .LBB0_3:
29+
; BASELINE-NEXT: llgfr %r0, %r0
30+
; BASELINE-NEXT: stg %r0, 8(%r2)
31+
; BASELINE-NEXT: mvghi 0(%r2), 0
32+
; BASELINE-NEXT: lmg %r14, %r15, 112(%r15)
33+
; BASELINE-NEXT: br %r14
34+
;
35+
; Z13-LABEL: i128_subc_1:
36+
; Z13: # %bb.0:
37+
; Z13-NEXT: vl %v0, 0(%r4), 3
38+
; Z13-NEXT: vl %v1, 0(%r3), 3
39+
; Z13-NEXT: vscbiq %v0, %v1, %v0
40+
; Z13-NEXT: vst %v0, 0(%r2), 3
41+
; Z13-NEXT: br %r14
1442
%cmp = icmp uge i128 %a, %b
1543
%ext = zext i1 %cmp to i128
1644
ret i128 %ext
1745
}
1846

1947
define i128 @i128_subc_2(i128 %a, i128 %b) unnamed_addr {
20-
; CHECK-LABEL: i128_subc_2:
21-
; CHECK: # %bb.0:
22-
; CHECK-NEXT: vl %v0, 0(%r3), 3
23-
; CHECK-NEXT: vl %v1, 0(%r4), 3
24-
; CHECK-NEXT: vscbiq %v0, %v1, %v0
25-
; CHECK-NEXT: vst %v0, 0(%r2), 3
26-
; CHECK-NEXT: br %r14
48+
; BASELINE-LABEL: i128_subc_2:
49+
; BASELINE: # %bb.0:
50+
; BASELINE-NEXT: stmg %r14, %r15, 112(%r15)
51+
; BASELINE-NEXT: .cfi_offset %r14, -48
52+
; BASELINE-NEXT: .cfi_offset %r15, -40
53+
; BASELINE-NEXT: lg %r5, 0(%r4)
54+
; BASELINE-NEXT: lg %r14, 0(%r3)
55+
; BASELINE-NEXT: lg %r0, 8(%r3)
56+
; BASELINE-NEXT: clgr %r14, %r5
57+
; BASELINE-NEXT: ipm %r1
58+
; BASELINE-NEXT: clg %r0, 8(%r4)
59+
; BASELINE-NEXT: ipm %r0
60+
; BASELINE-NEXT: cgrjlh %r14, %r5, .LBB1_2
61+
; BASELINE-NEXT: # %bb.1:
62+
; BASELINE-NEXT: afi %r0, -536870912
63+
; BASELINE-NEXT: srl %r0, 31
64+
; BASELINE-NEXT: j .LBB1_3
65+
; BASELINE-NEXT: .LBB1_2:
66+
; BASELINE-NEXT: afi %r1, -536870912
67+
; BASELINE-NEXT: srl %r1, 31
68+
; BASELINE-NEXT: lr %r0, %r1
69+
; BASELINE-NEXT: .LBB1_3:
70+
; BASELINE-NEXT: llgfr %r0, %r0
71+
; BASELINE-NEXT: stg %r0, 8(%r2)
72+
; BASELINE-NEXT: mvghi 0(%r2), 0
73+
; BASELINE-NEXT: lmg %r14, %r15, 112(%r15)
74+
; BASELINE-NEXT: br %r14
75+
;
76+
; Z13-LABEL: i128_subc_2:
77+
; Z13: # %bb.0:
78+
; Z13-NEXT: vl %v0, 0(%r3), 3
79+
; Z13-NEXT: vl %v1, 0(%r4), 3
80+
; Z13-NEXT: vscbiq %v0, %v1, %v0
81+
; Z13-NEXT: vst %v0, 0(%r2), 3
82+
; Z13-NEXT: br %r14
2783
%cmp = icmp ule i128 %a, %b
2884
%ext = zext i1 %cmp to i128
2985
ret i128 %ext
3086
}
3187

3288
define i128 @i128_addc_1(i128 %a, i128 %b) {
33-
; CHECK-LABEL: i128_addc_1:
34-
; CHECK: # %bb.0:
35-
; CHECK-NEXT: vl %v0, 0(%r4), 3
36-
; CHECK-NEXT: vl %v1, 0(%r3), 3
37-
; CHECK-NEXT: vaccq %v0, %v1, %v0
38-
; CHECK-NEXT: vst %v0, 0(%r2), 3
39-
; CHECK-NEXT: br %r14
89+
; BASELINE-LABEL: i128_addc_1:
90+
; BASELINE: # %bb.0:
91+
; BASELINE-NEXT: lg %r0, 8(%r3)
92+
; BASELINE-NEXT: lg %r1, 0(%r3)
93+
; BASELINE-NEXT: alg %r0, 8(%r4)
94+
; BASELINE-NEXT: alcg %r1, 0(%r4)
95+
; BASELINE-NEXT: ipm %r0
96+
; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35
97+
; BASELINE-NEXT: stg %r0, 8(%r2)
98+
; BASELINE-NEXT: mvghi 0(%r2), 0
99+
; BASELINE-NEXT: br %r14
100+
;
101+
; Z13-LABEL: i128_addc_1:
102+
; Z13: # %bb.0:
103+
; Z13-NEXT: vl %v0, 0(%r4), 3
104+
; Z13-NEXT: vl %v1, 0(%r3), 3
105+
; Z13-NEXT: vaccq %v0, %v1, %v0
106+
; Z13-NEXT: vst %v0, 0(%r2), 3
107+
; Z13-NEXT: br %r14
40108
%sum = add i128 %a, %b
41109
%cmp = icmp ult i128 %sum, %a
42110
%ext = zext i1 %cmp to i128
43111
ret i128 %ext
44112
}
45113

46114
define i128 @i128_addc_2(i128 %a, i128 %b) {
47-
; CHECK-LABEL: i128_addc_2:
48-
; CHECK: # %bb.0:
49-
; CHECK-NEXT: vl %v0, 0(%r4), 3
50-
; CHECK-NEXT: vl %v1, 0(%r3), 3
51-
; CHECK-NEXT: vaccq %v0, %v1, %v0
52-
; CHECK-NEXT: vst %v0, 0(%r2), 3
53-
; CHECK-NEXT: br %r14
115+
; BASELINE-LABEL: i128_addc_2:
116+
; BASELINE: # %bb.0:
117+
; BASELINE-NEXT: lg %r0, 8(%r3)
118+
; BASELINE-NEXT: lg %r1, 0(%r3)
119+
; BASELINE-NEXT: alg %r0, 8(%r4)
120+
; BASELINE-NEXT: alcg %r1, 0(%r4)
121+
; BASELINE-NEXT: ipm %r0
122+
; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35
123+
; BASELINE-NEXT: stg %r0, 8(%r2)
124+
; BASELINE-NEXT: mvghi 0(%r2), 0
125+
; BASELINE-NEXT: br %r14
126+
;
127+
; Z13-LABEL: i128_addc_2:
128+
; Z13: # %bb.0:
129+
; Z13-NEXT: vl %v0, 0(%r4), 3
130+
; Z13-NEXT: vl %v1, 0(%r3), 3
131+
; Z13-NEXT: vaccq %v0, %v1, %v0
132+
; Z13-NEXT: vst %v0, 0(%r2), 3
133+
; Z13-NEXT: br %r14
54134
%sum = add i128 %a, %b
55135
%cmp = icmp ult i128 %sum, %b
56136
%ext = zext i1 %cmp to i128
57137
ret i128 %ext
58138
}
59139

60140
define i128 @i128_addc_3(i128 %a, i128 %b) {
61-
; CHECK-LABEL: i128_addc_3:
62-
; CHECK: # %bb.0:
63-
; CHECK-NEXT: vl %v0, 0(%r4), 3
64-
; CHECK-NEXT: vl %v1, 0(%r3), 3
65-
; CHECK-NEXT: vaccq %v0, %v1, %v0
66-
; CHECK-NEXT: vst %v0, 0(%r2), 3
67-
; CHECK-NEXT: br %r14
141+
; BASELINE-LABEL: i128_addc_3:
142+
; BASELINE: # %bb.0:
143+
; BASELINE-NEXT: lg %r0, 8(%r3)
144+
; BASELINE-NEXT: lg %r1, 0(%r3)
145+
; BASELINE-NEXT: alg %r0, 8(%r4)
146+
; BASELINE-NEXT: alcg %r1, 0(%r4)
147+
; BASELINE-NEXT: ipm %r0
148+
; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35
149+
; BASELINE-NEXT: stg %r0, 8(%r2)
150+
; BASELINE-NEXT: mvghi 0(%r2), 0
151+
; BASELINE-NEXT: br %r14
152+
;
153+
; Z13-LABEL: i128_addc_3:
154+
; Z13: # %bb.0:
155+
; Z13-NEXT: vl %v0, 0(%r4), 3
156+
; Z13-NEXT: vl %v1, 0(%r3), 3
157+
; Z13-NEXT: vaccq %v0, %v1, %v0
158+
; Z13-NEXT: vst %v0, 0(%r2), 3
159+
; Z13-NEXT: br %r14
68160
%sum = add i128 %a, %b
69161
%cmp = icmp ugt i128 %a, %sum
70162
%ext = zext i1 %cmp to i128
71163
ret i128 %ext
72164
}
73165

74166
define i128 @i128_addc_4(i128 %a, i128 %b) {
75-
; CHECK-LABEL: i128_addc_4:
76-
; CHECK: # %bb.0:
77-
; CHECK-NEXT: vl %v0, 0(%r4), 3
78-
; CHECK-NEXT: vl %v1, 0(%r3), 3
79-
; CHECK-NEXT: vaccq %v0, %v1, %v0
80-
; CHECK-NEXT: vst %v0, 0(%r2), 3
81-
; CHECK-NEXT: br %r14
167+
; BASELINE-LABEL: i128_addc_4:
168+
; BASELINE: # %bb.0:
169+
; BASELINE-NEXT: lg %r0, 8(%r3)
170+
; BASELINE-NEXT: lg %r1, 0(%r3)
171+
; BASELINE-NEXT: alg %r0, 8(%r4)
172+
; BASELINE-NEXT: alcg %r1, 0(%r4)
173+
; BASELINE-NEXT: ipm %r0
174+
; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35
175+
; BASELINE-NEXT: stg %r0, 8(%r2)
176+
; BASELINE-NEXT: mvghi 0(%r2), 0
177+
; BASELINE-NEXT: br %r14
178+
;
179+
; Z13-LABEL: i128_addc_4:
180+
; Z13: # %bb.0:
181+
; Z13-NEXT: vl %v0, 0(%r4), 3
182+
; Z13-NEXT: vl %v1, 0(%r3), 3
183+
; Z13-NEXT: vaccq %v0, %v1, %v0
184+
; Z13-NEXT: vst %v0, 0(%r2), 3
185+
; Z13-NEXT: br %r14
82186
%sum = add i128 %a, %b
83187
%cmp = icmp ugt i128 %b, %sum
84188
%ext = zext i1 %cmp to i128
85189
ret i128 %ext
86190
}
87191

192+
define i128 @i128_addc_xor(i128 %a, i128 %b) {
193+
; BASELINE-LABEL: i128_addc_xor:
194+
; BASELINE: # %bb.0:
195+
; BASELINE-NEXT: lg %r0, 8(%r4)
196+
; BASELINE-NEXT: lg %r1, 0(%r4)
197+
; BASELINE-NEXT: alg %r0, 8(%r3)
198+
; BASELINE-NEXT: alcg %r1, 0(%r3)
199+
; BASELINE-NEXT: ipm %r0
200+
; BASELINE-NEXT: risbg %r0, %r0, 63, 191, 35
201+
; BASELINE-NEXT: stg %r0, 8(%r2)
202+
; BASELINE-NEXT: mvghi 0(%r2), 0
203+
; BASELINE-NEXT: br %r14
204+
;
205+
; Z13-LABEL: i128_addc_xor:
206+
; Z13: # %bb.0:
207+
; Z13-NEXT: vl %v0, 0(%r3), 3
208+
; Z13-NEXT: vl %v1, 0(%r4), 3
209+
; Z13-NEXT: vaccq %v0, %v1, %v0
210+
; Z13-NEXT: vst %v0, 0(%r2), 3
211+
; Z13-NEXT: br %r14
212+
%b.not = xor i128 %b, -1
213+
%cmp = icmp ugt i128 %a, %b.not
214+
%ext = zext i1 %cmp to i128
215+
ret i128 %ext
216+
}
217+
218+
define i128 @i128_addc_xor_inv(i128 %a, i128 %b) {
219+
; BASELINE-LABEL: i128_addc_xor_inv:
220+
; BASELINE: # %bb.0:
221+
; BASELINE-NEXT: stmg %r14, %r15, 112(%r15)
222+
; BASELINE-NEXT: .cfi_offset %r14, -48
223+
; BASELINE-NEXT: .cfi_offset %r15, -40
224+
; BASELINE-NEXT: lg %r5, 0(%r3)
225+
; BASELINE-NEXT: lghi %r14, -1
226+
; BASELINE-NEXT: xg %r14, 0(%r4)
227+
; BASELINE-NEXT: lghi %r1, -1
228+
; BASELINE-NEXT: xg %r1, 8(%r4)
229+
; BASELINE-NEXT: clgr %r5, %r14
230+
; BASELINE-NEXT: ipm %r0
231+
; BASELINE-NEXT: clg %r1, 8(%r3)
232+
; BASELINE-NEXT: ipm %r1
233+
; BASELINE-NEXT: cgrjlh %r5, %r14, .LBB7_2
234+
; BASELINE-NEXT: # %bb.1:
235+
; BASELINE-NEXT: xilf %r1, 4294967295
236+
; BASELINE-NEXT: risbg %r0, %r1, 63, 191, 36
237+
; BASELINE-NEXT: j .LBB7_3
238+
; BASELINE-NEXT: .LBB7_2:
239+
; BASELINE-NEXT: afi %r0, -536870912
240+
; BASELINE-NEXT: srl %r0, 31
241+
; BASELINE-NEXT: .LBB7_3:
242+
; BASELINE-NEXT: llgfr %r0, %r0
243+
; BASELINE-NEXT: stg %r0, 8(%r2)
244+
; BASELINE-NEXT: mvghi 0(%r2), 0
245+
; BASELINE-NEXT: lmg %r14, %r15, 112(%r15)
246+
; BASELINE-NEXT: br %r14
247+
;
248+
; Z13-LABEL: i128_addc_xor_inv:
249+
; Z13: # %bb.0:
250+
; Z13-NEXT: vl %v1, 0(%r4), 3
251+
; Z13-NEXT: vl %v0, 0(%r3), 3
252+
; Z13-NEXT: vno %v1, %v1, %v1
253+
; Z13-NEXT: vscbiq %v0, %v1, %v0
254+
; Z13-NEXT: vst %v0, 0(%r2), 3
255+
; Z13-NEXT: br %r14
256+
%b.not = xor i128 %b, -1
257+
%cmp = icmp ule i128 %a, %b.not
258+
%ext = zext i1 %cmp to i128
259+
ret i128 %ext
260+
}

0 commit comments

Comments
 (0)