Skip to content

Commit e6ca023

Browse files
rotateright authored and tstellar committed
[AArch64][x86] add tests for add-with-overflow folds; NFC
There's a generic combine for these, but no test coverage. It's not clear if this is actually a good fold. The combine was added with D58874, but it has a bug that can cause crashing ( https://llvm.org/PR51238 ). (cherry picked from commit e427077)
1 parent 2f43c81 commit e6ca023

File tree

2 files changed

+96
-20
lines changed

2 files changed

+96
-20
lines changed

llvm/test/CodeGen/AArch64/addsub.ll

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,4 +223,48 @@ test6:
223223
ret:
224224
ret void
225225
}
226+
227+
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
228+
229+
; Signed add-with-overflow of (~%a, %b) whose sum is then incremented by 1.
; The incremented sum is stored through %p and the overflow bit is returned.
; In the expected output below, the stored value is a single subtract (w1 - w0),
; while the returned flag is still produced from the mvn/cmn/cset sequence.
define i1 @sadd_add(i32 %a, i32 %b, i32* %p) {
230+
; CHECK-LABEL: sadd_add:
231+
; CHECK: // %bb.0:
232+
; CHECK-NEXT: mvn w8, w0
233+
; CHECK-NEXT: cmn w8, w1
234+
; CHECK-NEXT: cset w8, vs
235+
; CHECK-NEXT: sub w9, w1, w0
236+
; CHECK-NEXT: mov w0, w8
237+
; CHECK-NEXT: str w9, [x2]
238+
; CHECK-NEXT: ret
239+
; %nota = ~%a (xor with all-ones).
%nota = xor i32 %a, -1
240+
%a0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %nota, i32 %b)
241+
; Field 0 of the intrinsic result is the sum, field 1 is the overflow bit.
%e0 = extractvalue {i32, i1} %a0, 0
242+
%e1 = extractvalue {i32, i1} %a0, 1
243+
; (~%a + %b) + 1 is the value the expected output computes as %b - %a.
%res = add i32 %e0, 1
244+
store i32 %res, i32* %p
245+
ret i1 %e1
246+
}
247+
248+
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
249+
250+
; Unsigned i8 add-with-overflow of (~%a, %b) whose sum is then incremented by 1.
; The incremented sum is stored through %p and the overflow bit is returned.
; In the expected output below, the stored byte is a single subtract (w1 - w0),
; while the returned flag is bit 8 of the zero-extended 8-bit addition
; (mvn / and #0xff / add ..., uxtb / lsr #8).
define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
251+
; CHECK-LABEL: uadd_add:
252+
; CHECK: // %bb.0:
253+
; CHECK-NEXT: mvn w8, w0
254+
; CHECK-NEXT: and w8, w8, #0xff
255+
; CHECK-NEXT: add w8, w8, w1, uxtb
256+
; CHECK-NEXT: lsr w8, w8, #8
257+
; CHECK-NEXT: sub w9, w1, w0
258+
; CHECK-NEXT: mov w0, w8
259+
; CHECK-NEXT: strb w9, [x2]
260+
; CHECK-NEXT: ret
261+
; %nota = ~%a (xor with all-ones).
%nota = xor i8 %a, -1
262+
%a0 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %nota, i8 %b)
263+
; Field 0 of the intrinsic result is the sum, field 1 is the overflow bit.
%e0 = extractvalue {i8, i1} %a0, 0
264+
%e1 = extractvalue {i8, i1} %a0, 1
265+
; (~%a + %b) + 1 is the value the expected output computes as %b - %a.
%res = add i8 %e0, 1
266+
store i8 %res, i8* %p
267+
ret i1 %e1
268+
}
269+
226270
; TODO: adds/subs

llvm/test/CodeGen/X86/combine-add.ll

Lines changed: 52 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
44

55
; fold (add x, 0) -> x
66
define <4 x i32> @combine_vec_add_to_zero(<4 x i32> %a) {
7-
; SSE-LABEL: combine_vec_add_to_zero:
8-
; SSE: # %bb.0:
9-
; SSE-NEXT: retq
10-
;
11-
; AVX-LABEL: combine_vec_add_to_zero:
12-
; AVX: # %bb.0:
13-
; AVX-NEXT: retq
7+
; CHECK-LABEL: combine_vec_add_to_zero:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: retq
1410
%1 = add <4 x i32> %a, zeroinitializer
1511
ret <4 x i32> %1
1612
}
@@ -352,17 +348,11 @@ define <4 x i32> @combine_vec_add_sextinreg(<4 x i32> %a0, <4 x i32> %a1) {
352348

353349
; (add (add (xor a, -1), b), 1) -> (sub b, a)
354350
define i32 @combine_add_add_not(i32 %a, i32 %b) {
355-
; SSE-LABEL: combine_add_add_not:
356-
; SSE: # %bb.0:
357-
; SSE-NEXT: movl %esi, %eax
358-
; SSE-NEXT: subl %edi, %eax
359-
; SSE-NEXT: retq
360-
;
361-
; AVX-LABEL: combine_add_add_not:
362-
; AVX: # %bb.0:
363-
; AVX-NEXT: movl %esi, %eax
364-
; AVX-NEXT: subl %edi, %eax
365-
; AVX-NEXT: retq
351+
; CHECK-LABEL: combine_add_add_not:
352+
; CHECK: # %bb.0:
353+
; CHECK-NEXT: movl %esi, %eax
354+
; CHECK-NEXT: subl %edi, %eax
355+
; CHECK-NEXT: retq
366356
%nota = xor i32 %a, -1
367357
%add = add i32 %nota, %b
368358
%r = add i32 %add, 1
@@ -385,3 +375,45 @@ define <4 x i32> @combine_vec_add_add_not(<4 x i32> %a, <4 x i32> %b) {
385375
%r = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
386376
ret <4 x i32> %r
387377
}
378+
379+
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
380+
381+
; Signed add-with-overflow of (~%a, %b) whose sum is then incremented by 1.
; The incremented sum is stored through %p and the overflow bit is returned.
; In the expected output below, the stored value is a single subtract
; (%esi - %edi), while the returned flag is still produced from the
; notl/addl/seto sequence.
define i1 @sadd_add(i32 %a, i32 %b, i32* %p) {
382+
; CHECK-LABEL: sadd_add:
383+
; CHECK: # %bb.0:
384+
; CHECK-NEXT: movl %edi, %eax
385+
; CHECK-NEXT: notl %eax
386+
; CHECK-NEXT: addl %esi, %eax
387+
; CHECK-NEXT: seto %al
388+
; CHECK-NEXT: subl %edi, %esi
389+
; CHECK-NEXT: movl %esi, (%rdx)
390+
; CHECK-NEXT: retq
391+
; %nota = ~%a (xor with all-ones).
%nota = xor i32 %a, -1
392+
%a0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %nota, i32 %b)
393+
; Field 0 of the intrinsic result is the sum, field 1 is the overflow bit.
%e0 = extractvalue {i32, i1} %a0, 0
394+
%e1 = extractvalue {i32, i1} %a0, 1
395+
; (~%a + %b) + 1 is the value the expected output computes as %b - %a.
%res = add i32 %e0, 1
396+
store i32 %res, i32* %p
397+
ret i1 %e1
398+
}
399+
400+
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
401+
402+
; Unsigned i8 add-with-overflow of (~%a, %b) whose sum is then incremented by 1.
; The incremented sum is stored through %p and the overflow bit is returned.
; In the expected output below, the stored byte is a single subtract
; (%sil - %dil), while the returned flag is the carry from the
; notb/addb/setb sequence.
define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
403+
; CHECK-LABEL: uadd_add:
404+
; CHECK: # %bb.0:
405+
; CHECK-NEXT: movl %edi, %eax
406+
; CHECK-NEXT: notb %al
407+
; CHECK-NEXT: addb %sil, %al
408+
; CHECK-NEXT: setb %al
409+
; CHECK-NEXT: subb %dil, %sil
410+
; CHECK-NEXT: movb %sil, (%rdx)
411+
; CHECK-NEXT: retq
412+
; %nota = ~%a (xor with all-ones).
%nota = xor i8 %a, -1
413+
%a0 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %nota, i8 %b)
414+
; Field 0 of the intrinsic result is the sum, field 1 is the overflow bit.
%e0 = extractvalue {i8, i1} %a0, 0
415+
%e1 = extractvalue {i8, i1} %a0, 1
416+
; (~%a + %b) + 1 is the value the expected output computes as %b - %a.
%res = add i8 %e0, 1
417+
store i8 %res, i8* %p
418+
ret i1 %e1
419+
}

0 commit comments

Comments
 (0)