Commit c31e6be

Merge branch 'main' into x86-mul-vXi8-bv
2 parents 0becf5b + 8a055f8

2 files changed (+324, -0 lines)
Lines changed: 158 additions & 0 deletions
@@ -0,0 +1,158 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; GitHub issue #161036

; Positive test : umin(sub(a,b),a) with scalar types should be folded
define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) {
; CHECK-LABEL: underflow_compare_fold_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, x1
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: csel x0, x8, x0, lo
; CHECK-NEXT: ret
  %sub = sub i64 %a, %b
  %cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
  ret i64 %cond
}

; Positive test : umin(a,sub(a,b)) with scalar types should be folded
define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) {
; CHECK-LABEL: underflow_compare_fold_i64_commute:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, x1
; CHECK-NEXT: cmp x0, x8
; CHECK-NEXT: csel x0, x0, x8, lo
; CHECK-NEXT: ret
  %sub = sub i64 %a, %b
  %cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
  ret i64 %cond
}

; Positive test : multi-use is OK since the sub instruction still runs once
define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
; CHECK-LABEL: underflow_compare_fold_i64_multi_use:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, x1
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: csel x0, x8, x0, lo
; CHECK-NEXT: ret
  %sub = sub i64 %a, %b
  store i64 %sub, ptr addrspace(1) %ptr
  %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
  ret i64 %cond
}

; Positive test : i32
define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) {
; CHECK-LABEL: underflow_compare_fold_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: csel w0, w8, w0, lo
; CHECK-NEXT: ret
  %sub = sub i32 %a, %b
  %cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a)
  ret i32 %cond
}

; Positive test : i32
define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) {
; CHECK-LABEL: underflow_compare_fold_i32_commute:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: csel w0, w0, w8, lo
; CHECK-NEXT: ret
  %sub = sub i32 %a, %b
  %cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub)
  ret i32 %cond
}

; Positive test : i32
define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
; CHECK-LABEL: underflow_compare_fold_i32_multi_use:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: cmp w8, w0
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: csel w0, w8, w0, lo
; CHECK-NEXT: ret
  %sub = sub i32 %a, %b
  store i32 %sub, ptr addrspace(1) %ptr
  %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
  ret i32 %cond
}

; Negative test : i16
define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) {
; CHECK-LABEL: underflow_compare_fold_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: and w9, w0, #0xffff
; CHECK-NEXT: and w8, w8, #0xffff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
  %sub = sub i16 %a, %b
  %cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a)
  ret i16 %cond
}

; Negative test : i16
define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) {
; CHECK-LABEL: underflow_compare_fold_i16_commute:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: and w9, w0, #0xffff
; CHECK-NEXT: and w8, w8, #0xffff
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: csel w0, w9, w8, lo
; CHECK-NEXT: ret
  %sub = sub i16 %a, %b
  %cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub)
  ret i16 %cond
}

; Negative test : i16
define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) {
; CHECK-LABEL: underflow_compare_fold_i16_multi_use:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: and w9, w0, #0xffff
; CHECK-NEXT: and w10, w8, #0xffff
; CHECK-NEXT: strh w8, [x2]
; CHECK-NEXT: cmp w10, w9
; CHECK-NEXT: csel w0, w10, w9, lo
; CHECK-NEXT: ret
  %sub = sub i16 %a, %b
  store i16 %sub, ptr addrspace(1) %ptr
  %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
  ret i16 %cond
}

; Negative test, vector types : umin(sub(a,b),a) but with vectors
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: underflow_compare_dontfold_vectors:
; CHECK: // %bb.0:
; CHECK-NEXT: sub v1.16b, v0.16b, v1.16b
; CHECK-NEXT: umin v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
  %sub = sub <16 x i8> %a, %b
  %cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
  ret <16 x i8> %cond
}

; Negative test, pattern mismatch : umin(add(a,b),a)
define i64 @umin_add(i64 %a, i64 %b) {
; CHECK-LABEL: umin_add:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: cmp x8, x0
; CHECK-NEXT: csel x0, x8, x0, lo
; CHECK-NEXT: ret
  %add = add i64 %a, %b
  %cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
  ret i64 %cond
}
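
For reference, and not part of either test file: umin(x, y) returns x when x <u y, so the scalar pattern exercised above is equivalent to the icmp/select form sketched below. That equivalence is why the expected AArch64 lowering compares the subtraction result against the original operand and selects on the lo (unsigned lower) condition. The function and value names here are illustrative only.

; Illustrative sketch, not part of this commit: the select form that
; umin(sub(a,b),a) is equivalent to. %sub <u %a holds exactly when the
; subtraction did not wrap (for nonzero %b), matching the cmp/csel ... lo
; sequence in the checks above.
define i64 @underflow_compare_select_form(i64 %a, i64 %b) {
  %sub = sub i64 %a, %b
  %no_wrap = icmp ult i64 %sub, %a
  %cond = select i1 %no_wrap, i64 %sub, i64 %a
  ret i64 %cond
}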
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s

; GitHub issue #161036

; Positive test : umin(sub(a,b),a) with scalar types should be folded
define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) {
; CHECK-LABEL: underflow_compare_fold_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: cmpq %rdi, %rax
; CHECK-NEXT: cmovaeq %rdi, %rax
; CHECK-NEXT: retq
  %sub = sub i64 %a, %b
  %cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a)
  ret i64 %cond
}

; Positive test : umin(a,sub(a,b)) with scalar types should be folded
define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) {
; CHECK-LABEL: underflow_compare_fold_i64_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: cmpq %rax, %rdi
; CHECK-NEXT: cmovbq %rdi, %rax
; CHECK-NEXT: retq
  %sub = sub i64 %a, %b
  %cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub)
  ret i64 %cond
}

; Positive test : multi-use is OK since the sub instruction still runs once
define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
; CHECK-LABEL: underflow_compare_fold_i64_multi_use:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: movq %rax, (%rdx)
; CHECK-NEXT: cmpq %rdi, %rax
; CHECK-NEXT: cmovaeq %rdi, %rax
; CHECK-NEXT: retq
  %sub = sub i64 %a, %b
  store i64 %sub, ptr addrspace(1) %ptr
  %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a)
  ret i64 %cond
}

; Positive test : i32
define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) {
; CHECK-LABEL: underflow_compare_fold_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: cmovael %edi, %eax
; CHECK-NEXT: retq
  %sub = sub i32 %a, %b
  %cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a)
  ret i32 %cond
}

; Positive test : i32
define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) {
; CHECK-LABEL: underflow_compare_fold_i32_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: cmpl %eax, %edi
; CHECK-NEXT: cmovbl %edi, %eax
; CHECK-NEXT: retq
  %sub = sub i32 %a, %b
  %cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub)
  ret i32 %cond
}

; Positive test : i32
define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) {
; CHECK-LABEL: underflow_compare_fold_i32_multi_use:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: movl %eax, (%rdx)
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: cmovael %edi, %eax
; CHECK-NEXT: retq
  %sub = sub i32 %a, %b
  store i32 %sub, ptr addrspace(1) %ptr
  %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a)
  ret i32 %cond
}

; Positive test : i16
define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) {
; CHECK-LABEL: underflow_compare_fold_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: cmpw %di, %ax
; CHECK-NEXT: cmovael %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
  %sub = sub i16 %a, %b
  %cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a)
  ret i16 %cond
}

; Positive test : i16
define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) {
; CHECK-LABEL: underflow_compare_fold_i16_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: cmpw %ax, %di
; CHECK-NEXT: cmovbl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
  %sub = sub i16 %a, %b
  %cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub)
  ret i16 %cond
}

; Positive test : i16
define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) {
; CHECK-LABEL: underflow_compare_fold_i16_multi_use:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: movw %ax, (%rdx)
; CHECK-NEXT: cmpw %di, %ax
; CHECK-NEXT: cmovael %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
  %sub = sub i16 %a, %b
  store i16 %sub, ptr addrspace(1) %ptr
  %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a)
  ret i16 %cond
}

; Negative test, vector types : umin(sub(a,b),a) but with vectors
define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: underflow_compare_dontfold_vectors:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: psubb %xmm1, %xmm2
; CHECK-NEXT: pminub %xmm2, %xmm0
; CHECK-NEXT: retq
  %sub = sub <16 x i8> %a, %b
  %cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a)
  ret <16 x i8> %cond
}

; Negative test, pattern mismatch : umin(add(a,b),a)
define i64 @umin_add(i64 %a, i64 %b) {
; CHECK-LABEL: umin_add:
; CHECK: # %bb.0:
; CHECK-NEXT: leaq (%rsi,%rdi), %rax
; CHECK-NEXT: cmpq %rdi, %rax
; CHECK-NEXT: cmovaeq %rdi, %rax
; CHECK-NEXT: retq
  %add = add i64 %a, %b
  %cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a)
  ret i64 %cond
}
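
One detail worth noting, not part of the commit itself: both files call the umin intrinsics without declare lines, which the llc RUN lines above evidently accept (recent LLVM does not require explicit intrinsic declarations in .ll input). Written out, the signatures exercised by these tests would be:

declare i64 @llvm.umin.i64(i64, i64)
declare i32 @llvm.umin.i32(i32, i32)
declare i16 @llvm.umin.i16(i16, i16)
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)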
