Skip to content

Commit 6675bd9

Browse files
committed
Add tests
1 parent f7685af commit 6675bd9

File tree

3 files changed

+267
-22
lines changed

3 files changed

+267
-22
lines changed

llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x 16 x ptr>, i32 immarg, <vscale x 16 x i1>)
77

8-
define fastcc i8 @allocno_reload_assign() {
8+
define fastcc i8 @allocno_reload_assign(ptr %p) {
99
; CHECK-LABEL: allocno_reload_assign:
1010
; CHECK: // %bb.0:
1111
; CHECK-NEXT: fmov d0, xzr
@@ -14,8 +14,8 @@ define fastcc i8 @allocno_reload_assign() {
1414
; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, #0
1515
; CHECK-NEXT: uzp1 p0.s, p0.s, p0.s
1616
; CHECK-NEXT: uzp1 p0.h, p0.h, p0.h
17-
; CHECK-NEXT: uzp1 p0.b, p0.b, p0.b
18-
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
17+
; CHECK-NEXT: uzp1 p8.b, p0.b, p0.b
18+
; CHECK-NEXT: mov z0.b, p8/z, #1 // =0x1
1919
; CHECK-NEXT: fmov w8, s0
2020
; CHECK-NEXT: mov z0.b, #0 // =0x0
2121
; CHECK-NEXT: uunpklo z1.h, z0.b
@@ -30,34 +30,35 @@ define fastcc i8 @allocno_reload_assign() {
3030
; CHECK-NEXT: punpklo p1.h, p0.b
3131
; CHECK-NEXT: punpkhi p0.h, p0.b
3232
; CHECK-NEXT: punpklo p2.h, p1.b
33-
; CHECK-NEXT: punpkhi p3.h, p1.b
33+
; CHECK-NEXT: punpkhi p4.h, p1.b
3434
; CHECK-NEXT: uunpklo z0.d, z2.s
3535
; CHECK-NEXT: uunpkhi z1.d, z2.s
36-
; CHECK-NEXT: punpklo p5.h, p0.b
36+
; CHECK-NEXT: punpklo p6.h, p0.b
3737
; CHECK-NEXT: uunpklo z2.d, z3.s
3838
; CHECK-NEXT: uunpkhi z3.d, z3.s
39-
; CHECK-NEXT: punpkhi p7.h, p0.b
39+
; CHECK-NEXT: punpkhi p0.h, p0.b
4040
; CHECK-NEXT: uunpklo z4.d, z5.s
4141
; CHECK-NEXT: uunpkhi z5.d, z5.s
4242
; CHECK-NEXT: uunpklo z6.d, z7.s
4343
; CHECK-NEXT: uunpkhi z7.d, z7.s
44-
; CHECK-NEXT: punpklo p0.h, p2.b
45-
; CHECK-NEXT: punpkhi p1.h, p2.b
46-
; CHECK-NEXT: punpklo p2.h, p3.b
47-
; CHECK-NEXT: punpkhi p3.h, p3.b
48-
; CHECK-NEXT: punpklo p4.h, p5.b
49-
; CHECK-NEXT: punpkhi p5.h, p5.b
50-
; CHECK-NEXT: punpklo p6.h, p7.b
51-
; CHECK-NEXT: punpkhi p7.h, p7.b
44+
; CHECK-NEXT: punpklo p1.h, p2.b
45+
; CHECK-NEXT: punpkhi p2.h, p2.b
46+
; CHECK-NEXT: punpklo p3.h, p4.b
47+
; CHECK-NEXT: punpkhi p4.h, p4.b
48+
; CHECK-NEXT: punpklo p5.h, p6.b
49+
; CHECK-NEXT: punpkhi p6.h, p6.b
50+
; CHECK-NEXT: punpklo p7.h, p0.b
51+
; CHECK-NEXT: punpkhi p0.h, p0.b
5252
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
53-
; CHECK-NEXT: st1b { z0.d }, p0, [z16.d]
54-
; CHECK-NEXT: st1b { z1.d }, p1, [z16.d]
55-
; CHECK-NEXT: st1b { z2.d }, p2, [z16.d]
56-
; CHECK-NEXT: st1b { z3.d }, p3, [z16.d]
57-
; CHECK-NEXT: st1b { z4.d }, p4, [z16.d]
58-
; CHECK-NEXT: st1b { z5.d }, p5, [z16.d]
59-
; CHECK-NEXT: st1b { z6.d }, p6, [z16.d]
60-
; CHECK-NEXT: st1b { z7.d }, p7, [z16.d]
53+
; CHECK-NEXT: st1b { z0.d }, p1, [z16.d]
54+
; CHECK-NEXT: st1b { z1.d }, p2, [z16.d]
55+
; CHECK-NEXT: st1b { z2.d }, p3, [z16.d]
56+
; CHECK-NEXT: st1b { z3.d }, p4, [z16.d]
57+
; CHECK-NEXT: st1b { z4.d }, p5, [z16.d]
58+
; CHECK-NEXT: st1b { z5.d }, p6, [z16.d]
59+
; CHECK-NEXT: st1b { z6.d }, p7, [z16.d]
60+
; CHECK-NEXT: st1b { z7.d }, p0, [z16.d]
61+
; CHECK-NEXT: str p8, [x0]
6162
; CHECK-NEXT: b .LBB0_1
6263
br label %1
6364

@@ -66,6 +67,7 @@ define fastcc i8 @allocno_reload_assign() {
6667
%constexpr1 = shufflevector <vscale x 16 x i1> %constexpr, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
6768
%constexpr2 = xor <vscale x 16 x i1> %constexpr1, shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
6869
call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x ptr> zeroinitializer, i32 0, <vscale x 16 x i1> %constexpr2)
70+
store <vscale x 16 x i1> %constexpr, ptr %p, align 16
6971
br label %1
7072
}
7173

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
7+
; Extracts lane 1 of an unsigned less-than compare between %a and a constant
; splat of 5, with the constant splat as the right-hand icmp operand.
define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
8+
; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: movi v1.4s, #5
11+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
12+
; CHECK-NEXT: xtn v0.4h, v0.4s
13+
; CHECK-NEXT: umov w8, v0.h[1]
14+
; CHECK-NEXT: and w0, w8, #0x1
15+
; CHECK-NEXT: ret
16+
%icmp = icmp ult <4 x i32> %a, splat (i32 5)
17+
%ext = extractelement <4 x i1> %icmp, i32 1
18+
ret i1 %ext
19+
}
20+
21+
; Extracts lane 1 of an unsigned less-than compare where the constant splat
; (7) is the left-hand icmp operand and %a is the right-hand operand.
define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
22+
; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
23+
; CHECK: // %bb.0:
24+
; CHECK-NEXT: movi v1.4s, #7
25+
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
26+
; CHECK-NEXT: xtn v0.4h, v0.4s
27+
; CHECK-NEXT: umov w8, v0.h[1]
28+
; CHECK-NEXT: and w0, w8, #0x1
29+
; CHECK-NEXT: ret
30+
%icmp = icmp ult <4 x i32> splat(i32 7), %a
31+
%ext = extractelement <4 x i1> %icmp, i32 1
32+
ret i1 %ext
33+
}
34+
35+
; Extracts lane 1 of an unsigned less-than compare between %a and a constant
; vector whose lanes differ (<5, 234, -1, 7>), i.e. the constant is not a splat.
define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
36+
; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
37+
; CHECK: // %bb.0:
38+
; CHECK-NEXT: adrp x8, .LCPI2_0
39+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
40+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
41+
; CHECK-NEXT: xtn v0.4h, v0.4s
42+
; CHECK-NEXT: umov w8, v0.h[1]
43+
; CHECK-NEXT: and w0, w8, #0x1
44+
; CHECK-NEXT: ret
45+
%icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
46+
%ext = extractelement <4 x i1> %icmp, i32 1
47+
ret i1 %ext
48+
}
49+
50+
; Floating-point variant: extracts lane 1 of an `fcmp ult` between %a and a
; constant splat of 4.0 on the right-hand side.
define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
51+
; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
52+
; CHECK: // %bb.0:
53+
; CHECK-NEXT: fmov v1.4s, #4.00000000
54+
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
55+
; CHECK-NEXT: mvn v0.16b, v0.16b
56+
; CHECK-NEXT: xtn v0.4h, v0.4s
57+
; CHECK-NEXT: umov w8, v0.h[1]
58+
; CHECK-NEXT: and w0, w8, #0x1
59+
; CHECK-NEXT: ret
60+
%fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
61+
%ext = extractelement <4 x i1> %fcmp, i32 1
62+
ret i1 %ext
63+
}
64+
65+
; Loads a <1 x i128> from %p, compares it for equality with zero, sign-extends
; the single i1 result back to i128 and returns element 0 of that vector.
define i128 @extract_icmp_v1i128(ptr %p) {
66+
; CHECK-LABEL: extract_icmp_v1i128:
67+
; CHECK: // %bb.0:
68+
; CHECK-NEXT: ldp x9, x8, [x0]
69+
; CHECK-NEXT: orr x8, x9, x8
70+
; CHECK-NEXT: cmp x8, #0
71+
; CHECK-NEXT: cset w8, eq
72+
; CHECK-NEXT: sbfx x0, x8, #0, #1
73+
; CHECK-NEXT: mov x1, x0
74+
; CHECK-NEXT: ret
75+
%load = load <1 x i128>, ptr %p, align 16
76+
%cmp = icmp eq <1 x i128> %load, zeroinitializer
77+
%sext = sext <1 x i1> %cmp to <1 x i128>
78+
%res = extractelement <1 x i128> %sext, i32 0
79+
ret i128 %res
80+
}
81+
82+
; Loop that keeps a scalar index (%index, step 2) alongside a <2 x i64>
; induction vector (%vec.ind, starting at <0, 1>, step <2, 2>). Each iteration
; compares %vec.ind ult <15, 15>, extracts both lanes of the result and uses
; each one as the branch condition guarding a store of i32 1 to %dest at
; %index (lane 0) and %index | 1 (lane 1). The loop exits when %index.next
; equals 16.
; NOTE(review): the block names suggest this was derived from loop-vectorizer
; output with predicated stores - confirm.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
83+
; CHECK-LABEL: vector_loop_with_icmp:
84+
; CHECK: // %bb.0: // %entry
85+
; CHECK-NEXT: index z0.d, #0, #1
86+
; CHECK-NEXT: mov w8, #15 // =0xf
87+
; CHECK-NEXT: mov w9, #2 // =0x2
88+
; CHECK-NEXT: dup v1.2d, x8
89+
; CHECK-NEXT: dup v2.2d, x9
90+
; CHECK-NEXT: add x9, x0, #4
91+
; CHECK-NEXT: mov w10, #16 // =0x10
92+
; CHECK-NEXT: mov w11, #1 // =0x1
93+
; CHECK-NEXT: b .LBB5_2
94+
; CHECK-NEXT: .LBB5_1: // %pred.store.continue6
95+
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
96+
; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
97+
; CHECK-NEXT: subs x10, x10, #2
98+
; CHECK-NEXT: add x9, x9, #8
99+
; CHECK-NEXT: b.eq .LBB5_6
100+
; CHECK-NEXT: .LBB5_2: // %vector.body
101+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
102+
; CHECK-NEXT: cmhi v3.2d, v1.2d, v0.2d
103+
; CHECK-NEXT: xtn v3.2s, v3.2d
104+
; CHECK-NEXT: fmov w12, s3
105+
; CHECK-NEXT: tbz w12, #0, .LBB5_4
106+
; CHECK-NEXT: // %bb.3: // %pred.store.if
107+
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
108+
; CHECK-NEXT: stur w11, [x9, #-4]
109+
; CHECK-NEXT: .LBB5_4: // %pred.store.continue
110+
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
111+
; CHECK-NEXT: dup v3.2d, x8
112+
; CHECK-NEXT: cmhi v3.2d, v3.2d, v0.2d
113+
; CHECK-NEXT: xtn v3.2s, v3.2d
114+
; CHECK-NEXT: mov w12, v3.s[1]
115+
; CHECK-NEXT: tbz w12, #0, .LBB5_1
116+
; CHECK-NEXT: // %bb.5: // %pred.store.if5
117+
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
118+
; CHECK-NEXT: str w11, [x9]
119+
; CHECK-NEXT: b .LBB5_1
120+
; CHECK-NEXT: .LBB5_6: // %for.cond.cleanup
121+
; CHECK-NEXT: ret
122+
entry:
123+
br label %vector.body
124+
125+
vector.body:
126+
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue6 ]
127+
%vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %entry ], [ %vec.ind.next, %pred.store.continue6 ]
128+
%vec.cmp = icmp ult <2 x i64> %vec.ind, <i64 15, i64 15>
129+
%c0 = extractelement <2 x i1> %vec.cmp, i64 0
130+
br i1 %c0, label %pred.store.if, label %pred.store.continue
131+
132+
pred.store.if:
133+
%arrayidx = getelementptr inbounds i32, ptr %dest, i64 %index
134+
store i32 1, ptr %arrayidx, align 4
135+
br label %pred.store.continue
136+
137+
pred.store.continue:
138+
%c1 = extractelement <2 x i1> %vec.cmp, i64 1
139+
br i1 %c1, label %pred.store.if5, label %pred.store.continue6
140+
141+
pred.store.if5:
142+
%indexp1 = or disjoint i64 %index, 1
143+
%arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %indexp1
144+
store i32 1, ptr %arrayidx2, align 4
145+
br label %pred.store.continue6
146+
147+
pred.store.continue6:
148+
%index.next = add i64 %index, 2
149+
%vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
150+
%index.cmp = icmp eq i64 %index.next, 16
151+
br i1 %index.cmp, label %for.cond.cleanup, label %vector.body
152+
153+
for.cond.cleanup:
154+
ret void
155+
}
156+
157+
158+
; Negative tests
159+
160+
; Negative test: the right-hand compare operand is a splat of the runtime
; scalar %b (built with insertelement + shufflevector), not a constant.
; Lane 1 of the `icmp ult` result is extracted.
define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
161+
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
162+
; CHECK: // %bb.0:
163+
; CHECK-NEXT: dup v1.4s, w0
164+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
165+
; CHECK-NEXT: xtn v0.4h, v0.4s
166+
; CHECK-NEXT: umov w8, v0.h[1]
167+
; CHECK-NEXT: and w0, w8, #0x1
168+
; CHECK-NEXT: ret
169+
%ins = insertelement <4 x i32> poison, i32 %b, i32 0
170+
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
171+
%icmp = icmp ult <4 x i32> %a, %splat
172+
%ext = extractelement <4 x i1> %icmp, i32 1
173+
ret i1 %ext
174+
}
175+
176+
; Negative test: the vector compare result has more than one use. Besides the
; lane-1 extract that is returned, the whole <4 x i1> %icmp is stored to %p.
define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
177+
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
178+
; CHECK: // %bb.0:
179+
; CHECK-NEXT: movi v1.4s, #235
180+
; CHECK-NEXT: adrp x9, .LCPI7_0
181+
; CHECK-NEXT: mov x8, x0
182+
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_0]
183+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
184+
; CHECK-NEXT: xtn v1.4h, v0.4s
185+
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
186+
; CHECK-NEXT: addv s0, v0.4s
187+
; CHECK-NEXT: umov w9, v1.h[1]
188+
; CHECK-NEXT: fmov w10, s0
189+
; CHECK-NEXT: and w0, w9, #0x1
190+
; CHECK-NEXT: strb w10, [x8]
191+
; CHECK-NEXT: ret
192+
%icmp = icmp ult <4 x i32> %a, splat(i32 235)
193+
%ext = extractelement <4 x i1> %icmp, i32 1
194+
store <4 x i1> %icmp, ptr %p, align 4
195+
ret i1 %ext
196+
}
197+
198+
; Negative test: the extract index %c is a runtime value rather than a
; constant. The compare itself is `icmp ult` against a constant splat of 127.
define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
199+
; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
200+
; CHECK: // %bb.0:
201+
; CHECK-NEXT: sub sp, sp, #16
202+
; CHECK-NEXT: .cfi_def_cfa_offset 16
203+
; CHECK-NEXT: movi v1.4s, #127
204+
; CHECK-NEXT: add x8, sp, #8
205+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
206+
; CHECK-NEXT: bfi x8, x0, #1, #2
207+
; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s
208+
; CHECK-NEXT: xtn v0.4h, v0.4s
209+
; CHECK-NEXT: str d0, [sp, #8]
210+
; CHECK-NEXT: ldrh w8, [x8]
211+
; CHECK-NEXT: and w0, w8, #0x1
212+
; CHECK-NEXT: add sp, sp, #16
213+
; CHECK-NEXT: ret
214+
%icmp = icmp ult <4 x i32> %a, splat(i32 127)
215+
%ext = extractelement <4 x i1> %icmp, i32 %c
216+
ret i1 %ext
217+
}

llvm/test/CodeGen/X86/vselect.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,3 +796,29 @@ define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
796796
ret i64 %4
797797
}
798798

799+
; Tests the scalarizeBinOp code in DAGCombiner
; The <1 x i1> select in %bb1 and the extractelement of its result in %bb2
; live in different basic blocks, and %c has no users. Control flow is an
; endless bb1 -> bb2 -> bb1 cycle, so the expected output for both the SSE and
; AVX prefixes is just a self-branching loop.
800+
define void @scalarize_binop(<1 x i1> %a) {
801+
; SSE-LABEL: scalarize_binop:
802+
; SSE: # %bb.0: # %bb0
803+
; SSE-NEXT: .p2align 4
804+
; SSE-NEXT: .LBB35_1: # %bb1
805+
; SSE-NEXT: # =>This Inner Loop Header: Depth=1
806+
; SSE-NEXT: jmp .LBB35_1
807+
;
808+
; AVX-LABEL: scalarize_binop:
809+
; AVX: # %bb.0: # %bb0
810+
; AVX-NEXT: .p2align 4
811+
; AVX-NEXT: .LBB35_1: # %bb1
812+
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
813+
; AVX-NEXT: jmp .LBB35_1
814+
bb0:
815+
br label %bb1
816+
817+
bb1:
818+
%b = select <1 x i1> %a, <1 x i1> zeroinitializer, <1 x i1> splat (i1 true)
819+
br label %bb2
820+
821+
bb2:
822+
%c = extractelement <1 x i1> %b, i32 0
823+
br label %bb1
824+
}

0 commit comments

Comments
 (0)