Skip to content

Commit 4a82aea

Browse files
committed
Precommit test for avgr pattern
1 parent 920079b commit 4a82aea

File tree

1 file changed

+230
-0
lines changed

1 file changed

+230
-0
lines changed
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -O2 -mtriple=wasm32 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
3+
4+
;void f(unsigned char *x, unsigned char *y, int n) {
5+
; for (int i = 0; i < n; i++)
6+
; x[i] = (x[i] + y[i] + 1) / 2;
7+
;}
8+
9+
; @f: for every i in [0, n) stores the rounded-up average
; (x[i] + y[i] + 1) >> 1 of two unsigned bytes back into x[i].
; The IR is already loop-vectorized: a 16-lane vector body guarded by a
; minimum-trip-count test and a runtime overlap test, followed by a scalar
; remainder (one peeled iteration plus a loop unrolled by two).
; The assertions record the current wasm32 +simd128 output. The vector body
; is emitted as v128.or / v128.xor / i8x16.shr_u / i8x16.sub; no
; i8x16.avgr_u instruction appears in the expected output.
define void @f(ptr %x, ptr %y, i32 %n) {
10+
; CHECK-LABEL: f:
11+
; CHECK: .functype f (i32, i32, i32) -> ()
12+
; CHECK-NEXT: # %bb.0: # %entry
13+
; CHECK-NEXT: block
14+
; CHECK-NEXT: i32.const $push0=, 1
15+
; CHECK-NEXT: i32.lt_s $push1=, $2, $pop0
16+
; CHECK-NEXT: br_if 0, $pop1 # 0: down to label0
17+
; CHECK-NEXT: # %bb.1: # %for.body.preheader
18+
; CHECK-NEXT: i32.const $5=, 0
19+
; CHECK-NEXT: block
20+
; CHECK-NEXT: i32.const $push2=, 16
21+
; CHECK-NEXT: i32.lt_u $push3=, $2, $pop2
22+
; CHECK-NEXT: br_if 0, $pop3 # 0: down to label1
23+
; CHECK-NEXT: # %bb.2: # %vector.memcheck
24+
; CHECK-NEXT: block
25+
; CHECK-NEXT: i32.add $push5=, $1, $2
26+
; CHECK-NEXT: i32.ge_u $push6=, $0, $pop5
27+
; CHECK-NEXT: br_if 0, $pop6 # 0: down to label2
28+
; CHECK-NEXT: # %bb.3: # %vector.memcheck
29+
; CHECK-NEXT: i32.add $push4=, $0, $2
30+
; CHECK-NEXT: i32.lt_u $push7=, $1, $pop4
31+
; CHECK-NEXT: br_if 1, $pop7 # 1: down to label1
32+
; CHECK-NEXT: .LBB0_4: # %vector.ph
33+
; CHECK-NEXT: end_block # label2:
34+
; CHECK-NEXT: local.copy $6=, $0
35+
; CHECK-NEXT: local.copy $7=, $1
36+
; CHECK-NEXT: i32.const $push8=, 2147483632
37+
; CHECK-NEXT: i32.and $push34=, $2, $pop8
38+
; CHECK-NEXT: local.tee $push33=, $5=, $pop34
39+
; CHECK-NEXT: local.copy $8=, $pop33
40+
; CHECK-NEXT: .LBB0_5: # %vector.body
41+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
42+
; CHECK-NEXT: loop # label3:
43+
; CHECK-NEXT: v128.load $push44=, 0($6):p2align=0
44+
; CHECK-NEXT: local.tee $push43=, $4=, $pop44
45+
; CHECK-NEXT: v128.load $push42=, 0($7):p2align=0
46+
; CHECK-NEXT: local.tee $push41=, $3=, $pop42
47+
; CHECK-NEXT: v128.or $push9=, $pop43, $pop41
48+
; CHECK-NEXT: v128.xor $push10=, $4, $3
49+
; CHECK-NEXT: i32.const $push40=, 1
50+
; CHECK-NEXT: i8x16.shr_u $push11=, $pop10, $pop40
51+
; CHECK-NEXT: i8x16.sub $push12=, $pop9, $pop11
52+
; CHECK-NEXT: v128.store 0($6):p2align=0, $pop12
53+
; CHECK-NEXT: i32.const $push39=, 16
54+
; CHECK-NEXT: i32.add $6=, $6, $pop39
55+
; CHECK-NEXT: i32.const $push38=, 16
56+
; CHECK-NEXT: i32.add $7=, $7, $pop38
57+
; CHECK-NEXT: i32.const $push37=, -16
58+
; CHECK-NEXT: i32.add $push36=, $8, $pop37
59+
; CHECK-NEXT: local.tee $push35=, $8=, $pop36
60+
; CHECK-NEXT: br_if 0, $pop35 # 0: up to label3
61+
; CHECK-NEXT: # %bb.6: # %middle.block
62+
; CHECK-NEXT: end_loop
63+
; CHECK-NEXT: i32.eq $push13=, $2, $5
64+
; CHECK-NEXT: br_if 1, $pop13 # 1: down to label0
65+
; CHECK-NEXT: .LBB0_7: # %for.body.preheader16
66+
; CHECK-NEXT: end_block # label1:
67+
; CHECK-NEXT: i32.const $push46=, 1
68+
; CHECK-NEXT: i32.or $6=, $5, $pop46
69+
; CHECK-NEXT: block
70+
; CHECK-NEXT: i32.const $push45=, 1
71+
; CHECK-NEXT: i32.and $push14=, $2, $pop45
72+
; CHECK-NEXT: i32.eqz $push64=, $pop14
73+
; CHECK-NEXT: br_if 0, $pop64 # 0: down to label4
74+
; CHECK-NEXT: # %bb.8: # %for.body.prol
75+
; CHECK-NEXT: i32.add $push50=, $0, $5
76+
; CHECK-NEXT: local.tee $push49=, $7=, $pop50
77+
; CHECK-NEXT: i32.load8_u $push17=, 0($7)
78+
; CHECK-NEXT: i32.add $push15=, $1, $5
79+
; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
80+
; CHECK-NEXT: i32.add $push18=, $pop17, $pop16
81+
; CHECK-NEXT: i32.const $push48=, 1
82+
; CHECK-NEXT: i32.add $push19=, $pop18, $pop48
83+
; CHECK-NEXT: i32.const $push47=, 1
84+
; CHECK-NEXT: i32.shr_u $push20=, $pop19, $pop47
85+
; CHECK-NEXT: i32.store8 0($pop49), $pop20
86+
; CHECK-NEXT: local.copy $5=, $6
87+
; CHECK-NEXT: .LBB0_9: # %for.body.prol.loopexit
88+
; CHECK-NEXT: end_block # label4:
89+
; CHECK-NEXT: i32.eq $push21=, $2, $6
90+
; CHECK-NEXT: br_if 0, $pop21 # 0: down to label0
91+
; CHECK-NEXT: # %bb.10: # %for.body.preheader1
92+
; CHECK-NEXT: i32.add $6=, $0, $5
93+
; CHECK-NEXT: i32.add $7=, $1, $5
94+
; CHECK-NEXT: i32.sub $8=, $2, $5
95+
; CHECK-NEXT: .LBB0_11: # %for.body
96+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
97+
; CHECK-NEXT: loop # label5:
98+
; CHECK-NEXT: i32.load8_u $push23=, 0($6)
99+
; CHECK-NEXT: i32.load8_u $push22=, 0($7)
100+
; CHECK-NEXT: i32.add $push24=, $pop23, $pop22
101+
; CHECK-NEXT: i32.const $push63=, 1
102+
; CHECK-NEXT: i32.add $push25=, $pop24, $pop63
103+
; CHECK-NEXT: i32.const $push62=, 1
104+
; CHECK-NEXT: i32.shr_u $push26=, $pop25, $pop62
105+
; CHECK-NEXT: i32.store8 0($6), $pop26
106+
; CHECK-NEXT: i32.const $push61=, 1
107+
; CHECK-NEXT: i32.add $push60=, $6, $pop61
108+
; CHECK-NEXT: local.tee $push59=, $2=, $pop60
109+
; CHECK-NEXT: i32.load8_u $push27=, 0($2)
110+
; CHECK-NEXT: i32.const $push58=, 1
111+
; CHECK-NEXT: i32.add $push28=, $7, $pop58
112+
; CHECK-NEXT: i32.load8_u $push29=, 0($pop28)
113+
; CHECK-NEXT: i32.add $push30=, $pop27, $pop29
114+
; CHECK-NEXT: i32.const $push57=, 1
115+
; CHECK-NEXT: i32.add $push31=, $pop30, $pop57
116+
; CHECK-NEXT: i32.const $push56=, 1
117+
; CHECK-NEXT: i32.shr_u $push32=, $pop31, $pop56
118+
; CHECK-NEXT: i32.store8 0($pop59), $pop32
119+
; CHECK-NEXT: i32.const $push55=, 2
120+
; CHECK-NEXT: i32.add $6=, $6, $pop55
121+
; CHECK-NEXT: i32.const $push54=, 2
122+
; CHECK-NEXT: i32.add $7=, $7, $pop54
123+
; CHECK-NEXT: i32.const $push53=, -2
124+
; CHECK-NEXT: i32.add $push52=, $8, $pop53
125+
; CHECK-NEXT: local.tee $push51=, $8=, $pop52
126+
; CHECK-NEXT: br_if 0, $pop51 # 0: up to label5
127+
; CHECK-NEXT: .LBB0_12: # %for.cond.cleanup
128+
; CHECK-NEXT: end_loop
129+
; CHECK-NEXT: end_block # label0:
130+
; CHECK-NEXT: return
131+
; Nothing to do when n <= 0.
entry:
132+
%cmp12 = icmp sgt i32 %n, 0
133+
br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup
134+
135+
; Fewer than 16 elements: go straight to the scalar remainder path.
for.body.preheader:
136+
%min.iters.check = icmp ult i32 %n, 16
137+
br i1 %min.iters.check, label %for.body.preheader16, label %vector.memcheck
138+
139+
; Runtime overlap test: if [x, x+n) and [y, y+n) intersect, take the scalar
; path instead of the vector body.
vector.memcheck:
140+
%scevgep = getelementptr i8, ptr %x, i32 %n
141+
%scevgep14 = getelementptr i8, ptr %y, i32 %n
142+
%bound0 = icmp ult ptr %x, %scevgep14
143+
%bound1 = icmp ult ptr %y, %scevgep
144+
%found.conflict = and i1 %bound0, %bound1
145+
br i1 %found.conflict, label %for.body.preheader16, label %vector.ph
146+
147+
; %n.vec = n with its low four bits cleared (mask 0x7FFFFFF0), i.e. the
; number of elements the 16-lane vector loop will process.
vector.ph:
148+
%n.vec = and i32 %n, 2147483632
149+
br label %vector.body
150+
151+
; 16 lanes per iteration: both byte vectors are widened to i16 so that
; x + y + 1 cannot wrap, the sum is shifted right by one, and the result is
; narrowed back to i8 and stored over the x elements.
vector.body:
152+
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
153+
%0 = getelementptr inbounds nuw i8, ptr %x, i32 %index
154+
%wide.load = load <16 x i8>, ptr %0, align 1
155+
%1 = zext <16 x i8> %wide.load to <16 x i16>
156+
%2 = getelementptr inbounds nuw i8, ptr %y, i32 %index
157+
%wide.load15 = load <16 x i8>, ptr %2, align 1
158+
%3 = zext <16 x i8> %wide.load15 to <16 x i16>
159+
%4 = add nuw nsw <16 x i16> %1, splat (i16 1)
160+
%5 = add nuw nsw <16 x i16> %4, %3
161+
%6 = lshr <16 x i16> %5, splat (i16 1)
162+
%7 = trunc nuw <16 x i16> %6 to <16 x i8>
163+
store <16 x i8> %7, ptr %0, align 1
164+
%index.next = add nuw i32 %index, 16
165+
%8 = icmp eq i32 %index.next, %n.vec
166+
br i1 %8, label %middle.block, label %vector.body
167+
168+
; Finished if the vector loop covered all n elements; otherwise the tail is
; handled by the scalar code below.
middle.block:
169+
%cmp.n = icmp eq i32 %n, %n.vec
170+
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader16
171+
172+
; Scalar remainder entry. %i.013.ph is the first index still to process
; (0, or %n.vec when coming from the vector loop). When n is odd, one
; iteration is peeled off first so the loop unrolled by two sees an even
; number of remaining elements.
for.body.preheader16:
173+
%i.013.ph = phi i32 [ 0, %vector.memcheck ], [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
174+
%.neg = or disjoint i32 %i.013.ph, 1
175+
%xtraiter = and i32 %n, 1
176+
%lcmp.mod.not = icmp eq i32 %xtraiter, 0
177+
br i1 %lcmp.mod.not, label %for.body.prol.loopexit, label %for.body.prol
178+
179+
; Peeled single scalar iteration at index %i.013.ph.
for.body.prol:
180+
%arrayidx.prol = getelementptr inbounds nuw i8, ptr %x, i32 %i.013.ph
181+
%9 = load i8, ptr %arrayidx.prol, align 1
182+
%conv.prol = zext i8 %9 to i16
183+
%arrayidx1.prol = getelementptr inbounds nuw i8, ptr %y, i32 %i.013.ph
184+
%10 = load i8, ptr %arrayidx1.prol, align 1
185+
%conv2.prol = zext i8 %10 to i16
186+
%add.prol = add nuw nsw i16 %conv.prol, 1
187+
%add3.prol = add nuw nsw i16 %add.prol, %conv2.prol
188+
%div11.prol = lshr i16 %add3.prol, 1
189+
%conv4.prol = trunc nuw i16 %div11.prol to i8
190+
store i8 %conv4.prol, ptr %arrayidx.prol, align 1
191+
%inc.prol = or disjoint i32 %i.013.ph, 1
192+
br label %for.body.prol.loopexit
193+
194+
; Exit when n equals %.neg (start index + 1), i.e. the peeled iteration
; handled the only remaining element; otherwise enter the unrolled loop.
for.body.prol.loopexit:
195+
%i.013.unr = phi i32 [ %i.013.ph, %for.body.preheader16 ], [ %inc.prol, %for.body.prol ]
196+
%11 = icmp eq i32 %n, %.neg
197+
br i1 %11, label %for.cond.cleanup, label %for.body
198+
199+
for.cond.cleanup:
200+
ret void
201+
202+
; Scalar loop unrolled by two: each trip processes indices %i.013 and
; %i.013 + 1 with the same widen / add / shift / narrow sequence.
for.body:
203+
%i.013 = phi i32 [ %inc.1, %for.body ], [ %i.013.unr, %for.body.prol.loopexit ]
204+
%arrayidx = getelementptr inbounds nuw i8, ptr %x, i32 %i.013
205+
%12 = load i8, ptr %arrayidx, align 1
206+
%conv = zext i8 %12 to i16
207+
%arrayidx1 = getelementptr inbounds nuw i8, ptr %y, i32 %i.013
208+
%13 = load i8, ptr %arrayidx1, align 1
209+
%conv2 = zext i8 %13 to i16
210+
%add = add nuw nsw i16 %conv, 1
211+
%add3 = add nuw nsw i16 %add, %conv2
212+
%div11 = lshr i16 %add3, 1
213+
%conv4 = trunc nuw i16 %div11 to i8
214+
store i8 %conv4, ptr %arrayidx, align 1
215+
%inc = add nuw nsw i32 %i.013, 1
216+
%arrayidx.1 = getelementptr inbounds nuw i8, ptr %x, i32 %inc
217+
%14 = load i8, ptr %arrayidx.1, align 1
218+
%conv.1 = zext i8 %14 to i16
219+
%arrayidx1.1 = getelementptr inbounds nuw i8, ptr %y, i32 %inc
220+
%15 = load i8, ptr %arrayidx1.1, align 1
221+
%conv2.1 = zext i8 %15 to i16
222+
%add.1 = add nuw nsw i16 %conv.1, 1
223+
%add3.1 = add nuw nsw i16 %add.1, %conv2.1
224+
%div11.1 = lshr i16 %add3.1, 1
225+
%conv4.1 = trunc nuw i16 %div11.1 to i8
226+
store i8 %conv4.1, ptr %arrayidx.1, align 1
227+
%inc.1 = add nuw nsw i32 %i.013, 2
228+
%exitcond.not.1 = icmp eq i32 %inc.1, %n
229+
br i1 %exitcond.not.1, label %for.cond.cleanup, label %for.body
230+
}

0 commit comments

Comments
 (0)