Skip to content

Commit fe2ee69

Browse files
committed
Support avgr_u in loop construct
1 parent 4a82aea commit fe2ee69

File tree

3 files changed

+76
-75
lines changed

3 files changed

+76
-75
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
246246
MVT::v2f64})
247247
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
248248

249+
// Mark ISD::AVGCEILU as legal for the i8x16 and i16x8 vector types
250+
// so that isel converts it to AVGR_U via the TableGen patterns
251+
setOperationAction({ISD::AVGCEILU}, {MVT::v8i16, MVT::v16i8}, Legal);
252+
249253
// Custom lowering since wasm shifts must have a scalar shift amount
250254
for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
251255
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,6 +1144,9 @@ def : Pat<(wasm_shr_u
11441144
(vec.splat (i32 1))),
11451145
(i32 1)),
11461146
(inst $lhs, $rhs)>;
1147+
1148+
def : Pat<(vec.vt(avgceilu(vec.vt V128:$lhs), (vec.vt V128:$rhs))), (inst $lhs,
1149+
$rhs)>;
11471150
}
11481151

11491152
// Widening dot product: i32x4.dot_i16x8_s

llvm/test/CodeGen/WebAssembly/simd-avgr.ll

Lines changed: 69 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define void @f(ptr %x, ptr %y, i32 %n) {
1515
; CHECK-NEXT: i32.lt_s $push1=, $2, $pop0
1616
; CHECK-NEXT: br_if 0, $pop1 # 0: down to label0
1717
; CHECK-NEXT: # %bb.1: # %for.body.preheader
18-
; CHECK-NEXT: i32.const $5=, 0
18+
; CHECK-NEXT: i32.const $3=, 0
1919
; CHECK-NEXT: block
2020
; CHECK-NEXT: i32.const $push2=, 16
2121
; CHECK-NEXT: i32.lt_u $push3=, $2, $pop2
@@ -31,99 +31,93 @@ define void @f(ptr %x, ptr %y, i32 %n) {
3131
; CHECK-NEXT: br_if 1, $pop7 # 1: down to label1
3232
; CHECK-NEXT: .LBB0_4: # %vector.ph
3333
; CHECK-NEXT: end_block # label2:
34-
; CHECK-NEXT: local.copy $6=, $0
35-
; CHECK-NEXT: local.copy $7=, $1
34+
; CHECK-NEXT: local.copy $4=, $0
35+
; CHECK-NEXT: local.copy $5=, $1
3636
; CHECK-NEXT: i32.const $push8=, 2147483632
37-
; CHECK-NEXT: i32.and $push34=, $2, $pop8
38-
; CHECK-NEXT: local.tee $push33=, $5=, $pop34
39-
; CHECK-NEXT: local.copy $8=, $pop33
37+
; CHECK-NEXT: i32.and $push33=, $2, $pop8
38+
; CHECK-NEXT: local.tee $push32=, $3=, $pop33
39+
; CHECK-NEXT: local.copy $6=, $pop32
4040
; CHECK-NEXT: .LBB0_5: # %vector.body
4141
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4242
; CHECK-NEXT: loop # label3:
43-
; CHECK-NEXT: v128.load $push44=, 0($6):p2align=0
44-
; CHECK-NEXT: local.tee $push43=, $4=, $pop44
45-
; CHECK-NEXT: v128.load $push42=, 0($7):p2align=0
46-
; CHECK-NEXT: local.tee $push41=, $3=, $pop42
47-
; CHECK-NEXT: v128.or $push9=, $pop43, $pop41
48-
; CHECK-NEXT: v128.xor $push10=, $4, $3
49-
; CHECK-NEXT: i32.const $push40=, 1
50-
; CHECK-NEXT: i8x16.shr_u $push11=, $pop10, $pop40
51-
; CHECK-NEXT: i8x16.sub $push12=, $pop9, $pop11
52-
; CHECK-NEXT: v128.store 0($6):p2align=0, $pop12
53-
; CHECK-NEXT: i32.const $push39=, 16
54-
; CHECK-NEXT: i32.add $6=, $6, $pop39
43+
; CHECK-NEXT: v128.load $push10=, 0($4):p2align=0
44+
; CHECK-NEXT: v128.load $push9=, 0($5):p2align=0
45+
; CHECK-NEXT: i8x16.avgr_u $push11=, $pop10, $pop9
46+
; CHECK-NEXT: v128.store 0($4):p2align=0, $pop11
5547
; CHECK-NEXT: i32.const $push38=, 16
56-
; CHECK-NEXT: i32.add $7=, $7, $pop38
57-
; CHECK-NEXT: i32.const $push37=, -16
58-
; CHECK-NEXT: i32.add $push36=, $8, $pop37
59-
; CHECK-NEXT: local.tee $push35=, $8=, $pop36
60-
; CHECK-NEXT: br_if 0, $pop35 # 0: up to label3
48+
; CHECK-NEXT: i32.add $4=, $4, $pop38
49+
; CHECK-NEXT: i32.const $push37=, 16
50+
; CHECK-NEXT: i32.add $5=, $5, $pop37
51+
; CHECK-NEXT: i32.const $push36=, -16
52+
; CHECK-NEXT: i32.add $push35=, $6, $pop36
53+
; CHECK-NEXT: local.tee $push34=, $6=, $pop35
54+
; CHECK-NEXT: br_if 0, $pop34 # 0: up to label3
6155
; CHECK-NEXT: # %bb.6: # %middle.block
6256
; CHECK-NEXT: end_loop
63-
; CHECK-NEXT: i32.eq $push13=, $2, $5
64-
; CHECK-NEXT: br_if 1, $pop13 # 1: down to label0
57+
; CHECK-NEXT: i32.eq $push12=, $2, $3
58+
; CHECK-NEXT: br_if 1, $pop12 # 1: down to label0
6559
; CHECK-NEXT: .LBB0_7: # %for.body.preheader16
6660
; CHECK-NEXT: end_block # label1:
67-
; CHECK-NEXT: i32.const $push46=, 1
68-
; CHECK-NEXT: i32.or $6=, $5, $pop46
61+
; CHECK-NEXT: i32.const $push40=, 1
62+
; CHECK-NEXT: i32.or $4=, $3, $pop40
6963
; CHECK-NEXT: block
70-
; CHECK-NEXT: i32.const $push45=, 1
71-
; CHECK-NEXT: i32.and $push14=, $2, $pop45
72-
; CHECK-NEXT: i32.eqz $push64=, $pop14
73-
; CHECK-NEXT: br_if 0, $pop64 # 0: down to label4
64+
; CHECK-NEXT: i32.const $push39=, 1
65+
; CHECK-NEXT: i32.and $push13=, $2, $pop39
66+
; CHECK-NEXT: i32.eqz $push58=, $pop13
67+
; CHECK-NEXT: br_if 0, $pop58 # 0: down to label4
7468
; CHECK-NEXT: # %bb.8: # %for.body.prol
75-
; CHECK-NEXT: i32.add $push50=, $0, $5
76-
; CHECK-NEXT: local.tee $push49=, $7=, $pop50
77-
; CHECK-NEXT: i32.load8_u $push17=, 0($7)
78-
; CHECK-NEXT: i32.add $push15=, $1, $5
79-
; CHECK-NEXT: i32.load8_u $push16=, 0($pop15)
80-
; CHECK-NEXT: i32.add $push18=, $pop17, $pop16
81-
; CHECK-NEXT: i32.const $push48=, 1
82-
; CHECK-NEXT: i32.add $push19=, $pop18, $pop48
83-
; CHECK-NEXT: i32.const $push47=, 1
84-
; CHECK-NEXT: i32.shr_u $push20=, $pop19, $pop47
85-
; CHECK-NEXT: i32.store8 0($pop49), $pop20
86-
; CHECK-NEXT: local.copy $5=, $6
69+
; CHECK-NEXT: i32.add $push44=, $0, $3
70+
; CHECK-NEXT: local.tee $push43=, $5=, $pop44
71+
; CHECK-NEXT: i32.load8_u $push16=, 0($5)
72+
; CHECK-NEXT: i32.add $push14=, $1, $3
73+
; CHECK-NEXT: i32.load8_u $push15=, 0($pop14)
74+
; CHECK-NEXT: i32.add $push17=, $pop16, $pop15
75+
; CHECK-NEXT: i32.const $push42=, 1
76+
; CHECK-NEXT: i32.add $push18=, $pop17, $pop42
77+
; CHECK-NEXT: i32.const $push41=, 1
78+
; CHECK-NEXT: i32.shr_u $push19=, $pop18, $pop41
79+
; CHECK-NEXT: i32.store8 0($pop43), $pop19
80+
; CHECK-NEXT: local.copy $3=, $4
8781
; CHECK-NEXT: .LBB0_9: # %for.body.prol.loopexit
8882
; CHECK-NEXT: end_block # label4:
89-
; CHECK-NEXT: i32.eq $push21=, $2, $6
90-
; CHECK-NEXT: br_if 0, $pop21 # 0: down to label0
83+
; CHECK-NEXT: i32.eq $push20=, $2, $4
84+
; CHECK-NEXT: br_if 0, $pop20 # 0: down to label0
9185
; CHECK-NEXT: # %bb.10: # %for.body.preheader1
92-
; CHECK-NEXT: i32.add $6=, $0, $5
93-
; CHECK-NEXT: i32.add $7=, $1, $5
94-
; CHECK-NEXT: i32.sub $8=, $2, $5
86+
; CHECK-NEXT: i32.add $4=, $0, $3
87+
; CHECK-NEXT: i32.add $5=, $1, $3
88+
; CHECK-NEXT: i32.sub $6=, $2, $3
9589
; CHECK-NEXT: .LBB0_11: # %for.body
9690
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
9791
; CHECK-NEXT: loop # label5:
98-
; CHECK-NEXT: i32.load8_u $push23=, 0($6)
99-
; CHECK-NEXT: i32.load8_u $push22=, 0($7)
100-
; CHECK-NEXT: i32.add $push24=, $pop23, $pop22
101-
; CHECK-NEXT: i32.const $push63=, 1
102-
; CHECK-NEXT: i32.add $push25=, $pop24, $pop63
103-
; CHECK-NEXT: i32.const $push62=, 1
104-
; CHECK-NEXT: i32.shr_u $push26=, $pop25, $pop62
105-
; CHECK-NEXT: i32.store8 0($6), $pop26
106-
; CHECK-NEXT: i32.const $push61=, 1
107-
; CHECK-NEXT: i32.add $push60=, $6, $pop61
108-
; CHECK-NEXT: local.tee $push59=, $2=, $pop60
109-
; CHECK-NEXT: i32.load8_u $push27=, 0($2)
110-
; CHECK-NEXT: i32.const $push58=, 1
111-
; CHECK-NEXT: i32.add $push28=, $7, $pop58
112-
; CHECK-NEXT: i32.load8_u $push29=, 0($pop28)
113-
; CHECK-NEXT: i32.add $push30=, $pop27, $pop29
92+
; CHECK-NEXT: i32.load8_u $push22=, 0($4)
93+
; CHECK-NEXT: i32.load8_u $push21=, 0($5)
94+
; CHECK-NEXT: i32.add $push23=, $pop22, $pop21
11495
; CHECK-NEXT: i32.const $push57=, 1
115-
; CHECK-NEXT: i32.add $push31=, $pop30, $pop57
96+
; CHECK-NEXT: i32.add $push24=, $pop23, $pop57
11697
; CHECK-NEXT: i32.const $push56=, 1
117-
; CHECK-NEXT: i32.shr_u $push32=, $pop31, $pop56
118-
; CHECK-NEXT: i32.store8 0($pop59), $pop32
119-
; CHECK-NEXT: i32.const $push55=, 2
120-
; CHECK-NEXT: i32.add $6=, $6, $pop55
121-
; CHECK-NEXT: i32.const $push54=, 2
122-
; CHECK-NEXT: i32.add $7=, $7, $pop54
123-
; CHECK-NEXT: i32.const $push53=, -2
124-
; CHECK-NEXT: i32.add $push52=, $8, $pop53
125-
; CHECK-NEXT: local.tee $push51=, $8=, $pop52
126-
; CHECK-NEXT: br_if 0, $pop51 # 0: up to label5
98+
; CHECK-NEXT: i32.shr_u $push25=, $pop24, $pop56
99+
; CHECK-NEXT: i32.store8 0($4), $pop25
100+
; CHECK-NEXT: i32.const $push55=, 1
101+
; CHECK-NEXT: i32.add $push54=, $4, $pop55
102+
; CHECK-NEXT: local.tee $push53=, $2=, $pop54
103+
; CHECK-NEXT: i32.load8_u $push26=, 0($2)
104+
; CHECK-NEXT: i32.const $push52=, 1
105+
; CHECK-NEXT: i32.add $push27=, $5, $pop52
106+
; CHECK-NEXT: i32.load8_u $push28=, 0($pop27)
107+
; CHECK-NEXT: i32.add $push29=, $pop26, $pop28
108+
; CHECK-NEXT: i32.const $push51=, 1
109+
; CHECK-NEXT: i32.add $push30=, $pop29, $pop51
110+
; CHECK-NEXT: i32.const $push50=, 1
111+
; CHECK-NEXT: i32.shr_u $push31=, $pop30, $pop50
112+
; CHECK-NEXT: i32.store8 0($pop53), $pop31
113+
; CHECK-NEXT: i32.const $push49=, 2
114+
; CHECK-NEXT: i32.add $4=, $4, $pop49
115+
; CHECK-NEXT: i32.const $push48=, 2
116+
; CHECK-NEXT: i32.add $5=, $5, $pop48
117+
; CHECK-NEXT: i32.const $push47=, -2
118+
; CHECK-NEXT: i32.add $push46=, $6, $pop47
119+
; CHECK-NEXT: local.tee $push45=, $6=, $pop46
120+
; CHECK-NEXT: br_if 0, $pop45 # 0: up to label5
127121
; CHECK-NEXT: .LBB0_12: # %for.cond.cleanup
128122
; CHECK-NEXT: end_loop
129123
; CHECK-NEXT: end_block # label0:

0 commit comments

Comments
 (0)