@@ -103,18 +103,16 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
 define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI8_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
 ; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vadd.vx v16, v8, a0
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v24, v16, a2
 ; CHECK-NEXT:    vmsltu.vx v0, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v16
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v24, 2
 ; CHECK-NEXT:    ret
   %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
   ret <32 x i1> %mask
@@ -125,30 +123,24 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
-; CHECK-NEXT:    vle8.v v17, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vle8.v v18, (a0)
-; CHECK-NEXT:    vmsltu.vx v0, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vsaddu.vx v16, v8, a1
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v24, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v25, v16, a2
+; CHECK-NEXT:    li a0, 48
+; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v16, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v17
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v17, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v16, 2
+; CHECK-NEXT:    vslideup.vi v0, v24, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v17, 4
+; CHECK-NEXT:    vslideup.vi v0, v25, 4
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v8, v18
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
 ; CHECK-NEXT:    vmsltu.vx v16, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vslideup.vi v0, v16, 6
@@ -160,63 +152,49 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_1)
-; CHECK-NEXT:    vle8.v v17, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
-; CHECK-NEXT:    vle8.v v18, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
 ; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vle8.v v19, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
-; CHECK-NEXT:    vle8.v v20, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
-; CHECK-NEXT:    vle8.v v21, (a0)
-; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vle8.v v22, (a0)
-; CHECK-NEXT:    vmsltu.vx v0, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v16
+; CHECK-NEXT:    li a0, 80
+; CHECK-NEXT:    vsaddu.vx v16, v8, a1
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v24, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 96
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v25, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 112
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v26, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v27, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v28, v16, a2
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    vsaddu.vx v16, v16, a1
+; CHECK-NEXT:    vmsltu.vx v29, v16, a2
+; CHECK-NEXT:    li a0, 48
+; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
 ; CHECK-NEXT:    vmsltu.vx v16, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v17
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v17, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v18
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v18, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v19
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v19, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v20
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v20, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v21
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v21, v8, a2
-; CHECK-NEXT:    vsext.vf8 v8, v22
-; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v22, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v17, v16, 2
-; CHECK-NEXT:    vslideup.vi v0, v20, 2
+; CHECK-NEXT:    vslideup.vi v25, v24, 2
+; CHECK-NEXT:    vslideup.vi v0, v28, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v17, v18, 4
-; CHECK-NEXT:    vslideup.vi v0, v21, 4
+; CHECK-NEXT:    vslideup.vi v25, v26, 4
+; CHECK-NEXT:    vslideup.vi v0, v29, 4
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v17, v19, 6
-; CHECK-NEXT:    vslideup.vi v0, v22, 6
+; CHECK-NEXT:    vslideup.vi v25, v27, 6
+; CHECK-NEXT:    vslideup.vi v0, v16, 6
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v17, 8
+; CHECK-NEXT:    vslideup.vi v0, v25, 8
 ; CHECK-NEXT:    ret
   %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
   ret <128 x i1> %mask
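
The updated checks all follow the same pattern: each 16-lane chunk of the mask is now built from vid.v plus a vadd.vx of the chunk's start offset (materialized with li), a saturating add of the loop index (vsaddu.vx), and an unsigned compare against the trip count (vmsltu.vx), instead of loading a sign-extended offset vector from the constant pool. A minimal standalone reproducer, taken from the smallest updated function above; the llc invocation is an assumption and not necessarily the RUN line used by this test:

; Assumed command: llc -mtriple=riscv64 -mattr=+v < reproducer.ll
define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
  ; Lowered as two 16-lane chunks: (index saturating+ {0..15} + offset) < tc,
  ; with the chunk masks concatenated via vslideup.vi.
  %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
  ret <32 x i1> %mask
}
declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)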