Skip to content

Commit ab12f92

Browse files
fixup! add instructions to isVectorOpUsedAsScalarOp
1 parent 68cd004 commit ab12f92

File tree

4 files changed: +64 -41 lines changed

llvm/test/CodeGen/RISCV/double_reduct.ll

Lines changed: 27 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -25,14 +25,14 @@ define float @fmul_f32(<4 x float> %a, <4 x float> %b) {
2525
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2626
; CHECK-NEXT: vslidedown.vi v10, v8, 2
2727
; CHECK-NEXT: vfmul.vv v8, v8, v10
28+
; CHECK-NEXT: vslidedown.vi v10, v9, 2
29+
; CHECK-NEXT: vfmul.vv v9, v9, v10
2830
; CHECK-NEXT: vrgather.vi v10, v8, 1
2931
; CHECK-NEXT: vfmul.vv v8, v8, v10
32+
; CHECK-NEXT: vrgather.vi v10, v9, 1
33+
; CHECK-NEXT: vfmul.vv v9, v9, v10
3034
; CHECK-NEXT: vfmv.f.s fa5, v8
31-
; CHECK-NEXT: vslidedown.vi v8, v9, 2
32-
; CHECK-NEXT: vfmul.vv v8, v9, v8
33-
; CHECK-NEXT: vrgather.vi v9, v8, 1
34-
; CHECK-NEXT: vfmul.vv v8, v8, v9
35-
; CHECK-NEXT: vfmv.f.s fa4, v8
35+
; CHECK-NEXT: vfmv.f.s fa4, v9
3636
; CHECK-NEXT: fmul.s fa0, fa5, fa4
3737
; CHECK-NEXT: ret
3838
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
@@ -130,14 +130,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
130130
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
131131
; RV32-NEXT: vslidedown.vi v10, v8, 2
132132
; RV32-NEXT: vmul.vv v8, v8, v10
133+
; RV32-NEXT: vslidedown.vi v10, v9, 2
134+
; RV32-NEXT: vmul.vv v9, v9, v10
133135
; RV32-NEXT: vrgather.vi v10, v8, 1
134136
; RV32-NEXT: vmul.vv v8, v8, v10
137+
; RV32-NEXT: vrgather.vi v10, v9, 1
138+
; RV32-NEXT: vmul.vv v9, v9, v10
135139
; RV32-NEXT: vmv.x.s a0, v8
136-
; RV32-NEXT: vslidedown.vi v8, v9, 2
137-
; RV32-NEXT: vmul.vv v8, v9, v8
138-
; RV32-NEXT: vrgather.vi v9, v8, 1
139-
; RV32-NEXT: vmul.vv v8, v8, v9
140-
; RV32-NEXT: vmv.x.s a1, v8
140+
; RV32-NEXT: vmv.x.s a1, v9
141141
; RV32-NEXT: mul a0, a0, a1
142142
; RV32-NEXT: ret
143143
;
@@ -146,14 +146,14 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
146146
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
147147
; RV64-NEXT: vslidedown.vi v10, v8, 2
148148
; RV64-NEXT: vmul.vv v8, v8, v10
149+
; RV64-NEXT: vslidedown.vi v10, v9, 2
150+
; RV64-NEXT: vmul.vv v9, v9, v10
149151
; RV64-NEXT: vrgather.vi v10, v8, 1
150152
; RV64-NEXT: vmul.vv v8, v8, v10
153+
; RV64-NEXT: vrgather.vi v10, v9, 1
154+
; RV64-NEXT: vmul.vv v9, v9, v10
151155
; RV64-NEXT: vmv.x.s a0, v8
152-
; RV64-NEXT: vslidedown.vi v8, v9, 2
153-
; RV64-NEXT: vmul.vv v8, v9, v8
154-
; RV64-NEXT: vrgather.vi v9, v8, 1
155-
; RV64-NEXT: vmul.vv v8, v8, v9
156-
; RV64-NEXT: vmv.x.s a1, v8
156+
; RV64-NEXT: vmv.x.s a1, v9
157157
; RV64-NEXT: mulw a0, a0, a1
158158
; RV64-NEXT: ret
159159
%r1 = call i32 @llvm.vector.reduce.mul.i32.v4i32(<4 x i32> %a)
@@ -165,8 +165,9 @@ define i32 @mul_i32(<4 x i32> %a, <4 x i32> %b) {
165165
define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
166166
; CHECK-LABEL: and_i32:
167167
; CHECK: # %bb.0:
168-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
168+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
169169
; CHECK-NEXT: vand.vv v8, v8, v9
170+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
170171
; CHECK-NEXT: vredand.vs v8, v8, v8
171172
; CHECK-NEXT: vmv.x.s a0, v8
172173
; CHECK-NEXT: ret
@@ -179,8 +180,9 @@ define i32 @and_i32(<4 x i32> %a, <4 x i32> %b) {
179180
define i32 @or_i32(<4 x i32> %a, <4 x i32> %b) {
180181
; CHECK-LABEL: or_i32:
181182
; CHECK: # %bb.0:
182-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
183+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
183184
; CHECK-NEXT: vor.vv v8, v8, v9
185+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
184186
; CHECK-NEXT: vredor.vs v8, v8, v8
185187
; CHECK-NEXT: vmv.x.s a0, v8
186188
; CHECK-NEXT: ret
@@ -208,8 +210,9 @@ define i32 @xor_i32(<4 x i32> %a, <4 x i32> %b) {
208210
define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
209211
; CHECK-LABEL: umin_i32:
210212
; CHECK: # %bb.0:
211-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
213+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
212214
; CHECK-NEXT: vminu.vv v8, v8, v9
215+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
213216
; CHECK-NEXT: vredminu.vs v8, v8, v8
214217
; CHECK-NEXT: vmv.x.s a0, v8
215218
; CHECK-NEXT: ret
@@ -222,8 +225,9 @@ define i32 @umin_i32(<4 x i32> %a, <4 x i32> %b) {
222225
define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
223226
; CHECK-LABEL: umax_i32:
224227
; CHECK: # %bb.0:
225-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
228+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
226229
; CHECK-NEXT: vmaxu.vv v8, v8, v9
230+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
227231
; CHECK-NEXT: vredmaxu.vs v8, v8, v8
228232
; CHECK-NEXT: vmv.x.s a0, v8
229233
; CHECK-NEXT: ret
@@ -236,8 +240,9 @@ define i32 @umax_i32(<4 x i32> %a, <4 x i32> %b) {
236240
define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
237241
; CHECK-LABEL: smin_i32:
238242
; CHECK: # %bb.0:
239-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
243+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
240244
; CHECK-NEXT: vmin.vv v8, v8, v9
245+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
241246
; CHECK-NEXT: vredmin.vs v8, v8, v8
242247
; CHECK-NEXT: vmv.x.s a0, v8
243248
; CHECK-NEXT: ret
@@ -250,8 +255,9 @@ define i32 @smin_i32(<4 x i32> %a, <4 x i32> %b) {
250255
define i32 @smax_i32(<4 x i32> %a, <4 x i32> %b) {
251256
; CHECK-LABEL: smax_i32:
252257
; CHECK: # %bb.0:
253-
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
258+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
254259
; CHECK-NEXT: vmax.vv v8, v8, v9
260+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
255261
; CHECK-NEXT: vredmax.vs v8, v8, v8
256262
; CHECK-NEXT: vmv.x.s a0, v8
257263
; CHECK-NEXT: ret

llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,11 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
1212
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1313
; RV32-NEXT: vmv.v.i v8, 0
1414
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
15+
; RV32-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1516
; RV32-NEXT: vid.v v9
1617
; RV32-NEXT: vrsub.vi v9, v9, 4
1718
; RV32-NEXT: vand.vv v8, v8, v9
19+
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
1820
; RV32-NEXT: vredmaxu.vs v8, v8, v8
1921
; RV32-NEXT: vmv.x.s a0, v8
2022
; RV32-NEXT: li a1, 4
@@ -29,9 +31,11 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
2931
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
3032
; RV64-NEXT: vmv.v.i v8, 0
3133
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
34+
; RV64-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3235
; RV64-NEXT: vid.v v9
3336
; RV64-NEXT: vrsub.vi v9, v9, 4
3437
; RV64-NEXT: vand.vv v8, v8, v9
38+
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
3539
; RV64-NEXT: vredmaxu.vs v8, v8, v8
3640
; RV64-NEXT: vmv.x.s a0, v8
3741
; RV64-NEXT: li a1, 4

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll

Lines changed: 29 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -437,8 +437,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
437437
; RV32-NEXT: vslidedown.vi v9, v8, 1
438438
; RV32-NEXT: vmv.x.s a1, v9
439439
; RV32-NEXT: vslidedown.vi v9, v8, 2
440-
; RV32-NEXT: vmv.x.s a2, v9
441440
; RV32-NEXT: vslidedown.vi v8, v8, 3
441+
; RV32-NEXT: vmv.x.s a2, v9
442442
; RV32-NEXT: vmv.x.s a3, v8
443443
; RV32-NEXT: add a1, a1, a2
444444
; RV32-NEXT: add a0, a0, a3
@@ -452,8 +452,8 @@ define i32 @reduce_sum_4xi32_reduce_order(<4 x i32> %v) {
452452
; RV64-NEXT: vslidedown.vi v9, v8, 1
453453
; RV64-NEXT: vmv.x.s a1, v9
454454
; RV64-NEXT: vslidedown.vi v9, v8, 2
455-
; RV64-NEXT: vmv.x.s a2, v9
456455
; RV64-NEXT: vslidedown.vi v8, v8, 3
456+
; RV64-NEXT: vmv.x.s a2, v9
457457
; RV64-NEXT: vmv.x.s a3, v8
458458
; RV64-NEXT: add a1, a1, a2
459459
; RV64-NEXT: add a0, a0, a3
@@ -530,7 +530,7 @@ define i32 @reduce_and_16xi32_prefix5(ptr %p) {
530530
; CHECK: # %bb.0:
531531
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
532532
; CHECK-NEXT: vle32.v v8, (a0)
533-
; CHECK-NEXT: vsetivli zero, 5, e32, m1, ta, ma
533+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
534534
; CHECK-NEXT: vmv.v.i v10, -1
535535
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
536536
; CHECK-NEXT: vredand.vs v8, v8, v10
@@ -725,7 +725,7 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) {
725725
; RV32: # %bb.0:
726726
; RV32-NEXT: vsetivli zero, 5, e32, m2, ta, ma
727727
; RV32-NEXT: vle32.v v8, (a0)
728-
; RV32-NEXT: vsetivli zero, 5, e32, m1, ta, ma
728+
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
729729
; RV32-NEXT: vmv.v.i v10, -1
730730
; RV32-NEXT: vsetivli zero, 5, e32, m2, ta, ma
731731
; RV32-NEXT: vredminu.vs v8, v8, v10
@@ -799,8 +799,11 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
799799
define float @reduce_fadd_2xf32_non_associative(ptr %p) {
800800
; CHECK-LABEL: reduce_fadd_2xf32_non_associative:
801801
; CHECK: # %bb.0:
802-
; CHECK-NEXT: flw fa5, 0(a0)
803-
; CHECK-NEXT: flw fa4, 4(a0)
802+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
803+
; CHECK-NEXT: vle32.v v8, (a0)
804+
; CHECK-NEXT: vfmv.f.s fa5, v8
805+
; CHECK-NEXT: vslidedown.vi v8, v8, 1
806+
; CHECK-NEXT: vfmv.f.s fa4, v8
804807
; CHECK-NEXT: fadd.s fa0, fa5, fa4
805808
; CHECK-NEXT: ret
806809
%v = load <2 x float>, ptr %p, align 256
@@ -832,8 +835,11 @@ define float @reduce_fadd_2xf32_reassoc_only(ptr %p) {
832835
define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
833836
; CHECK-LABEL: reduce_fadd_2xf32_ninf_only:
834837
; CHECK: # %bb.0:
835-
; CHECK-NEXT: flw fa5, 0(a0)
836-
; CHECK-NEXT: flw fa4, 4(a0)
838+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
839+
; CHECK-NEXT: vle32.v v8, (a0)
840+
; CHECK-NEXT: vfmv.f.s fa5, v8
841+
; CHECK-NEXT: vslidedown.vi v8, v8, 1
842+
; CHECK-NEXT: vfmv.f.s fa4, v8
837843
; CHECK-NEXT: fadd.s fa0, fa5, fa4
838844
; CHECK-NEXT: ret
839845
%v = load <2 x float>, ptr %p, align 256
@@ -848,13 +854,15 @@ define float @reduce_fadd_2xf32_ninf_only(ptr %p) {
848854
define float @reduce_fadd_4xi32_non_associative(ptr %p) {
849855
; CHECK-LABEL: reduce_fadd_4xi32_non_associative:
850856
; CHECK: # %bb.0:
851-
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
857+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
852858
; CHECK-NEXT: vle32.v v8, (a0)
853-
; CHECK-NEXT: flw fa5, 12(a0)
854859
; CHECK-NEXT: lui a0, 524288
855860
; CHECK-NEXT: vmv.s.x v9, a0
856-
; CHECK-NEXT: vfredusum.vs v8, v8, v9
857-
; CHECK-NEXT: vfmv.f.s fa4, v8
861+
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
862+
; CHECK-NEXT: vfredusum.vs v9, v8, v9
863+
; CHECK-NEXT: vslidedown.vi v8, v8, 3
864+
; CHECK-NEXT: vfmv.f.s fa5, v8
865+
; CHECK-NEXT: vfmv.f.s fa4, v9
858866
; CHECK-NEXT: fadd.s fa0, fa4, fa5
859867
; CHECK-NEXT: ret
860868
%v = load <4 x float>, ptr %p, align 256
@@ -873,10 +881,15 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
873881
define float @reduce_fadd_4xi32_non_associative2(ptr %p) {
874882
; CHECK-LABEL: reduce_fadd_4xi32_non_associative2:
875883
; CHECK: # %bb.0:
876-
; CHECK-NEXT: flw fa5, 0(a0)
877-
; CHECK-NEXT: flw fa4, 4(a0)
878-
; CHECK-NEXT: flw fa3, 8(a0)
879-
; CHECK-NEXT: flw fa2, 12(a0)
884+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
885+
; CHECK-NEXT: vle32.v v8, (a0)
886+
; CHECK-NEXT: vfmv.f.s fa5, v8
887+
; CHECK-NEXT: vslidedown.vi v9, v8, 1
888+
; CHECK-NEXT: vfmv.f.s fa4, v9
889+
; CHECK-NEXT: vslidedown.vi v9, v8, 2
890+
; CHECK-NEXT: vslidedown.vi v8, v8, 3
891+
; CHECK-NEXT: vfmv.f.s fa3, v9
892+
; CHECK-NEXT: vfmv.f.s fa2, v8
880893
; CHECK-NEXT: fadd.s fa5, fa5, fa4
881894
; CHECK-NEXT: fadd.s fa4, fa3, fa2
882895
; CHECK-NEXT: fadd.s fa0, fa5, fa4

llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ entry:
1818
define i64 @reduce_add2(<4 x i64> %v) {
1919
; CHECK-LABEL: reduce_add2:
2020
; CHECK: # %bb.0: # %entry
21-
; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
21+
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2222
; CHECK-NEXT: vmv.v.i v10, 8
2323
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2424
; CHECK-NEXT: vredsum.vs v8, v8, v10
@@ -282,10 +282,10 @@ define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
282282
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
283283
; CHECK-NEXT: vfmv.s.f v10, fa0
284284
; CHECK-NEXT: vfredusum.vs v8, v8, v10
285+
; CHECK-NEXT: vfmv.s.f v10, fa1
286+
; CHECK-NEXT: vfredusum.vs v9, v9, v10
285287
; CHECK-NEXT: vfmv.f.s fa5, v8
286-
; CHECK-NEXT: vfmv.s.f v8, fa1
287-
; CHECK-NEXT: vfredusum.vs v8, v9, v8
288-
; CHECK-NEXT: vfmv.f.s fa4, v8
288+
; CHECK-NEXT: vfmv.f.s fa4, v9
289289
; CHECK-NEXT: fdiv.s fa0, fa5, fa4
290290
; CHECK-NEXT: ret
291291
entry:

0 commit comments

Comments
 (0)