
Commit 32d8de5

fixup! Address review comments
1 parent: 3688672

2 files changed: 89 additions, 2 deletions

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 3 additions & 2 deletions
@@ -644,9 +644,10 @@ static Value *getMask(Value *WideMask, unsigned Factor) {
     SmallVector<Instruction *, 8> DeadInsts;
     if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
       assert(!Operands.empty());
+      Value *FirstOp = Operands[0];
       if (Operands.size() == Factor &&
-          std::equal(Operands.begin(), Operands.end(), Operands.begin()))
-        return Operands.front();
+          llvm::all_of(Operands, [=](Value *Op) { return Op == FirstOp; }))
+        return FirstOp;
     }
   }
   if (match(WideMask, m_AllOnes()))
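
For context, the replaced condition passed Operands.begin() as both the range to compare and the range to compare against, so std::equal compared each element with itself and was always true; the new condition actually verifies that every operand is the same value as the first. Below is a minimal standalone sketch of that difference, using std::all_of in place of llvm::all_of (LLVM's range-based wrapper) and hypothetical int operands rather than Value pointers:

// Standalone illustration (not LLVM code): why the old check was vacuous.
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // Two *different* operands; a correct "all operands identical" check
  // must reject this set.
  std::vector<int> Operands = {1, 2};

  // Old condition: the third argument is the start of the same range, so
  // every element is compared with itself and the result is always true.
  bool OldCheck =
      std::equal(Operands.begin(), Operands.end(), Operands.begin());

  // New condition: every operand must compare equal to the first one
  // (the patch uses llvm::all_of, the range wrapper around std::all_of).
  int FirstOp = Operands[0];
  bool NewCheck = std::all_of(Operands.begin(), Operands.end(),
                              [=](int Op) { return Op == FirstOp; });

  assert(OldCheck);  // vacuously true despite the mismatch
  assert(!NewCheck); // correctly rejects the mismatched operands
  return 0;
}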

llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll

Lines changed: 86 additions & 0 deletions
@@ -666,5 +666,91 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @not_scalable_vectors(ptr %p
   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3
 }
 
+define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 %rvl) {
+; RV32-LABEL: not_same_mask:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmv1r.v v9, v0
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.i v10, 0
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmerge.vim v11, v8, 1, v0
+; RV32-NEXT:    vmv1r.v v0, v9
+; RV32-NEXT:    vmerge.vim v9, v8, 1, v0
+; RV32-NEXT:    srli a3, a3, 2
+; RV32-NEXT:    vwaddu.vv v12, v9, v11
+; RV32-NEXT:    vwmaccu.vx v12, a2, v11
+; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vx v9, v12, a3
+; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
+; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT:    vmsne.vi v0, v9, 0
+; RV32-NEXT:    add a2, a3, a3
+; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT:    vslideup.vx v10, v8, a3
+; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v10, (a0), v0.t
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT:    vnsrl.wx v9, v10, a0
+; RV32-NEXT:    vnsrl.wi v8, v10, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: not_same_mask:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmv1r.v v9, v0
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmv.v.i v10, 0
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmerge.vim v11, v8, 1, v0
+; RV64-NEXT:    vmv1r.v v0, v9
+; RV64-NEXT:    vmerge.vim v9, v8, 1, v0
+; RV64-NEXT:    srli a3, a3, 2
+; RV64-NEXT:    vwaddu.vv v12, v9, v11
+; RV64-NEXT:    vwmaccu.vx v12, a2, v11
+; RV64-NEXT:    vmsne.vi v0, v12, 0
+; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vx v9, v12, a3
+; RV64-NEXT:    vmerge.vim v10, v10, 1, v0
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; RV64-NEXT:    vmsne.vi v0, v9, 0
+; RV64-NEXT:    add a2, a3, a3
+; RV64-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV64-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; RV64-NEXT:    vslideup.vx v10, v8, a3
+; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT:    vmsne.vi v0, v10, 0
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; RV64-NEXT:    vle32.v v10, (a0), v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT:    vnsrl.wx v9, v10, a0
+; RV64-NEXT:    vnsrl.wi v8, v10, 0
+; RV64-NEXT:    ret
+  %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1)
+  %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
+  %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}
