Skip to content

Commit e0dc48b

Browse files
preames authored and
mahesh-attarde committed
[RISCV][IA] Support masked.store of interleaveN intrinsic (llvm#149893)
This is the masked.store side to the masked.load support added in 881b3fd. With this change, we support masked.load and masked.store via the intrinsic lowering path used primarily with scalable vectors. An upcoming change will extend the fixed vector (i.e. shufflevector) paths in the same manner.
1 parent 2e07238 commit e0dc48b

File tree

3 files changed

+52
-8
lines changed

3 files changed

+52
-8
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -661,28 +661,38 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
661661
Instruction *StoredBy = dyn_cast<Instruction>(IntII->user_back());
662662
if (!StoredBy)
663663
return false;
664-
if (!isa<StoreInst, VPIntrinsic>(StoredBy))
664+
auto *SI = dyn_cast<StoreInst>(StoredBy);
665+
auto *II = dyn_cast<IntrinsicInst>(StoredBy);
666+
if (!SI && !II)
665667
return false;
666668

667669
SmallVector<Value *, 8> InterleaveValues(IntII->args());
668670
const unsigned Factor = getInterleaveIntrinsicFactor(IntII->getIntrinsicID());
669671
assert(Factor && "unexpected interleave intrinsic");
670672

671673
Value *Mask = nullptr;
672-
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
673-
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
674+
if (II) {
675+
// Check mask operand. Handle both all-true/false and interleaved mask.
676+
Value *WideMask;
677+
switch (II->getIntrinsicID()) {
678+
default:
674679
return false;
675-
676-
Value *WideMask = VPStore->getOperand(2);
680+
case Intrinsic::vp_store:
681+
WideMask = II->getOperand(2);
682+
break;
683+
case Intrinsic::masked_store:
684+
WideMask = II->getOperand(3);
685+
break;
686+
}
677687
Mask = getMask(WideMask, Factor,
678688
cast<VectorType>(InterleaveValues[0]->getType()));
679689
if (!Mask)
680690
return false;
681691

682-
LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
683-
<< *IntII << " and factor = " << Factor << "\n");
692+
LLVM_DEBUG(dbgs() << "IA: Found a vp.store or masked.store with interleave"
693+
<< " intrinsic " << *IntII << " and factor = "
694+
<< Factor << "\n");
684695
} else {
685-
auto *SI = cast<StoreInst>(StoredBy);
686696
if (!SI->isSimple())
687697
return false;
688698

llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,17 @@ static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
169169
: Constant::getAllOnesValue(XLenTy);
170170
return true;
171171
}
172+
case Intrinsic::masked_store: {
173+
Ptr = II->getOperand(1);
174+
Alignment = cast<ConstantInt>(II->getArgOperand(2))->getAlignValue();
175+
176+
assert(Mask && "masked.store needs a mask!");
177+
178+
VL = isa<FixedVectorType>(VTy)
179+
? Builder.CreateElementCount(XLenTy, VTy->getElementCount())
180+
: Constant::getAllOnesValue(XLenTy);
181+
return true;
182+
}
172183
}
173184
}
174185

llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,3 +303,26 @@ define void @vector_interleave_store_factor8(<vscale x 2 x i32> %a, <vscale x 2
303303
store <vscale x 16 x i32> %v, ptr %p
304304
ret void
305305
}
306+
307+
define void @masked_store_factor3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p) {
308+
; CHECK-LABEL: masked_store_factor3:
309+
; CHECK: # %bb.0:
310+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
311+
; CHECK-NEXT: vsseg3e32.v v8, (a0)
312+
; CHECK-NEXT: ret
313+
%v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c)
314+
call void @llvm.masked.store(<vscale x 6 x i32> %v, ptr %p, i32 4, <vscale x 6 x i1> splat (i1 true))
315+
ret void
316+
}
317+
318+
define void @masked_store_factor3_masked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p, <vscale x 2 x i1> %m) {
319+
; CHECK-LABEL: masked_store_factor3_masked:
320+
; CHECK: # %bb.0:
321+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
322+
; CHECK-NEXT: vsseg3e32.v v8, (a0), v0.t
323+
; CHECK-NEXT: ret
324+
%interleaved.mask = call <vscale x 6 x i1> @llvm.vector.interleave3(<vscale x 2 x i1> %m, <vscale x 2 x i1> %m, <vscale x 2 x i1> %m)
325+
%v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c)
326+
call void @llvm.masked.store(<vscale x 6 x i32> %v, ptr %p, i32 4, <vscale x 6 x i1> %interleaved.mask)
327+
ret void
328+
}

0 commit comments

Comments
 (0)