
Commit 16915c9

fixup! Split out the folding rule on interleave2 of two const splats
1 parent 5cd5be4 commit 16915c9

Showing 2 changed files with 0 additions and 85 deletions.

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 0 additions & 49 deletions
@@ -22665,47 +22665,6 @@ bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
   return true;
 }
 
-/// If we're interleaving 2 constant splats, for instance `<vscale x 8 x i32>
-/// <splat of 666>` and `<vscale x 8 x i32> <splat of 777>`, we can create a
-/// larger splat `<vscale x 8 x i64> <splat of ((777 << 32) | 666)>` first
-/// before casting it back into `<vscale x 16 x i32>`. This will result in a
-/// simple unit-stride store rather than a segment store, which is more
-/// expensive in this case.
-static Value *foldInterleaved2OfConstSplats(Value *Op0, Value *Op1,
-                                            VectorType *VTy,
-                                            const TargetLowering *TLI,
-                                            Instruction *VPStore) {
-  auto *SplatVal0 = dyn_cast_or_null<ConstantInt>(getSplatValue(Op0));
-  auto *SplatVal1 = dyn_cast_or_null<ConstantInt>(getSplatValue(Op1));
-  if (!SplatVal0 || !SplatVal1)
-    return nullptr;
-
-  auto &Ctx = VPStore->getContext();
-  auto &DL = VPStore->getModule()->getDataLayout();
-
-  auto *NewVTy = VectorType::getExtendedElementVectorType(VTy);
-  if (!TLI->isTypeLegal(TLI->getValueType(DL, NewVTy)))
-    return nullptr;
-
-  // InterleavedAccessPass will remove VPStore after this but we still want to
-  // preserve it, hence clone another one here.
-  auto *ClonedVPStore = VPStore->clone();
-  ClonedVPStore->insertBefore(VPStore);
-  IRBuilder<> Builder(ClonedVPStore);
-
-  Type *ETy = VTy->getElementType();
-  unsigned Width = ETy->getIntegerBitWidth();
-
-  APInt NewSplatVal(Width * 2, SplatVal1->getZExtValue());
-  NewSplatVal <<= Width;
-  NewSplatVal |= SplatVal0->getZExtValue();
-  auto *NewSplat = ConstantVector::getSplat(NewVTy->getElementCount(),
-                                            ConstantInt::get(Ctx, NewSplatVal));
-  return Builder.CreateBitCast(NewSplat,
-                               VectorType::getDoubleElementsVectorType(VTy));
-}
-
 /// Lower an interleaved vp.store into a vssegN intrinsic.
 ///
 /// E.g. Lower an interleaved vp.store (Factor = 2):
@@ -22748,14 +22707,6 @@ bool RISCVTargetLowering::lowerInterleavedIntrinsicToVPStore(
           Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
     return false;
 
-  if (Factor == 2)
-    if (Value *BC = foldInterleaved2OfConstSplats(
-            InterleaveOperands[0], InterleaveOperands[1], VTy, this, Store)) {
-      // Store is guaranteed to be the only user of the interleaved intrinsic.
-      Store->getOperand(0)->replaceAllUsesWith(BC);
-      return true;
-    }
-
   IRBuilder<> Builder(Store);
   Value *WideEVL = Store->getArgOperand(3);
   // Conservatively check if EVL is a multiple of factor, otherwise some
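For reference, the fold being removed here is a plain shift-and-or: the second splat constant lands in the high half of a double-width element and the first in the low half. A minimal standalone sketch of that packing (plain C++ with a fixed 32-bit element width instead of LLVM's APInt; the names are illustrative, not from this patch):

#include <cstdint>
#include <cstdio>

int main() {
  const unsigned Width = 32;   // element width of the original splats
  const uint64_t Splat0 = 666; // first interleave2 operand (even lanes)
  const uint64_t Splat1 = 777; // second interleave2 operand (odd lanes)

  // Same packing as foldInterleaved2OfConstSplats: second splat in the
  // high half, first splat in the low half of the double-width element.
  uint64_t Packed = (Splat1 << Width) | Splat0;
  printf("0x%016llx\n", (unsigned long long)Packed); // 0x000003090000029a
}

This is exactly the constant the RV64 check lines in the removed test below materialize with li/slli/addi.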

llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll

Lines changed: 0 additions & 36 deletions
@@ -137,42 +137,6 @@ define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
   ret void
 }
 
-; Expecting unit-stride store here rather than segmented store.
-define void @store_factor2_const_splat(ptr %dst) {
-; RV32-LABEL: store_factor2_const_splat:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    li a1, 777
-; RV32-NEXT:    li a2, 666
-; RV32-NEXT:    sw a2, 8(sp)
-; RV32-NEXT:    sw a1, 12(sp)
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a1), zero
-; RV32-NEXT:    li a1, 88
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT:    vse32.v v8, (a0)
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    .cfi_def_cfa_offset 0
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: store_factor2_const_splat:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 777
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    addi a1, a1, 666
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a1
-; RV64-NEXT:    li a1, 88
-; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV64-NEXT:    vse32.v v8, (a0)
-; RV64-NEXT:    ret
-  %interleave2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> splat (i32 666), <vscale x 8 x i32> splat (i32 777))
-  call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %interleave2, ptr %dst, <vscale x 16 x i1> splat (i1 true), i32 88)
-  ret void
-}
-
 define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor4_v2:
 ; RV32:       # %bb.0:

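The bitcast from the packed i64 splat back to i32 lanes is value-preserving only because RISC-V is little-endian: the low half of each 64-bit element reads back as the even 32-bit lane. A quick standalone C++ check of that lane ordering (an illustration, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // interleave2(splat 666, splat 777) must read back as {666, 777, ...}
  // when each packed 64-bit element is reinterpreted as two 32-bit lanes
  // on a little-endian target such as RISC-V.
  uint64_t PackedElt = (777ull << 32) | 666ull;
  uint32_t Lanes[2];
  std::memcpy(Lanes, &PackedElt, sizeof(PackedElt));
  printf("lane0 = %u, lane1 = %u\n", Lanes[0], Lanes[1]); // 666, 777
}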
0 commit comments