Skip to content

Commit f7242df

Browse files
committed
fixup! Address some of the review comments
1 parent 6b7f8e9 commit f7242df

File tree

3 files changed

+16
-27
lines changed

3 files changed

+16
-27
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -630,13 +630,8 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
630630
return true;
631631
}
632632

633-
/// Check the interleaved mask
634-
///
635-
/// - if a value within the optional is non-nullptr, the value corresponds to
636-
/// deinterleaved mask
637-
/// - if a value within the option is nullptr, the value corresponds to all-true
638-
/// mask
639-
/// - return nullopt if mask cannot be deinterleaved
633+
// Return nullptr if the value corresponds to an all-true mask. Otherwise,
634+
// return the value that corresponds to a deinterleaved mask.
640635
static Value *getMask(Value *WideMask, unsigned Factor) {
641636
using namespace llvm::PatternMatch;
642637
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22607,8 +22607,7 @@ bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
2260722607
Factor);
2260822608

2260922609
Value *PoisonVal = PoisonValue::get(VecTupTy);
22610-
SmallVector<Value *> Operands;
22611-
Operands.append({PoisonVal, Load->getArgOperand(0)});
22610+
SmallVector<Value *> Operands{PoisonVal, Load->getArgOperand(0)};
2261222611

2261322612
Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
2261422613
Load->getModule(), IntrMaskIds[Factor - 2],
@@ -22618,8 +22617,8 @@ bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
2261822617

2261922618
Operands.push_back(EVL);
2262022619

22621-
// Tail-policy
22622-
Operands.push_back(ConstantInt::get(XLenTy, RISCVII::TAIL_AGNOSTIC));
22620+
Operands.push_back(ConstantInt::get(XLenTy, RISCVII::TAIL_AGNOSTIC |
22621+
RISCVII::MASK_AGNOSTIC));
2262322622

2262422623
Operands.push_back(ConstantInt::get(XLenTy, Log2_64(SEW)));
2262522624

llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %
1616
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1717
; RV64-NEXT: vlseg2e32.v v8, (a0)
1818
; RV64-NEXT: ret
19-
%wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %rvl)
19+
%wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
2020
%deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
2121
%t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
2222
%t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
@@ -39,7 +39,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
3939
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
4040
; RV64-NEXT: vlseg4e32.v v8, (a0)
4141
; RV64-NEXT: ret
42-
%wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
42+
%wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
4343
%d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
4444
%d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
4545
%d0.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
@@ -71,7 +71,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
7171
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
7272
; RV64-NEXT: vlseg8e32.v v8, (a0)
7373
; RV64-NEXT: ret
74-
%wide.masked.load = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 %rvl)
74+
%wide.masked.load = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %rvl)
7575
%d0 = call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %wide.masked.load)
7676
%d0.0 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %d0, 0
7777
%d0.1 = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %d0, 1
@@ -121,7 +121,7 @@ define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
121121
; RV64-NEXT: vsseg2e32.v v8, (a0)
122122
; RV64-NEXT: ret
123123
%interleaved.vec = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
124-
call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 %rvl)
124+
call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %rvl)
125125
ret void
126126
}
127127

@@ -156,13 +156,8 @@ define void @store_factor2_const_splat(ptr %dst) {
156156
; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
157157
; RV64-NEXT: vse32.v v8, (a0)
158158
; RV64-NEXT: ret
159-
%interleave2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(
160-
<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 666, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer),
161-
<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 777, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
162-
)
163-
call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %interleave2, ptr %dst,
164-
<vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 1, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer),
165-
i32 87)
159+
%interleave2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> splat (i32 666), <vscale x 8 x i32> splat (i32 777))
160+
call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %interleave2, ptr %dst, <vscale x 16 x i1> splat (i1 true), i32 87)
166161
ret void
167162
}
168163

@@ -187,7 +182,7 @@ define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
187182
%interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
188183
%interleaved.vec1 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v1, <vscale x 1 x i32> %v1)
189184
%interleaved.vec2 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %interleaved.vec1)
190-
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec2, ptr %ptr, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %rvl)
185+
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec2, ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
191186
ret void
192187
}
193188

@@ -224,7 +219,7 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
224219
%interleaved.vec4 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v1, <vscale x 1 x i32> %v1)
225220
%interleaved.vec5 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %interleaved.vec3, <vscale x 2 x i32> %interleaved.vec4)
226221
%interleaved.vec6 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %interleaved.vec2, <vscale x 4 x i32> %interleaved.vec5)
227-
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec6, ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
222+
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec6, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
228223
ret void
229224
}
230225

@@ -489,7 +484,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
489484
; RV64-NEXT: vmv.v.v v10, v9
490485
; RV64-NEXT: vmv.v.v v11, v9
491486
; RV64-NEXT: ret
492-
%wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
487+
%wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
493488
%d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
494489
%d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
495490
%d0.1 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
@@ -542,7 +537,7 @@ define {<vscale x 4 x i32>, <vscale x 2 x i32>, <vscale x 1 x i32>, <vscale x 1
542537
; RV64-NEXT: vnsrl.wx v12, v11, a0
543538
; RV64-NEXT: vnsrl.wi v11, v11, 0
544539
; RV64-NEXT: ret
545-
%wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
540+
%wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
546541
%d0 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
547542
%d0.0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 0
548543
%t0 = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %d0, 1
@@ -611,7 +606,7 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
611606
%interleaved.vec0 = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v0)
612607
%interleaved.vec1 = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 2 x i32> %interleaved.vec0, <vscale x 2 x i32> %v1)
613608
%interleaved.vec2 = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 4 x i32> %interleaved.vec1, <vscale x 4 x i32> %v2)
614-
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec2, ptr %ptr, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i32 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 %rvl)
609+
call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec2, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
615610
ret void
616611
}
617612

0 commit comments

Comments
 (0)