22 changes: 20 additions & 2 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -596,8 +596,26 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
 
   if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
     if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
-        F && F == Factor && llvm::all_equal(IMI->args())) {
-      return {IMI->getArgOperand(0), GapMask};
+        F && F == Factor) {
+      Value *RefArg = nullptr;
+      // Check if all the intrinsic arguments are the same, except those that
+      // are zeros, which we mark as gaps in the gap mask.
+      for (auto [Idx, Arg] : enumerate(IMI->args())) {
+        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
+          GapMask.clearBit(Idx);
+          continue;
+        }
+
+        if (!RefArg)
+          RefArg = Arg;
+        else if (RefArg != Arg)
+          return {nullptr, GapMask};
+      }
+
+      // On a very rare occasion, all the intrinsic arguments might be zeros,
+      // in which case we still want to return an all-zeros constant instead
+      // of nullptr.
+      return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
     }
   }
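To make the new logic easier to follow on its own, here is a self-contained sketch of the gap-mask computation. `computeGapMask` and its `MaskOps` parameter are hypothetical stand-ins for the pass's `getMask` and `IMI->args()`, and the local `GapMask` construction is assumed rather than taken from the patch, so read it as an illustration under those assumptions, not the actual implementation:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include <utility>

using namespace llvm;

// Hypothetical helper: given the operands of an interleave intrinsic used as
// a wide mask, return the shared per-field mask plus a gap mask whose cleared
// bits mark constant-zero (skipped) fields, or {nullptr, GapMask} when two
// different non-zero masks appear.
static std::pair<Value *, APInt> computeGapMask(ArrayRef<Value *> MaskOps) {
  // Start with every field live; bits get cleared as gaps are found.
  APInt GapMask = APInt::getAllOnes(MaskOps.size());
  Value *RefArg = nullptr;
  for (auto [Idx, Arg] : enumerate(MaskOps)) {
    // A constant-zero operand means the field is never accessed: mark a gap.
    if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
      GapMask.clearBit(Idx);
      continue;
    }
    if (!RefArg)
      RefArg = Arg; // First non-zero operand becomes the reference mask.
    else if (RefArg != Arg)
      return {nullptr, GapMask}; // Two distinct live masks: give up.
  }
  // All operands zero: return the (all-zeros) first operand, not nullptr.
  return {RefArg ? RefArg : MaskOps.front(), GapMask};
}
```

For the factor-4 test added below, the mask operands are {%m, %m, zero, zero}, so this sketch would return {%m, 0b0011}: bits 2 and 3 are cleared, marking the last two fields as gaps.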

93 changes: 55 additions & 38 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -205,6 +205,23 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
ret {<4 x i32>, <4 x i32>} %res1
}

+; mask = %m, skip the last two fields.
+define {<2 x i32>, <2 x i32>} @vpload_factor4_interleaved_mask_intrinsic_skip_fields(ptr %ptr, <2 x i1> %m) {
+; CHECK-LABEL: vpload_factor4_interleaved_mask_intrinsic_skip_fields:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT: vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT: ret
+  %interleaved.mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
[Review thread]

Collaborator:
Please add a test with zero as the first operand.

Member (Author):
I had thought about it, but right now RISC-V does not support non-trailing gaps, so the transformation wouldn't kick in (and thus the emitted code would look the same) regardless of whether we return the correct value, as we do now, or an incorrect one, as before.
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 1, i32 5>
+  %res0 = insertvalue {<2 x i32>, <2 x i32>} undef, <2 x i32> %v0, 0
+  %res1 = insertvalue {<2 x i32>, <2 x i32>} %res0, <2 x i32> %v1, 1
+  ret {<2 x i32>, <2 x i32>} %res1
+}
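The exchange above hinges on the fact that the RISC-V backend currently handles only trailing gaps. Under that assumption, a target-side guard might look roughly like the sketch below; `hasOnlyTrailingGaps` is a hypothetical helper written for illustration, not the actual RISC-V hook:

```cpp
#include "llvm/ADT/APInt.h"

using namespace llvm;

// Hypothetical check: the gaps are all trailing exactly when the set bits of
// the gap mask form one contiguous run starting at field 0. For factor 4,
// 0b0011 (last two fields skipped, as in the test above) passes, while
// 0b1100 (first two fields skipped) fails; that is why a leading-zero variant
// of this test would not change the emitted code today.
static bool hasOnlyTrailingGaps(const APInt &GapMask) {
  // APInt::isMask() is true for a non-zero value whose ones start at bit 0.
  return GapMask.isMask();
}
```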

define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
; CHECK: # %bb.0:
@@ -532,8 +549,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
; RV32-NEXT: lui a6, 12291
-; RV32-NEXT: lui a7, %hi(.LCPI26_0)
-; RV32-NEXT: addi a7, a7, %lo(.LCPI26_0)
+; RV32-NEXT: lui a7, %hi(.LCPI27_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI27_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a5)
; RV32-NEXT: vmv.s.x v0, a3
@@ -618,12 +635,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: lui a7, 49164
-; RV32-NEXT: lui a1, %hi(.LCPI26_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_1)
+; RV32-NEXT: lui a1, %hi(.LCPI27_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_1)
; RV32-NEXT: lui t2, 3
; RV32-NEXT: lui t1, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI26_3)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI26_3)
+; RV32-NEXT: lui a4, %hi(.LCPI27_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI27_3)
; RV32-NEXT: lui t0, 786624
; RV32-NEXT: li a5, 48
; RV32-NEXT: lui a6, 768
@@ -802,8 +819,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: lui a1, %hi(.LCPI26_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_2)
+; RV32-NEXT: lui a1, %hi(.LCPI27_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_2)
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: addi a3, a3, -1024
; RV32-NEXT: vmv.s.x v0, a3
@@ -867,16 +884,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v24
-; RV32-NEXT: lui a1, %hi(.LCPI26_4)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_4)
-; RV32-NEXT: lui a2, %hi(.LCPI26_5)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI26_5)
+; RV32-NEXT: lui a1, %hi(.LCPI27_4)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_4)
+; RV32-NEXT: lui a2, %hi(.LCPI27_5)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI27_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI26_7)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_7)
+; RV32-NEXT: lui a1, %hi(.LCPI27_7)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_7)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
@@ -904,14 +921,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
-; RV32-NEXT: lui a1, %hi(.LCPI26_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_6)
-; RV32-NEXT: lui a2, %hi(.LCPI26_8)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI26_8)
+; RV32-NEXT: lui a1, %hi(.LCPI27_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_6)
+; RV32-NEXT: lui a2, %hi(.LCPI27_8)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI27_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI26_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI26_9)
+; RV32-NEXT: lui a1, %hi(.LCPI27_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI27_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v6, (a1)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -998,8 +1015,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vle64.v v8, (a3)
-; RV64-NEXT: lui a3, %hi(.LCPI26_0)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI26_0)
+; RV64-NEXT: lui a3, %hi(.LCPI27_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI27_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: li a5, 61
@@ -1187,8 +1204,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT: lui a2, %hi(.LCPI26_1)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI26_1)
+; RV64-NEXT: lui a2, %hi(.LCPI27_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI27_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v6, (a2)
@@ -1222,8 +1239,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT: lui a2, %hi(.LCPI26_2)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI26_2)
+; RV64-NEXT: lui a2, %hi(.LCPI27_2)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI27_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
@@ -1307,12 +1324,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI26_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI26_3)
+; RV64-NEXT: lui a1, %hi(.LCPI27_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI27_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v20, (a1)
-; RV64-NEXT: lui a1, %hi(.LCPI26_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI26_4)
+; RV64-NEXT: lui a1, %hi(.LCPI27_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI27_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 77
@@ -1363,8 +1380,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
-; RV64-NEXT: lui a1, %hi(.LCPI26_5)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI26_5)
+; RV64-NEXT: lui a1, %hi(.LCPI27_5)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI27_5)
; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 61
@@ -1981,8 +1998,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI62_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI62_0)
+; RV32-NEXT: lui a1, %hi(.LCPI63_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI63_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -2057,8 +2074,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI63_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI63_0)
+; RV32-NEXT: lui a0, %hi(.LCPI64_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI64_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
@@ -2277,8 +2294,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI72_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI72_0)
+; RV32-NEXT: lui a1, %hi(.LCPI73_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI73_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11