Commit 53fe83e

[IA][RISCV] Detecting gap mask from masks assembled by interleaveN intrinsics
1 parent 8cdab07

2 files changed: +75 -39 lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 20 additions & 1 deletion
@@ -596,7 +596,26 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
 
   if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
     if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
-        F && F == Factor && llvm::all_equal(IMI->args())) {
+        F && F == Factor) {
+      Value *RefArg = nullptr;
+      // Check if all the intrinsic arguments are the same, except those that
+      // are zeros, which we mark as gaps in the gap mask.
+      for (auto [Idx, Arg] : enumerate(IMI->args())) {
+        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
+          GapMask.clearBit(Idx);
+          continue;
+        }
+
+        if (!RefArg)
+          RefArg = Arg;
+
+        if (RefArg != Arg)
+          return {nullptr, GapMask};
+      }
+
+      // On a very rare occasion, all the intrinsic arguments might be zeros,
+      // in which case we still want to return an all-zeros constant instead
+      // of nullptr, so we're not using RefArg here.
       return {IMI->getArgOperand(0), GapMask};
     }
   }
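
To make the new extraction loop concrete, here is a minimal, self-contained C++ sketch of the same logic outside LLVM (the helper name extractGapMask and the integer encoding are hypothetical: plain ints stand in for Value* mask arguments, with 0 modeling a zeroinitializer mask):

// Hypothetical standalone model of the change above, not the LLVM API:
// each argument is an int, 0 standing in for a zeroinitializer mask and any
// other value identifying one particular SSA mask value.
#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

// Returns {shared mask, gap mask}, where bit Idx of the gap mask is cleared
// when field Idx is a gap, or std::nullopt when two different non-zero masks
// appear and no single mask can be extracted.
static std::optional<std::pair<int, uint32_t>>
extractGapMask(const std::vector<int> &Args) {
  uint32_t GapMask = (1u << Args.size()) - 1; // all fields active initially
  int RefArg = 0;                             // 0 == no reference picked yet
  for (size_t Idx = 0; Idx < Args.size(); ++Idx) {
    if (Args[Idx] == 0) { // a zero constant: mark this field as a gap
      GapMask &= ~(1u << Idx);
      continue;
    }
    if (RefArg == 0) // the first non-zero argument becomes the reference
      RefArg = Args[Idx];
    if (Args[Idx] != RefArg) // two distinct masks: give up
      return std::nullopt;
  }
  // Like the pass, return argument 0 rather than RefArg, so an all-zeros
  // input still yields a usable all-zeros mask instead of "no mask".
  return std::make_pair(Args[0], GapMask);
}

int main() {
  // interleave4(%m, %m, zeroinitializer, zeroinitializer), with %m as 7:
  if (auto R = extractGapMask({7, 7, 0, 0}))
    std::cout << "mask=" << R->first << " gapmask=0x" << std::hex
              << R->second << "\n"; // prints: mask=7 gapmask=0x3
}

This mirrors the shape exercised by the new test below: @llvm.vector.interleave4(%m, %m, splat(false), splat(false)) produces the shared mask %m with the top two gap-mask bits cleared, letting the pass emit a strided segment load (vlsseg2e32.v) that skips the last two fields.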

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 55 additions & 38 deletions
@@ -205,6 +205,23 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
   ret {<4 x i32>, <4 x i32>} %res1
 }
 
+define {<2 x i32>, <2 x i32>} @vpload_factor4_interleaved_mask_intrinsic_skip_fields(ptr %ptr, <2 x i1> %m) {
+; mask = %m, skip the last two fields.
+; CHECK-LABEL: vpload_factor4_interleaved_mask_intrinsic_skip_fields:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 1, i32 5>
+  %res0 = insertvalue {<2 x i32>, <2 x i32>} undef, <2 x i32> %v0, 0
+  %res1 = insertvalue {<2 x i32>, <2 x i32>} %res0, <2 x i32> %v1, 1
+  ret {<2 x i32>, <2 x i32>} %res1
+}
+
 define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
 ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
 ; CHECK:       # %bb.0:
@@ -514,8 +531,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    lui a3, 12
 ; RV32-NEXT:    lui a6, 12291
-; RV32-NEXT:    lui a7, %hi(.LCPI25_0)
-; RV32-NEXT:    addi a7, a7, %lo(.LCPI25_0)
+; RV32-NEXT:    lui a7, %hi(.LCPI26_0)
+; RV32-NEXT:    addi a7, a7, %lo(.LCPI26_0)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v24, (a5)
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -600,12 +617,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT:    lui a7, 49164
-; RV32-NEXT:    lui a1, %hi(.LCPI25_1)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_1)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_1)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_1)
 ; RV32-NEXT:    lui t2, 3
 ; RV32-NEXT:    lui t1, 196656
-; RV32-NEXT:    lui a4, %hi(.LCPI25_3)
-; RV32-NEXT:    addi a4, a4, %lo(.LCPI25_3)
+; RV32-NEXT:    lui a4, %hi(.LCPI26_3)
+; RV32-NEXT:    addi a4, a4, %lo(.LCPI26_3)
 ; RV32-NEXT:    lui t0, 786624
 ; RV32-NEXT:    li a5, 48
 ; RV32-NEXT:    lui a6, 768
@@ -784,8 +801,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v24, v8, v2
-; RV32-NEXT:    lui a1, %hi(.LCPI25_2)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_2)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_2)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_2)
 ; RV32-NEXT:    lui a3, 3073
 ; RV32-NEXT:    addi a3, a3, -1024
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -849,16 +866,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v28, v24
-; RV32-NEXT:    lui a1, %hi(.LCPI25_4)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_4)
-; RV32-NEXT:    lui a2, %hi(.LCPI25_5)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI25_5)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_4)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_4)
+; RV32-NEXT:    lui a2, %hi(.LCPI26_5)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_5)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI25_7)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_7)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_7)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_7)
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vle16.v v10, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
@@ -886,14 +903,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v0, v10
-; RV32-NEXT:    lui a1, %hi(.LCPI25_6)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_6)
-; RV32-NEXT:    lui a2, %hi(.LCPI25_8)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI25_8)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_6)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_6)
+; RV32-NEXT:    lui a2, %hi(.LCPI26_8)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_8)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v4, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI25_9)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_9)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_9)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v6, (a1)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
@@ -980,8 +997,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    li a4, 128
 ; RV64-NEXT:    lui a1, 1
 ; RV64-NEXT:    vle64.v v8, (a3)
-; RV64-NEXT:    lui a3, %hi(.LCPI25_0)
-; RV64-NEXT:    addi a3, a3, %lo(.LCPI25_0)
+; RV64-NEXT:    lui a3, %hi(.LCPI26_0)
+; RV64-NEXT:    addi a3, a3, %lo(.LCPI26_0)
 ; RV64-NEXT:    vmv.s.x v0, a4
 ; RV64-NEXT:    csrr a4, vlenb
 ; RV64-NEXT:    li a5, 61
@@ -1169,8 +1186,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT:    vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT:    lui a2, %hi(.LCPI25_1)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI25_1)
+; RV64-NEXT:    lui a2, %hi(.LCPI26_1)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_1)
 ; RV64-NEXT:    li a3, 192
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v6, (a2)
@@ -1204,8 +1221,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui a2, %hi(.LCPI25_2)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI25_2)
+; RV64-NEXT:    lui a2, %hi(.LCPI26_2)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_2)
 ; RV64-NEXT:    li a3, 1040
 ; RV64-NEXT:    vmv.s.x v0, a3
 ; RV64-NEXT:    addi a1, a1, -2016
@@ -1289,12 +1306,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a1, %hi(.LCPI25_3)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI25_3)
+; RV64-NEXT:    lui a1, %hi(.LCPI26_3)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_3)
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v20, (a1)
-; RV64-NEXT:    lui a1, %hi(.LCPI25_4)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI25_4)
+; RV64-NEXT:    lui a1, %hi(.LCPI26_4)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_4)
 ; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 77
@@ -1345,8 +1362,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vrgatherei16.vv v0, v16, v8
-; RV64-NEXT:    lui a1, %hi(.LCPI25_5)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI25_5)
+; RV64-NEXT:    lui a1, %hi(.LCPI26_5)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_5)
 ; RV64-NEXT:    vle16.v v20, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 61
@@ -1963,8 +1980,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI61_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI61_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI62_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI62_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
@@ -2039,8 +2056,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT:    vmv.s.x v10, a0
 ; RV32-NEXT:    li a0, 146
 ; RV32-NEXT:    vmv.s.x v11, a0
-; RV32-NEXT:    lui a0, %hi(.LCPI62_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI62_0)
+; RV32-NEXT:    lui a0, %hi(.LCPI63_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI63_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
 ; RV32-NEXT:    li a0, 36
; RV32-NEXT: li a0, 36
@@ -2198,8 +2215,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
21982215
; RV32-NEXT: vle32.v v12, (a0), v0.t
21992216
; RV32-NEXT: li a0, 36
22002217
; RV32-NEXT: vmv.s.x v20, a1
2201-
; RV32-NEXT: lui a1, %hi(.LCPI68_0)
2202-
; RV32-NEXT: addi a1, a1, %lo(.LCPI68_0)
2218+
; RV32-NEXT: lui a1, %hi(.LCPI69_0)
2219+
; RV32-NEXT: addi a1, a1, %lo(.LCPI69_0)
22032220
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
22042221
; RV32-NEXT: vle16.v v21, (a1)
22052222
; RV32-NEXT: vcompress.vm v8, v12, v11
