
Commit 0e9b6d6

[IA][RISCV] Detecting gap mask from a mask assembled by interleaveN intrinsics (#153510)
If the mask of a (fixed-vector) deinterleaved load is assembled by the `vector.interleaveN` intrinsic, any intrinsic arguments that are all-zero constants are treated as gaps.
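
For illustration, a minimal sketch of the pattern this change recognizes (mirroring the factor-4 test added below; %p, %m, and the vector widths are placeholders): the two splat-false arguments mark fields 2 and 3 of each factor-4 group as gaps.

    %mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
    %wide = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %mask, i32 8)
    ; getMask() now returns %m with a gap mask of 0b0011 (bits 2 and 3
    ; cleared), so on RISC-V the deinterleaved load can lower to a strided
    ; segment load (vlsseg2e32 with stride 16) that skips the last two fields.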

2 files changed: +75 -40 lines

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 20 additions & 2 deletions
@@ -596,8 +596,26 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
 
   if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
     if (unsigned F = getInterleaveIntrinsicFactor(IMI->getIntrinsicID());
-        F && F == Factor && llvm::all_equal(IMI->args())) {
-      return {IMI->getArgOperand(0), GapMask};
+        F && F == Factor) {
+      Value *RefArg = nullptr;
+      // Check if all the intrinsic arguments are the same, except those that
+      // are zeros, which we mark as gaps in the gap mask.
+      for (auto [Idx, Arg] : enumerate(IMI->args())) {
+        if (auto *C = dyn_cast<Constant>(Arg); C && C->isZeroValue()) {
+          GapMask.clearBit(Idx);
+          continue;
+        }
+
+        if (!RefArg)
+          RefArg = Arg;
+        else if (RefArg != Arg)
+          return {nullptr, GapMask};
+      }
+
+      // In a very rare occasion, all the intrinsic arguments might be zeros,
+      // in which case we still want to return an all-zeros constant instead
+      // of nullptr.
+      return {RefArg ? RefArg : IMI->getArgOperand(0), GapMask};
     }
   }
 
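As the new comment notes, there is a corner case where every interleave argument is zero; a hypothetical sketch (not one of this commit's tests):

    ; Every gap-mask bit gets cleared and RefArg stays null, so getMask()
    ; falls back to returning the first (all-zeros) argument rather than
    ; nullptr, which is reserved for "mask could not be recovered".
    %mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> zeroinitializer, <4 x i1> zeroinitializer)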

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 55 additions & 38 deletions
@@ -205,6 +205,23 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %pt
   ret {<4 x i32>, <4 x i32>} %res1
 }
 
+; mask = %m, skip the last two fields.
+define {<2 x i32>, <2 x i32>} @vpload_factor4_interleaved_mask_intrinsic_skip_fields(ptr %ptr, <2 x i1> %m) {
+; CHECK-LABEL: vpload_factor4_interleaved_mask_intrinsic_skip_fields:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = call <8 x i1> @llvm.vector.interleave4(<2 x i1> %m, <2 x i1> %m, <2 x i1> splat (i1 false), <2 x i1> splat (i1 false))
+  %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 0, i32 4>
+  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 1, i32 5>
+  %res0 = insertvalue {<2 x i32>, <2 x i32>} undef, <2 x i32> %v0, 0
+  %res1 = insertvalue {<2 x i32>, <2 x i32>} %res0, <2 x i32> %v1, 1
+  ret {<2 x i32>, <2 x i32>} %res1
+}
+
 define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_shuffle(ptr %ptr, <4 x i1> %m) {
 ; CHECK-LABEL: vpload_factor2_interleaved_mask_shuffle:
 ; CHECK:       # %bb.0:
@@ -532,8 +549,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    lui a3, 12
 ; RV32-NEXT:    lui a6, 12291
-; RV32-NEXT:    lui a7, %hi(.LCPI26_0)
-; RV32-NEXT:    addi a7, a7, %lo(.LCPI26_0)
+; RV32-NEXT:    lui a7, %hi(.LCPI27_0)
+; RV32-NEXT:    addi a7, a7, %lo(.LCPI27_0)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v24, (a5)
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -618,12 +635,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT:    lui a7, 49164
-; RV32-NEXT:    lui a1, %hi(.LCPI26_1)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_1)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_1)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_1)
 ; RV32-NEXT:    lui t2, 3
 ; RV32-NEXT:    lui t1, 196656
-; RV32-NEXT:    lui a4, %hi(.LCPI26_3)
-; RV32-NEXT:    addi a4, a4, %lo(.LCPI26_3)
+; RV32-NEXT:    lui a4, %hi(.LCPI27_3)
+; RV32-NEXT:    addi a4, a4, %lo(.LCPI27_3)
 ; RV32-NEXT:    lui t0, 786624
 ; RV32-NEXT:    li a5, 48
 ; RV32-NEXT:    lui a6, 768
@@ -802,8 +819,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v24, v8, v2
-; RV32-NEXT:    lui a1, %hi(.LCPI26_2)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_2)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_2)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_2)
 ; RV32-NEXT:    lui a3, 3073
 ; RV32-NEXT:    addi a3, a3, -1024
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -867,16 +884,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v28, v24
-; RV32-NEXT:    lui a1, %hi(.LCPI26_4)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_4)
-; RV32-NEXT:    lui a2, %hi(.LCPI26_5)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_5)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_4)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_4)
+; RV32-NEXT:    lui a2, %hi(.LCPI27_5)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI27_5)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI26_7)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_7)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_7)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_7)
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vle16.v v10, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
@@ -904,14 +921,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v0, v10
-; RV32-NEXT:    lui a1, %hi(.LCPI26_6)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_6)
-; RV32-NEXT:    lui a2, %hi(.LCPI26_8)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_8)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_6)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_6)
+; RV32-NEXT:    lui a2, %hi(.LCPI27_8)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI27_8)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v4, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI26_9)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_9)
+; RV32-NEXT:    lui a1, %hi(.LCPI27_9)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI27_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v6, (a1)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
@@ -998,8 +1015,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    li a4, 128
 ; RV64-NEXT:    lui a1, 1
 ; RV64-NEXT:    vle64.v v8, (a3)
-; RV64-NEXT:    lui a3, %hi(.LCPI26_0)
-; RV64-NEXT:    addi a3, a3, %lo(.LCPI26_0)
+; RV64-NEXT:    lui a3, %hi(.LCPI27_0)
+; RV64-NEXT:    addi a3, a3, %lo(.LCPI27_0)
 ; RV64-NEXT:    vmv.s.x v0, a4
 ; RV64-NEXT:    csrr a4, vlenb
 ; RV64-NEXT:    li a5, 61
@@ -1187,8 +1204,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT:    vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT:    lui a2, %hi(.LCPI26_1)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_1)
+; RV64-NEXT:    lui a2, %hi(.LCPI27_1)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI27_1)
 ; RV64-NEXT:    li a3, 192
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v6, (a2)
@@ -1222,8 +1239,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui a2, %hi(.LCPI26_2)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_2)
+; RV64-NEXT:    lui a2, %hi(.LCPI27_2)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI27_2)
 ; RV64-NEXT:    li a3, 1040
 ; RV64-NEXT:    vmv.s.x v0, a3
 ; RV64-NEXT:    addi a1, a1, -2016
@@ -1307,12 +1324,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a1, %hi(.LCPI26_3)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_3)
+; RV64-NEXT:    lui a1, %hi(.LCPI27_3)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI27_3)
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v20, (a1)
-; RV64-NEXT:    lui a1, %hi(.LCPI26_4)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_4)
+; RV64-NEXT:    lui a1, %hi(.LCPI27_4)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI27_4)
 ; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 77
@@ -1363,8 +1380,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vrgatherei16.vv v0, v16, v8
-; RV64-NEXT:    lui a1, %hi(.LCPI26_5)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_5)
+; RV64-NEXT:    lui a1, %hi(.LCPI27_5)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI27_5)
 ; RV64-NEXT:    vle16.v v20, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 61
@@ -1981,8 +1998,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI62_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI62_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI63_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI63_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
@@ -2057,8 +2074,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT:    vmv.s.x v10, a0
 ; RV32-NEXT:    li a0, 146
 ; RV32-NEXT:    vmv.s.x v11, a0
-; RV32-NEXT:    lui a0, %hi(.LCPI63_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI63_0)
+; RV32-NEXT:    lui a0, %hi(.LCPI64_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI64_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
 ; RV32-NEXT:    li a0, 36
@@ -2277,8 +2294,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI72_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI72_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI73_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI73_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
