Commit c202d2f

[IA][RISCV] Recognizing gap masks assembled from bitwise AND (#153324)
For a deinterleaved masked.load / vp.load, if its mask `%c` is synthesized by the following snippet:

```
%m = shufflevector %s, poison, <0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3>
%g = <1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0>
%c = and %m, %g
```

then we know that `%g` is the gap mask and `%s` is the mask for each field / component. This patch teaches the InterleavedAccess pass to recognize such patterns.
1 parent ff0ce74 commit c202d2f
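For concreteness, a fully-typed version of that shorthand looks like the IR below. This is a minimal sketch for factor 3 with 4 lanes per field, mirroring the tests added in this commit; the value names (`%s`, `%m`, `%c`) are illustrative:

```llvm
; %s is the per-field mask; the constant operand of the 'and' is the gap
; mask %g written inline, with field 2 (the last of the 3 fields) disabled.
%m = shufflevector <4 x i1> %s, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
%c = and <12 x i1> %m, <i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false>
```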

File tree: 2 files changed, +132 −38 lines changed


llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 15 additions & 0 deletions
```diff
@@ -601,6 +601,21 @@ static std::pair<Value *, APInt> getMask(Value *WideMask, unsigned Factor,
     }
   }
 
+  // Masks that are assembled from bitwise AND.
+  if (auto *AndOp = dyn_cast<BinaryOperator>(WideMask);
+      AndOp && AndOp->getOpcode() == Instruction::And) {
+    auto [MaskLHS, GapMaskLHS] =
+        getMask(AndOp->getOperand(0), Factor, LeafValueEC);
+    auto [MaskRHS, GapMaskRHS] =
+        getMask(AndOp->getOperand(1), Factor, LeafValueEC);
+    if (!MaskLHS || !MaskRHS)
+      return {nullptr, GapMask};
+    // Using IRBuilder here so that any trivial constants could be folded right
+    // away.
+    return {IRBuilder<>(AndOp).CreateAnd(MaskLHS, MaskRHS),
+            GapMaskLHS & GapMaskRHS};
+  }
+
   if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
     if (auto *Splat = ConstMask->getSplatValue())
       // All-ones or all-zeros mask.
```
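To see how the new case composes with the existing ones, here is a hedged trace for the factor-3 pattern from the commit message (illustrative values, not literal pass output): the recursive call on the shufflevector operand is expected to return the per-field mask with an all-ones gap mask, the call on the constant operand an all-ones mask with the last gap bit cleared, and the new AND case intersects the two results:

```llvm
; Assumed results of the recursive getMask calls (factor 3, so the gap
; mask has 3 bits):
;   getMask(%m) -> {%s,       gap = 0b111}  ; interleaved shuffle of %s
;   getMask(%g) -> {all-ones, gap = 0b011}  ; constant, last field disabled
;   getMask(%c) -> {and %s, all-ones,  gap = 0b111 & 0b011 = 0b011}
; CreateAnd(%s, all-ones) folds back to plain %s via IRBuilder, per the
; comment in the patch.
```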

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 117 additions & 38 deletions
```diff
@@ -367,6 +367,24 @@ define {<4 x i32>, <4 x i32>} @vpload_factor3_mask_skip_fields(ptr %ptr) {
   ret {<4 x i32>, <4 x i32>} %res1
 }
 
+define {<4 x i32>, <4 x i32>} @vpload_factor3_combined_mask_skip_field(ptr %ptr, <4 x i1> %mask) {
+; CHECK-LABEL: vpload_factor3_combined_mask_skip_field:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 12
+; CHECK-NEXT:    vsetivli zero, 6, e32, m1, ta, ma
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = shufflevector <4 x i1> %mask, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+  %combined = and <12 x i1> %interleaved.mask, <i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false>
+  %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> %combined, i32 12)
+  ; mask = %mask, skip the last field
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}
+
 define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor4(ptr %ptr) {
 ; CHECK-LABEL: vpload_factor4:
 ; CHECK:       # %bb.0:
@@ -514,8 +532,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    lui a3, 12
 ; RV32-NEXT:    lui a6, 12291
-; RV32-NEXT:    lui a7, %hi(.LCPI25_0)
-; RV32-NEXT:    addi a7, a7, %lo(.LCPI25_0)
+; RV32-NEXT:    lui a7, %hi(.LCPI26_0)
+; RV32-NEXT:    addi a7, a7, %lo(.LCPI26_0)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v24, (a5)
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -600,12 +618,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
 ; RV32-NEXT:    lui a7, 49164
-; RV32-NEXT:    lui a1, %hi(.LCPI25_1)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_1)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_1)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_1)
 ; RV32-NEXT:    lui t2, 3
 ; RV32-NEXT:    lui t1, 196656
-; RV32-NEXT:    lui a4, %hi(.LCPI25_3)
-; RV32-NEXT:    addi a4, a4, %lo(.LCPI25_3)
+; RV32-NEXT:    lui a4, %hi(.LCPI26_3)
+; RV32-NEXT:    addi a4, a4, %lo(.LCPI26_3)
 ; RV32-NEXT:    lui t0, 786624
 ; RV32-NEXT:    li a5, 48
 ; RV32-NEXT:    lui a6, 768
@@ -784,8 +802,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v24, v8, v2
-; RV32-NEXT:    lui a1, %hi(.LCPI25_2)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_2)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_2)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_2)
 ; RV32-NEXT:    lui a3, 3073
 ; RV32-NEXT:    addi a3, a3, -1024
 ; RV32-NEXT:    vmv.s.x v0, a3
@@ -849,16 +867,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vrgatherei16.vv v28, v8, v3
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v28, v24
-; RV32-NEXT:    lui a1, %hi(.LCPI25_4)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_4)
-; RV32-NEXT:    lui a2, %hi(.LCPI25_5)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI25_5)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_4)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_4)
+; RV32-NEXT:    lui a2, %hi(.LCPI26_5)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_5)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v24, (a2)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI25_7)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_7)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_7)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_7)
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vle16.v v10, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
@@ -886,14 +904,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v16, v0, v10
-; RV32-NEXT:    lui a1, %hi(.LCPI25_6)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_6)
-; RV32-NEXT:    lui a2, %hi(.LCPI25_8)
-; RV32-NEXT:    addi a2, a2, %lo(.LCPI25_8)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_6)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_6)
+; RV32-NEXT:    lui a2, %hi(.LCPI26_8)
+; RV32-NEXT:    addi a2, a2, %lo(.LCPI26_8)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; RV32-NEXT:    vle16.v v4, (a1)
-; RV32-NEXT:    lui a1, %hi(.LCPI25_9)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI25_9)
+; RV32-NEXT:    lui a1, %hi(.LCPI26_9)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI26_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v6, (a1)
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
@@ -980,8 +998,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    li a4, 128
 ; RV64-NEXT:    lui a1, 1
 ; RV64-NEXT:    vle64.v v8, (a3)
-; RV64-NEXT:    lui a3, %hi(.LCPI25_0)
-; RV64-NEXT:    addi a3, a3, %lo(.LCPI25_0)
+; RV64-NEXT:    lui a3, %hi(.LCPI26_0)
+; RV64-NEXT:    addi a3, a3, %lo(.LCPI26_0)
 ; RV64-NEXT:    vmv.s.x v0, a4
 ; RV64-NEXT:    csrr a4, vlenb
 ; RV64-NEXT:    li a5, 61
@@ -1169,8 +1187,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV64-NEXT:    vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT:    lui a2, %hi(.LCPI25_1)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI25_1)
+; RV64-NEXT:    lui a2, %hi(.LCPI26_1)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_1)
 ; RV64-NEXT:    li a3, 192
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v6, (a2)
@@ -1204,8 +1222,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT:    lui a2, %hi(.LCPI25_2)
-; RV64-NEXT:    addi a2, a2, %lo(.LCPI25_2)
+; RV64-NEXT:    lui a2, %hi(.LCPI26_2)
+; RV64-NEXT:    addi a2, a2, %lo(.LCPI26_2)
 ; RV64-NEXT:    li a3, 1040
 ; RV64-NEXT:    vmv.s.x v0, a3
 ; RV64-NEXT:    addi a1, a1, -2016
@@ -1289,12 +1307,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT:    lui a1, %hi(.LCPI25_3)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI25_3)
+; RV64-NEXT:    lui a1, %hi(.LCPI26_3)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_3)
 ; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV64-NEXT:    vle16.v v20, (a1)
-; RV64-NEXT:    lui a1, %hi(.LCPI25_4)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI25_4)
+; RV64-NEXT:    lui a1, %hi(.LCPI26_4)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_4)
 ; RV64-NEXT:    vle16.v v8, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 77
@@ -1345,8 +1363,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV64-NEXT:    vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
 ; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vrgatherei16.vv v0, v16, v8
-; RV64-NEXT:    lui a1, %hi(.LCPI25_5)
-; RV64-NEXT:    addi a1, a1, %lo(.LCPI25_5)
+; RV64-NEXT:    lui a1, %hi(.LCPI26_5)
+; RV64-NEXT:    addi a1, a1, %lo(.LCPI26_5)
 ; RV64-NEXT:    vle16.v v20, (a1)
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    li a2, 61
@@ -1963,8 +1981,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI61_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI61_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI62_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI62_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
@@ -2039,8 +2057,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
 ; RV32-NEXT:    vmv.s.x v10, a0
 ; RV32-NEXT:    li a0, 146
 ; RV32-NEXT:    vmv.s.x v11, a0
-; RV32-NEXT:    lui a0, %hi(.LCPI62_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI62_0)
+; RV32-NEXT:    lui a0, %hi(.LCPI63_0)
+; RV32-NEXT:    addi a0, a0, %lo(.LCPI63_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a0)
 ; RV32-NEXT:    li a0, 36
@@ -2181,6 +2199,67 @@ define {<4 x i32>, <4 x i32>} @maskedload_factor3_mask_skip_field(ptr %ptr) {
   ret {<4 x i32>, <4 x i32>} %res1
 }
 
+define {<4 x i32>, <4 x i32>} @maskedload_factor3_combined_mask_skip_field(ptr %ptr, <4 x i1> %mask) {
+; CHECK-LABEL: maskedload_factor3_combined_mask_skip_field:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 12
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = shufflevector <4 x i1> %mask, <4 x i1> poison, <12 x i32> <i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
+  %combined = and <12 x i1> %interleaved.mask, <i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false>
+  %interleaved.vec = tail call <12 x i32> @llvm.masked.load.v12i32.p0(ptr %ptr, i32 4, <12 x i1> %combined, <12 x i32> poison)
+  ; mask = %mask, skip the last field
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}
+
+define {<4 x i32>, <4 x i32>} @maskedload_factor4_combined_mask_multi_skip_fields(ptr %ptr, <4 x i1> %mask) {
+; CHECK-LABEL: maskedload_factor4_combined_mask_multi_skip_fields:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = shufflevector <4 x i1> %mask, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+  %combined = and <16 x i1> %interleaved.mask, <i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>
+  %combined1 = and <16 x i1> %combined, <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true>
+  %interleaved.vec = tail call <16 x i32> @llvm.masked.load.v16i32.p0(ptr %ptr, i32 4, <16 x i1> %combined1, <16 x i32> poison)
+  ; mask = %mask, skip the last 2 fields
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}
+
+define {<4 x i32>, <4 x i32>} @maskedload_factor4_combined_mask_multi_skip_fields_and_masks(ptr %ptr, <4 x i1> %mask, <4 x i1> %mask2) {
+; CHECK-LABEL: maskedload_factor4_combined_mask_multi_skip_fields_and_masks:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmand.mm v0, v0, v8
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vlsseg2e32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %interleaved.mask = shufflevector <4 x i1> %mask, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+  %combined = and <16 x i1> %interleaved.mask, <i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>
+
+  %interleaved.mask2 = shufflevector <4 x i1> %mask2, <4 x i1> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
+  %combined1 = and <16 x i1> %interleaved.mask2, <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true>
+
+  %combined2 = and <16 x i1> %combined, %combined1
+  %interleaved.vec = tail call <16 x i32> @llvm.masked.load.v16i32.p0(ptr %ptr, i32 4, <16 x i1> %combined2, <16 x i32> poison)
+  ; mask = %mask & %mask2, skip the last 2 fields
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+  ret {<4 x i32>, <4 x i32>} %res1
+}
+
 ; We can only skip the last field for now.
 define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(ptr %ptr) {
 ; RV32-LABEL: maskedload_factor3_invalid_skip_field:
@@ -2198,8 +2277,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @maskedload_factor3_invalid_skip_field(
 ; RV32-NEXT:    vle32.v v12, (a0), v0.t
 ; RV32-NEXT:    li a0, 36
 ; RV32-NEXT:    vmv.s.x v20, a1
-; RV32-NEXT:    lui a1, %hi(.LCPI68_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI68_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI72_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI72_0)
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vle16.v v21, (a1)
 ; RV32-NEXT:    vcompress.vm v8, v12, v11
```
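As a cross-check on the CHECK lines in the new tests above, the stride placed in a1 is the full interleave width in bytes, independent of how many trailing fields are skipped (i32 elements are 4 bytes):

```llvm
; factor 3, last field skipped:    stride = 3 * 4 = 12 bytes (li a1, 12)
; factor 4, last 2 fields skipped: stride = 4 * 4 = 16 bytes (li a1, 16)
; two fields remain live in each case, hence vlsseg2e32.v (a 2-field
; strided segment load)
```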
