Skip to content

Commit 6590e06

Browse files
preames and mahesh-attarde
authored and committed
[IA] Remove restriction on constant masks for shuffle lowering (llvm#150098)
The point of this change is simply to show that the constant check was not required for correctness. The mixed intrinsic and shuffle tests are added purely to exercise the code. An upcoming change will add support for shuffle matching in getMask to support non-constant fixed vector cases.
1 parent f76ac7e commit 6590e06

File tree

2 files changed

+66
-50
lines changed

2 files changed

+66
-50
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -268,17 +268,9 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
268268
if (isa<ScalableVectorType>(Load->getType()))
269269
return false;
270270

271-
if (auto *LI = dyn_cast<LoadInst>(Load)) {
272-
if (!LI->isSimple())
273-
return false;
274-
} else if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
275-
assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
276-
// Require a constant mask.
277-
if (!isa<ConstantVector>(VPLoad->getMaskParam()))
278-
return false;
279-
} else {
280-
llvm_unreachable("unsupported load operation");
281-
}
271+
if (auto *LI = dyn_cast<LoadInst>(Load);
272+
LI && !LI->isSimple())
273+
return false;
282274

283275
// Check if all users of this load are shufflevectors. If we encounter any
284276
// users that are extractelement instructions or binary operators, we save
@@ -497,9 +489,6 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
497489
StoredValue = SI->getValueOperand();
498490
} else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
499491
assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
500-
// Require a constant mask.
501-
if (!isa<ConstantVector>(VPStore->getMaskParam()))
502-
return false;
503492
StoredValue = VPStore->getArgOperand(0);
504493
} else {
505494
llvm_unreachable("unsupported store operation");

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll

Lines changed: 63 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,20 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) {
190190
ret {<4 x i32>, <4 x i32>} %res1
191191
}
192192

193+
define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i1> %m) {
194+
; CHECK-LABEL: vpload_factor2_interleaved_mask_intrinsic:
195+
; CHECK: # %bb.0:
196+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
197+
; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
198+
; CHECK-NEXT: ret
199+
%interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
200+
%interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
201+
%v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
202+
%v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
203+
%res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
204+
%res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
205+
ret {<4 x i32>, <4 x i32>} %res1
206+
}
193207

194208
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
195209
; CHECK-LABEL: vpload_factor3:
@@ -423,8 +437,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
423437
; RV32-NEXT: li a2, 32
424438
; RV32-NEXT: lui a3, 12
425439
; RV32-NEXT: lui a6, 12291
426-
; RV32-NEXT: lui a7, %hi(.LCPI20_0)
427-
; RV32-NEXT: addi a7, a7, %lo(.LCPI20_0)
440+
; RV32-NEXT: lui a7, %hi(.LCPI21_0)
441+
; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0)
428442
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
429443
; RV32-NEXT: vle32.v v24, (a5)
430444
; RV32-NEXT: vmv.s.x v0, a3
@@ -509,12 +523,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
509523
; RV32-NEXT: addi a1, a1, 16
510524
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
511525
; RV32-NEXT: lui a7, 49164
512-
; RV32-NEXT: lui a1, %hi(.LCPI20_1)
513-
; RV32-NEXT: addi a1, a1, %lo(.LCPI20_1)
526+
; RV32-NEXT: lui a1, %hi(.LCPI21_1)
527+
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1)
514528
; RV32-NEXT: lui t2, 3
515529
; RV32-NEXT: lui t1, 196656
516-
; RV32-NEXT: lui a4, %hi(.LCPI20_3)
517-
; RV32-NEXT: addi a4, a4, %lo(.LCPI20_3)
530+
; RV32-NEXT: lui a4, %hi(.LCPI21_3)
531+
; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3)
518532
; RV32-NEXT: lui t0, 786624
519533
; RV32-NEXT: li a5, 48
520534
; RV32-NEXT: lui a6, 768
@@ -693,8 +707,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
693707
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
694708
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
695709
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
696-
; RV32-NEXT: lui a1, %hi(.LCPI20_2)
697-
; RV32-NEXT: addi a1, a1, %lo(.LCPI20_2)
710+
; RV32-NEXT: lui a1, %hi(.LCPI21_2)
711+
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2)
698712
; RV32-NEXT: lui a3, 3073
699713
; RV32-NEXT: addi a3, a3, -1024
700714
; RV32-NEXT: vmv.s.x v0, a3
@@ -758,16 +772,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
758772
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
759773
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
760774
; RV32-NEXT: vmv.v.v v28, v24
761-
; RV32-NEXT: lui a1, %hi(.LCPI20_4)
762-
; RV32-NEXT: addi a1, a1, %lo(.LCPI20_4)
763-
; RV32-NEXT: lui a2, %hi(.LCPI20_5)
764-
; RV32-NEXT: addi a2, a2, %lo(.LCPI20_5)
775+
; RV32-NEXT: lui a1, %hi(.LCPI21_4)
776+
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4)
777+
; RV32-NEXT: lui a2, %hi(.LCPI21_5)
778+
; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5)
765779
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
766780
; RV32-NEXT: vle16.v v24, (a2)
767781
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
768782
; RV32-NEXT: vle16.v v8, (a1)
769-
; RV32-NEXT: lui a1, %hi(.LCPI20_7)
770-
; RV32-NEXT: addi a1, a1, %lo(.LCPI20_7)
783+
; RV32-NEXT: lui a1, %hi(.LCPI21_7)
784+
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7)
771785
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
772786
; RV32-NEXT: vle16.v v10, (a1)
773787
; RV32-NEXT: csrr a1, vlenb
@@ -795,14 +809,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
795809
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
796810
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
797811
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
798-
; RV32-NEXT: lui a1, %hi(.LCPI20_6)
799-
; RV32-NEXT: addi a1, a1, %lo(.LCPI20_6)
800-
; RV32-NEXT: lui a2, %hi(.LCPI20_8)
801-
; RV32-NEXT: addi a2, a2, %lo(.LCPI20_8)
812+
; RV32-NEXT: lui a1, %hi(.LCPI21_6)
813+
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6)
814+
; RV32-NEXT: lui a2, %hi(.LCPI21_8)
815+
; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8)
802816
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
803817
; RV32-NEXT: vle16.v v4, (a1)
804-
; RV32-NEXT: lui a1, %hi(.LCPI20_9)
805-
; RV32-NEXT: addi a1, a1, %lo(.LCPI20_9)
818+
; RV32-NEXT: lui a1, %hi(.LCPI21_9)
819+
; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9)
806820
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
807821
; RV32-NEXT: vle16.v v6, (a1)
808822
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -889,8 +903,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
889903
; RV64-NEXT: li a4, 128
890904
; RV64-NEXT: lui a1, 1
891905
; RV64-NEXT: vle64.v v8, (a3)
892-
; RV64-NEXT: lui a3, %hi(.LCPI20_0)
893-
; RV64-NEXT: addi a3, a3, %lo(.LCPI20_0)
906+
; RV64-NEXT: lui a3, %hi(.LCPI21_0)
907+
; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0)
894908
; RV64-NEXT: vmv.s.x v0, a4
895909
; RV64-NEXT: csrr a4, vlenb
896910
; RV64-NEXT: li a5, 61
@@ -1078,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
10781092
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
10791093
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10801094
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
1081-
; RV64-NEXT: lui a2, %hi(.LCPI20_1)
1082-
; RV64-NEXT: addi a2, a2, %lo(.LCPI20_1)
1095+
; RV64-NEXT: lui a2, %hi(.LCPI21_1)
1096+
; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1)
10831097
; RV64-NEXT: li a3, 192
10841098
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
10851099
; RV64-NEXT: vle16.v v6, (a2)
@@ -1113,8 +1127,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11131127
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
11141128
; RV64-NEXT: addi a2, sp, 16
11151129
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
1116-
; RV64-NEXT: lui a2, %hi(.LCPI20_2)
1117-
; RV64-NEXT: addi a2, a2, %lo(.LCPI20_2)
1130+
; RV64-NEXT: lui a2, %hi(.LCPI21_2)
1131+
; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2)
11181132
; RV64-NEXT: li a3, 1040
11191133
; RV64-NEXT: vmv.s.x v0, a3
11201134
; RV64-NEXT: addi a1, a1, -2016
@@ -1198,12 +1212,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
11981212
; RV64-NEXT: add a1, sp, a1
11991213
; RV64-NEXT: addi a1, a1, 16
12001214
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
1201-
; RV64-NEXT: lui a1, %hi(.LCPI20_3)
1202-
; RV64-NEXT: addi a1, a1, %lo(.LCPI20_3)
1215+
; RV64-NEXT: lui a1, %hi(.LCPI21_3)
1216+
; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3)
12031217
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
12041218
; RV64-NEXT: vle16.v v20, (a1)
1205-
; RV64-NEXT: lui a1, %hi(.LCPI20_4)
1206-
; RV64-NEXT: addi a1, a1, %lo(.LCPI20_4)
1219+
; RV64-NEXT: lui a1, %hi(.LCPI21_4)
1220+
; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4)
12071221
; RV64-NEXT: vle16.v v8, (a1)
12081222
; RV64-NEXT: csrr a1, vlenb
12091223
; RV64-NEXT: li a2, 77
@@ -1254,8 +1268,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
12541268
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
12551269
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12561270
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
1257-
; RV64-NEXT: lui a1, %hi(.LCPI20_5)
1258-
; RV64-NEXT: addi a1, a1, %lo(.LCPI20_5)
1271+
; RV64-NEXT: lui a1, %hi(.LCPI21_5)
1272+
; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5)
12591273
; RV64-NEXT: vle16.v v20, (a1)
12601274
; RV64-NEXT: csrr a1, vlenb
12611275
; RV64-NEXT: li a2, 61
@@ -1472,6 +1486,19 @@ define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
14721486
ret void
14731487
}
14741488

1489+
define void @vpstore_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) {
1490+
; CHECK-LABEL: vpstore_factor2_interleaved_mask_intrinsic:
1491+
; CHECK: # %bb.0:
1492+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1493+
; CHECK-NEXT: vsseg2e32.v v8, (a0), v0.t
1494+
; CHECK-NEXT: ret
1495+
%interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
1496+
%interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
1497+
tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
1498+
ret void
1499+
}
1500+
1501+
14751502
define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
14761503
; CHECK-LABEL: vpstore_factor3:
14771504
; CHECK: # %bb.0:
@@ -1839,8 +1866,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
18391866
; RV32-NEXT: vle32.v v12, (a0), v0.t
18401867
; RV32-NEXT: li a0, 36
18411868
; RV32-NEXT: vmv.s.x v20, a1
1842-
; RV32-NEXT: lui a1, %hi(.LCPI54_0)
1843-
; RV32-NEXT: addi a1, a1, %lo(.LCPI54_0)
1869+
; RV32-NEXT: lui a1, %hi(.LCPI56_0)
1870+
; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
18441871
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
18451872
; RV32-NEXT: vle16.v v21, (a1)
18461873
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1915,8 +1942,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
19151942
; RV32-NEXT: vmv.s.x v10, a0
19161943
; RV32-NEXT: li a0, 146
19171944
; RV32-NEXT: vmv.s.x v11, a0
1918-
; RV32-NEXT: lui a0, %hi(.LCPI55_0)
1919-
; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
1945+
; RV32-NEXT: lui a0, %hi(.LCPI57_0)
1946+
; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0)
19201947
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
19211948
; RV32-NEXT: vle16.v v20, (a0)
19221949
; RV32-NEXT: li a0, 36

0 commit comments

Comments
 (0)