Skip to content

Commit 5122611

Browse files
committed
[InterleavedAccess] Construct interleaved access store with shuffles
- [AArch64]: Interleaved access stores can now handle more elements than the target's maximum supported interleave factor by using shuffles.
1 parent 205e39f commit 5122611

File tree

3 files changed

+155
-13
lines changed

3 files changed

+155
-13
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18348,7 +18348,13 @@ bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle(
1834818348
return false;
1834918349

1835018350
std::deque<Value *> Shuffles;
18351-
Shuffles.push_back(SVI);
18351+
// If the root shuffle has only one operand (the second is poison) and the same type as that operand, start from the operand itself.
18352+
if (isa<PoisonValue>(SVI->getOperand(1)) &&
18353+
SVI->getType() == SVI->getOperand(0)->getType()) {
18354+
Value *Op0 = SVI->getOperand(0);
18355+
Shuffles.push_back(dyn_cast<Value>(Op0));
18356+
} else
18357+
Shuffles.push_back(SVI);
1835218358
unsigned ConcatLevel = Factor;
1835318359
unsigned ConcatElt = Factor * LaneLen;
1835418360
// Getting all the interleaved operands.
@@ -18370,7 +18376,10 @@ bool AArch64TargetLowering::lowerInterleavedStoreWithShuffle(
1837018376
Shuffles.push_back(SplitValue);
1837118377
continue;
1837218378
}
18373-
18379+
if (V->getType() == SubVecTy) {
18380+
Shuffles.push_back(V);
18381+
continue;
18382+
}
1837418383
ShuffleVectorInst *SFL = dyn_cast<ShuffleVectorInst>(V);
1837518384
if (!SFL)
1837618385
return false;

llvm/test/CodeGen/AArch64/vldn_shuffle.ll

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,53 @@ define void @store_factor8(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
761761
ret void
762762
}
763763

764+
define dso_local void @store_factor8_1(ptr %dst, ptr %temp1, i64 %offset.idx, <8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
765+
; CHECK-LABEL: store_factor8_1:
766+
; CHECK: .Lfunc_begin18:
767+
; CHECK-NEXT: .cfi_startproc
768+
; CHECK-NEXT: // %bb.0: // %entry
769+
; CHECK-NEXT: ld1r { v3.8h }, [x1]
770+
; CHECK-NEXT: movi v4.8h, #1
771+
; CHECK-NEXT: movi v19.2d, #0000000000000000
772+
; CHECK-NEXT: add x8, x0, x2, lsl #1
773+
; CHECK-NEXT: orr v3.8h, #1
774+
; CHECK-NEXT: add v3.8h, v3.8h, v4.8h
775+
; CHECK-NEXT: zip1 v16.8h, v19.8h, v3.8h
776+
; CHECK-NEXT: zip1 v17.8h, v0.8h, v19.8h
777+
; CHECK-NEXT: zip1 v18.8h, v1.8h, v2.8h
778+
; CHECK-NEXT: zip2 v3.8h, v19.8h, v3.8h
779+
; CHECK-NEXT: zip2 v4.8h, v0.8h, v19.8h
780+
; CHECK-NEXT: zip2 v5.8h, v1.8h, v2.8h
781+
; CHECK-NEXT: mov v6.16b, v19.16b
782+
; CHECK-NEXT: st4 { v16.8h, v17.8h, v18.8h, v19.8h }, [x8], #64
783+
; CHECK-NEXT: st4 { v3.8h, v4.8h, v5.8h, v6.8h }, [x8]
784+
; CHECK-NEXT: ret
785+
entry:
786+
%0 = load i32, ptr %temp1, align 4
787+
%broadcast.splatinsert1 = insertelement <8 x i32> poison, i32 %0, i64 0
788+
%broadcast.splat2 = shufflevector <8 x i32> %broadcast.splatinsert1, <8 x i32> poison, <8 x i32> zeroinitializer
789+
%1 = getelementptr i16, ptr %dst, i64 %offset.idx
790+
%2 = trunc <8 x i32> %broadcast.splat2 to <8 x i16>
791+
%3 = or <8 x i16> %2, splat (i16 1)
792+
%4 = add <8 x i16> %3, splat (i16 1)
793+
%5 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %x, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
794+
%6 = shufflevector <8 x i16> %y, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
795+
%7 = shufflevector <8 x i16> %4, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
796+
%8 = shufflevector <8 x i16> %z, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
797+
%9 = shufflevector <16 x i16> %5, <16 x i16> %6, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
798+
%10 = shufflevector <16 x i16> %7, <16 x i16> %8, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
799+
%11 = shufflevector <32 x i16> %9, <32 x i16> %10, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
800+
%interleaved.vec = shufflevector <64 x i16> %11, <64 x i16> poison, <64 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56, i32 1, i32 9, i32 17, i32 25, i32 33, i32 41, i32 49, i32 57, i32 2, i32 10, i32 18, i32 26, i32 34, i32 42, i32 50, i32 58, i32 3, i32 11, i32 19, i32 27, i32 35, i32 43, i32 51, i32 59, i32 4, i32 12, i32 20, i32 28, i32 36, i32 44, i32 52, i32 60, i32 5, i32 13, i32 21, i32 29, i32 37, i32 45, i32 53, i32 61, i32 6, i32 14, i32 22, i32 30, i32 38, i32 46, i32 54, i32 62, i32 7, i32 15, i32 23, i32 31, i32 39, i32 47, i32 55, i32 63>
801+
store <64 x i16> %interleaved.vec, ptr %1, align 2
802+
ret void
803+
}
804+
764805
define void @store_factor16(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3,
765806
<4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7,
766807
<4 x i32> %a8, <4 x i32> %a9, <4 x i32> %a10, <4 x i32> %a11,
767808
<4 x i32> %a12, <4 x i32> %a13, <4 x i32> %a14, <4 x i32> %a15) {
768809
; CHECK-LABEL: store_factor16:
769-
; CHECK: .Lfunc_begin18:
810+
; CHECK: .Lfunc_begin19:
770811
; CHECK-NEXT: .cfi_startproc
771812
; CHECK-NEXT: // %bb.0:
772813
; CHECK: zip1 [[V05:.*s]], [[I05:.*s]], [[I13:.*s]]
@@ -837,16 +878,16 @@ define void @store_factor16(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32>
837878

838879
define dso_local void @store_no_interleave(ptr noalias noundef readonly captures(none) %a, ptr noalias noundef readonly captures(none) %b, i8 noundef %c) {
839880
; CHECK-LABEL: store_no_interleave:
840-
; CHECK: .Lfunc_begin19:
881+
; CHECK: .Lfunc_begin20:
841882
; CHECK-NEXT: .cfi_startproc
842883
; CHECK-NEXT: // %bb.0: // %entry
843884
; CHECK-NEXT: movi v0.4h, #1
844885
; CHECK-NEXT: fmov s1, w2
845886
; CHECK-NEXT: ldrb w8, [x0]
846-
; CHECK-NEXT: adrp x9, .LCPI19_3
847-
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI19_3]
848-
; CHECK-NEXT: adrp x9, .LCPI19_1
849-
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI19_1]
887+
; CHECK-NEXT: adrp x9, .LCPI20_3
888+
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI20_3]
889+
; CHECK-NEXT: adrp x9, .LCPI20_1
890+
; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI20_1]
850891
; CHECK-NEXT: and v0.8b, v1.8b, v0.8b
851892
; CHECK-NEXT: dup v0.16b, v0.b[0]
852893
; CHECK-NEXT: dup v1.16b, w2
@@ -855,10 +896,10 @@ define dso_local void @store_no_interleave(ptr noalias noundef readonly captures
855896
; CHECK-NEXT: tbl v5.16b, { v0.16b, v1.16b }, v5.16b
856897
; CHECK-NEXT: mov v2.b[2], w8
857898
; CHECK-NEXT: mov v2.b[10], w8
858-
; CHECK-NEXT: adrp x8, .LCPI19_2
859-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI19_2]
860-
; CHECK-NEXT: adrp x8, .LCPI19_0
861-
; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI19_0]
899+
; CHECK-NEXT: adrp x8, .LCPI20_2
900+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI20_2]
901+
; CHECK-NEXT: adrp x8, .LCPI20_0
902+
; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI20_0]
862903
; CHECK-NEXT: ldrsw x8, [x1]
863904
; CHECK-NEXT: tbl v4.16b, { v0.16b, v1.16b }, v4.16b
864905
; CHECK-NEXT: rev64 v2.4s, v2.4s
@@ -902,7 +943,7 @@ entry:
902943

903944
define dso_local void @store_no_interleave1(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, ptr %i,
904945
; CHECK-LABEL: store_no_interleave1:
905-
; CHECK: .Lfunc_begin20:
946+
; CHECK: .Lfunc_begin21:
906947
; CHECK-NEXT: .cfi_startproc
907948
; CHECK-NEXT: // %bb.0: // %entry
908949
; CHECK-NEXT: ldr x8, [sp]

llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,3 +753,95 @@ define <4 x i1> @load_large_vector(ptr %p) {
753753
%ret = icmp ne <4 x ptr> %s1, %s2
754754
ret <4 x i1> %ret
755755
}
756+
757+
define void @store_factor8_with_undef(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3){
758+
; NEON-LABEL: define void @store_factor8_with_undef(
759+
; NEON: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
760+
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> poison, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
761+
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
762+
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[A1]], <4 x i32> poison, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
763+
; NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[A2:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
764+
; NEON-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[A2]], <4 x i32> poison, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
765+
; NEON-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[A3:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
766+
; NEON-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[A3]], <4 x i32> poison, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
767+
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[TMP5]], <4 x i32> [[TMP7]], ptr [[PTR]])
768+
; NEON-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[PTR]], i32 16
769+
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <4 x i32> [[TMP8]], ptr [[TMP9]])
770+
; NEON-NEXT: ret void
771+
; NO_NEON-LABEL: @store_factor8_with_undef(
772+
; NO_NEON-NOT: @llvm.aarch64.neon
773+
; NO_NEON: ret void
774+
;
775+
%v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
776+
%v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
777+
%s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
778+
%interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 undef>
779+
store <32 x i32> %interleaved.vec, ptr %ptr, align 4
780+
ret void
781+
}
782+
783+
define void @store_general_mask_factor8_undef_fail(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3,
784+
<4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7){
785+
; NEON-LABEL: @store_general_mask_factor8_undef_fail(
786+
; NEON-NOT: @llvm.aarch64.neon
787+
; NEON: ret void
788+
; NO_NEON-LABEL: @store_general_mask_factor8_undef_fail(
789+
; NO_NEON-NOT: @llvm.aarch64.neon
790+
; NO_NEON: ret void
791+
;
792+
%v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
793+
%v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
794+
%v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
795+
%v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
796+
797+
%s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
798+
%s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
799+
800+
%interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 4, i32 8, i32 9, i32 16, i32 20, i32 24, i32 10, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 undef>
801+
store <32 x i32> %interleaved.vec, ptr %ptr, align 4
802+
ret void
803+
}
804+
805+
define void @store_general_invalid_concat_mask(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3,
806+
<4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7){
807+
; NEON-LABEL: @store_general_invalid_concat_mask(
808+
; NEON-NOT: @llvm.aarch64.neon
809+
; NEON: ret void
810+
; NO_NEON-LABEL: @store_general_invalid_concat_mask(
811+
; NO_NEON-NOT: @llvm.aarch64.neon
812+
; NO_NEON: ret void
813+
;
814+
%v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 0>
815+
%v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
816+
%v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
817+
%v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
818+
819+
%s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
820+
%s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
821+
822+
%interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
823+
store <32 x i32> %interleaved.vec, ptr %ptr, align 4
824+
ret void
825+
}
826+
827+
define void @store_no_interleave_factor8(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3,
828+
<4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7){
829+
; NEON-LABEL: @store_no_interleave_factor8(
830+
; NEON-NOT: @llvm.aarch64.neon
831+
; NEON: ret void
832+
; NO_NEON-LABEL: @store_no_interleave_factor8(
833+
; NO_NEON-NOT: @llvm.aarch64.neon
834+
; NO_NEON: ret void
835+
;
836+
%v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
837+
%v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
838+
%v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
839+
%v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
840+
841+
%s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
842+
%s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
843+
844+
%interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
845+
store <16 x i32> %interleaved.vec, ptr %ptr, align 4
846+
ret void
847+
}

0 commit comments

Comments
 (0)