Skip to content

Commit 4c8c130

Browse files
authored
[AArch64][GlobalISel] Scalarize i128 shufflevector instructions. (#119980)
Like other operations, this scalarizes shufflevector operations with types larger than 64 bits. ImplicitDef and Freeze are also handled the same way, to allow them to legalize. The legalization in fewerElementsVectorShuffle is adjusted to handle scalarization.
1 parent 8652608 commit 4c8c130

File tree

3 files changed

+65
-45
lines changed

3 files changed

+65
-45
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5424,7 +5424,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
54245424
// Further legalization attempts will be needed to do split further.
54255425
NarrowTy =
54265426
DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5427-
unsigned NewElts = NarrowTy.getNumElements();
5427+
unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
54285428

54295429
SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
54305430
extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
@@ -5535,7 +5535,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
55355535
Ops.clear();
55365536
}
55375537

5538-
MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
5538+
MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
55395539
MI.eraseFromParent();
55405540
return Legalized;
55415541
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
104104
.clampNumElements(0, v4s16, v8s16)
105105
.clampNumElements(0, v2s32, v4s32)
106106
.clampMaxNumElements(0, s64, 2)
107-
.clampMaxNumElements(0, p0, 2);
107+
.clampMaxNumElements(0, p0, 2)
108+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
108109

109110
getActionDefinitionsBuilder(G_PHI)
110111
.legalFor({p0, s16, s32, s64})
@@ -1082,6 +1083,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10821083
.clampNumElements(0, v4s16, v8s16)
10831084
.clampNumElements(0, v4s32, v4s32)
10841085
.clampNumElements(0, v2s64, v2s64)
1086+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
10851087
.bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
10861088
// Bitcast pointers vector to i64.
10871089
const LLT DstTy = Query.Types[0];

llvm/test/CodeGen/AArch64/dup.ll

Lines changed: 60 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,6 @@
55
; CHECK-GI: warning: Instruction selection used fallback path for dup_v2i8
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8
77
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v2i128
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i128
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i128
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v3i128
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v3i128
13-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v3i128
14-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v4i128
15-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v4i128
16-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v4i128
17-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v2fp128
18-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2fp128
19-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2fp128
20-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v3fp128
21-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v3fp128
22-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v3fp128
23-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v4fp128
24-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v4fp128
25-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v4fp128
268

279
define <2 x i8> @dup_v2i8(i8 %a) {
2810
; CHECK-LABEL: dup_v2i8:
@@ -795,12 +777,22 @@ entry:
795777
}
796778

797779
define <2 x i128> @loaddup_v2i128(ptr %p) {
798-
; CHECK-LABEL: loaddup_v2i128:
799-
; CHECK: // %bb.0: // %entry
800-
; CHECK-NEXT: ldp x2, x1, [x0]
801-
; CHECK-NEXT: mov x0, x2
802-
; CHECK-NEXT: mov x3, x1
803-
; CHECK-NEXT: ret
780+
; CHECK-SD-LABEL: loaddup_v2i128:
781+
; CHECK-SD: // %bb.0: // %entry
782+
; CHECK-SD-NEXT: ldp x2, x1, [x0]
783+
; CHECK-SD-NEXT: mov x0, x2
784+
; CHECK-SD-NEXT: mov x3, x1
785+
; CHECK-SD-NEXT: ret
786+
;
787+
; CHECK-GI-LABEL: loaddup_v2i128:
788+
; CHECK-GI: // %bb.0: // %entry
789+
; CHECK-GI-NEXT: ldr q0, [x0]
790+
; CHECK-GI-NEXT: mov d1, v0.d[1]
791+
; CHECK-GI-NEXT: fmov x0, d0
792+
; CHECK-GI-NEXT: fmov x2, d0
793+
; CHECK-GI-NEXT: fmov x1, d1
794+
; CHECK-GI-NEXT: fmov x3, d1
795+
; CHECK-GI-NEXT: ret
804796
entry:
805797
%a = load i128, ptr %p
806798
%b = insertelement <2 x i128> poison, i128 %a, i64 0
@@ -836,14 +828,26 @@ entry:
836828
}
837829

838830
define <3 x i128> @loaddup_v3i128(ptr %p) {
839-
; CHECK-LABEL: loaddup_v3i128:
840-
; CHECK: // %bb.0: // %entry
841-
; CHECK-NEXT: ldp x2, x1, [x0]
842-
; CHECK-NEXT: mov x0, x2
843-
; CHECK-NEXT: mov x3, x1
844-
; CHECK-NEXT: mov x4, x2
845-
; CHECK-NEXT: mov x5, x1
846-
; CHECK-NEXT: ret
831+
; CHECK-SD-LABEL: loaddup_v3i128:
832+
; CHECK-SD: // %bb.0: // %entry
833+
; CHECK-SD-NEXT: ldp x2, x1, [x0]
834+
; CHECK-SD-NEXT: mov x0, x2
835+
; CHECK-SD-NEXT: mov x3, x1
836+
; CHECK-SD-NEXT: mov x4, x2
837+
; CHECK-SD-NEXT: mov x5, x1
838+
; CHECK-SD-NEXT: ret
839+
;
840+
; CHECK-GI-LABEL: loaddup_v3i128:
841+
; CHECK-GI: // %bb.0: // %entry
842+
; CHECK-GI-NEXT: ldr q0, [x0]
843+
; CHECK-GI-NEXT: mov d1, v0.d[1]
844+
; CHECK-GI-NEXT: fmov x0, d0
845+
; CHECK-GI-NEXT: fmov x2, d0
846+
; CHECK-GI-NEXT: fmov x4, d0
847+
; CHECK-GI-NEXT: fmov x1, d1
848+
; CHECK-GI-NEXT: fmov x3, d1
849+
; CHECK-GI-NEXT: fmov x5, d1
850+
; CHECK-GI-NEXT: ret
847851
entry:
848852
%a = load i128, ptr %p
849853
%b = insertelement <3 x i128> poison, i128 %a, i64 0
@@ -883,16 +887,30 @@ entry:
883887
}
884888

885889
define <4 x i128> @loaddup_v4i128(ptr %p) {
886-
; CHECK-LABEL: loaddup_v4i128:
887-
; CHECK: // %bb.0: // %entry
888-
; CHECK-NEXT: ldp x2, x1, [x0]
889-
; CHECK-NEXT: mov x0, x2
890-
; CHECK-NEXT: mov x3, x1
891-
; CHECK-NEXT: mov x4, x2
892-
; CHECK-NEXT: mov x5, x1
893-
; CHECK-NEXT: mov x6, x2
894-
; CHECK-NEXT: mov x7, x1
895-
; CHECK-NEXT: ret
890+
; CHECK-SD-LABEL: loaddup_v4i128:
891+
; CHECK-SD: // %bb.0: // %entry
892+
; CHECK-SD-NEXT: ldp x2, x1, [x0]
893+
; CHECK-SD-NEXT: mov x0, x2
894+
; CHECK-SD-NEXT: mov x3, x1
895+
; CHECK-SD-NEXT: mov x4, x2
896+
; CHECK-SD-NEXT: mov x5, x1
897+
; CHECK-SD-NEXT: mov x6, x2
898+
; CHECK-SD-NEXT: mov x7, x1
899+
; CHECK-SD-NEXT: ret
900+
;
901+
; CHECK-GI-LABEL: loaddup_v4i128:
902+
; CHECK-GI: // %bb.0: // %entry
903+
; CHECK-GI-NEXT: ldr q0, [x0]
904+
; CHECK-GI-NEXT: mov d1, v0.d[1]
905+
; CHECK-GI-NEXT: fmov x0, d0
906+
; CHECK-GI-NEXT: fmov x2, d0
907+
; CHECK-GI-NEXT: fmov x4, d0
908+
; CHECK-GI-NEXT: fmov x6, d0
909+
; CHECK-GI-NEXT: fmov x1, d1
910+
; CHECK-GI-NEXT: fmov x3, d1
911+
; CHECK-GI-NEXT: fmov x5, d1
912+
; CHECK-GI-NEXT: fmov x7, d1
913+
; CHECK-GI-NEXT: ret
896914
entry:
897915
%a = load i128, ptr %p
898916
%b = insertelement <4 x i128> poison, i128 %a, i64 0

0 commit comments

Comments
 (0)