Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5424,7 +5424,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
// Further legalization attempts will be needed to do split further.
NarrowTy =
DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
unsigned NewElts = NarrowTy.getNumElements();
unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;

SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
Expand Down Expand Up @@ -5535,7 +5535,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
Ops.clear();
}

MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
MI.eraseFromParent();
return Legalized;
}
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
.clampMaxNumElements(0, s64, 2)
.clampMaxNumElements(0, p0, 2);
.clampMaxNumElements(0, p0, 2)
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0);

getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
Expand Down Expand Up @@ -1082,6 +1083,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
// Bitcast pointers vector to i64.
const LLT DstTy = Query.Types[0];
Expand Down
102 changes: 60 additions & 42 deletions llvm/test/CodeGen/AArch64/dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,6 @@
; CHECK-GI: warning: Instruction selection used fallback path for dup_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v2i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v3i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v3i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v3i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v4i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v4i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v4i128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v2fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v3fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v3fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v3fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v4fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v4fp128
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v4fp128

define <2 x i8> @dup_v2i8(i8 %a) {
; CHECK-LABEL: dup_v2i8:
Expand Down Expand Up @@ -795,12 +777,22 @@ entry:
}

define <2 x i128> @loaddup_v2i128(ptr %p) {
; CHECK-LABEL: loaddup_v2i128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldp x2, x1, [x0]
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: mov x3, x1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: loaddup_v2i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldp x2, x1, [x0]
; CHECK-SD-NEXT: mov x0, x2
; CHECK-SD-NEXT: mov x3, x1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: loaddup_v2i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: fmov x2, d0
; CHECK-GI-NEXT: fmov x1, d1
; CHECK-GI-NEXT: fmov x3, d1
; CHECK-GI-NEXT: ret
entry:
%a = load i128, ptr %p
%b = insertelement <2 x i128> poison, i128 %a, i64 0
Expand Down Expand Up @@ -836,14 +828,26 @@ entry:
}

define <3 x i128> @loaddup_v3i128(ptr %p) {
; CHECK-LABEL: loaddup_v3i128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldp x2, x1, [x0]
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: mov x3, x1
; CHECK-NEXT: mov x4, x2
; CHECK-NEXT: mov x5, x1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: loaddup_v3i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldp x2, x1, [x0]
; CHECK-SD-NEXT: mov x0, x2
; CHECK-SD-NEXT: mov x3, x1
; CHECK-SD-NEXT: mov x4, x2
; CHECK-SD-NEXT: mov x5, x1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: loaddup_v3i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: fmov x2, d0
; CHECK-GI-NEXT: fmov x4, d0
; CHECK-GI-NEXT: fmov x1, d1
; CHECK-GI-NEXT: fmov x3, d1
; CHECK-GI-NEXT: fmov x5, d1
; CHECK-GI-NEXT: ret
entry:
%a = load i128, ptr %p
%b = insertelement <3 x i128> poison, i128 %a, i64 0
Expand Down Expand Up @@ -883,16 +887,30 @@ entry:
}

define <4 x i128> @loaddup_v4i128(ptr %p) {
; CHECK-LABEL: loaddup_v4i128:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldp x2, x1, [x0]
; CHECK-NEXT: mov x0, x2
; CHECK-NEXT: mov x3, x1
; CHECK-NEXT: mov x4, x2
; CHECK-NEXT: mov x5, x1
; CHECK-NEXT: mov x6, x2
; CHECK-NEXT: mov x7, x1
; CHECK-NEXT: ret
; CHECK-SD-LABEL: loaddup_v4i128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldp x2, x1, [x0]
; CHECK-SD-NEXT: mov x0, x2
; CHECK-SD-NEXT: mov x3, x1
; CHECK-SD-NEXT: mov x4, x2
; CHECK-SD-NEXT: mov x5, x1
; CHECK-SD-NEXT: mov x6, x2
; CHECK-SD-NEXT: mov x7, x1
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: loaddup_v4i128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ldr q0, [x0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: fmov x2, d0
; CHECK-GI-NEXT: fmov x4, d0
; CHECK-GI-NEXT: fmov x6, d0
; CHECK-GI-NEXT: fmov x1, d1
; CHECK-GI-NEXT: fmov x3, d1
; CHECK-GI-NEXT: fmov x5, d1
; CHECK-GI-NEXT: fmov x7, d1
; CHECK-GI-NEXT: ret
entry:
%a = load i128, ptr %p
%b = insertelement <4 x i128> poison, i128 %a, i64 0
Expand Down
Loading