diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 39c09f4b71d0c..2fc8ef6a52a52 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5424,7 +5424,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( // Further legalization attempts will be needed to do split further. NarrowTy = DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2)); - unsigned NewElts = NarrowTy.getNumElements(); + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; SmallVector SplitSrc1Regs, SplitSrc2Regs; extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI); @@ -5535,7 +5535,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( Ops.clear(); } - MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi}); + MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi}); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 619a041c273cd..f83ad7aa7460e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -104,7 +104,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v2s32, v4s32) .clampMaxNumElements(0, s64, 2) - .clampMaxNumElements(0, p0, 2); + .clampMaxNumElements(0, p0, 2) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0); getActionDefinitionsBuilder(G_PHI) .legalFor({p0, s16, s32, s64}) @@ -1082,6 +1083,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v4s16, v8s16) .clampNumElements(0, v4s32, v4s32) .clampNumElements(0, v2s64, v2s64) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) { // Bitcast pointers vector to i64. const LLT DstTy = Query.Types[0]; diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll index a2ebdd28b16b8..bfc0ef0826f68 100644 --- a/llvm/test/CodeGen/AArch64/dup.ll +++ b/llvm/test/CodeGen/AArch64/dup.ll @@ -5,24 +5,6 @@ ; CHECK-GI: warning: Instruction selection used fallback path for dup_v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v3i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v4i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v4i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v4i128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v2fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v2fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v2fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v3fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v3fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v3fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for dup_v4fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for duplane0_v4fp128 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for loaddup_v4fp128 define <2 x i8> @dup_v2i8(i8 %a) { ; CHECK-LABEL: dup_v2i8: @@ -795,12 +777,22 @@ entry: } define <2 x i128> @loaddup_v2i128(ptr %p) { -; CHECK-LABEL: loaddup_v2i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp x2, x1, [x0] -; CHECK-NEXT: mov x0, x2 -; CHECK-NEXT: mov x3, x1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loaddup_v2i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldp x2, x1, [x0] +; CHECK-SD-NEXT: mov x0, x2 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loaddup_v2i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x2, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: fmov x3, d1 +; CHECK-GI-NEXT: ret entry: %a = load i128, ptr %p %b = insertelement <2 x i128> poison, i128 %a, i64 0 @@ -836,14 +828,26 @@ entry: } define <3 x i128> @loaddup_v3i128(ptr %p) { -; CHECK-LABEL: loaddup_v3i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp x2, x1, [x0] -; CHECK-NEXT: mov x0, x2 -; CHECK-NEXT: mov x3, x1 -; CHECK-NEXT: mov x4, x2 -; CHECK-NEXT: mov x5, x1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loaddup_v3i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldp x2, x1, [x0] +; CHECK-SD-NEXT: mov x0, x2 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x4, x2 +; CHECK-SD-NEXT: mov x5, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loaddup_v3i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x2, d0 +; CHECK-GI-NEXT: fmov x4, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: fmov x3, d1 +; CHECK-GI-NEXT: fmov x5, d1 +; CHECK-GI-NEXT: ret entry: %a = load i128, ptr %p %b = insertelement <3 x i128> poison, i128 %a, i64 0 @@ -883,16 +887,30 @@ entry: } define <4 x i128> @loaddup_v4i128(ptr %p) { -; CHECK-LABEL: loaddup_v4i128: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp x2, x1, [x0] -; CHECK-NEXT: mov x0, x2 -; CHECK-NEXT: mov x3, x1 -; CHECK-NEXT: mov x4, x2 -; CHECK-NEXT: mov x5, x1 -; CHECK-NEXT: mov x6, x2 -; CHECK-NEXT: mov x7, x1 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: loaddup_v4i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldp x2, x1, [x0] +; CHECK-SD-NEXT: mov x0, x2 +; CHECK-SD-NEXT: mov x3, x1 +; CHECK-SD-NEXT: mov x4, x2 +; CHECK-SD-NEXT: mov x5, x1 +; CHECK-SD-NEXT: mov x6, x2 +; CHECK-SD-NEXT: mov x7, x1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: loaddup_v4i128: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: fmov x0, d0 +; CHECK-GI-NEXT: fmov x2, d0 +; CHECK-GI-NEXT: fmov x4, d0 +; CHECK-GI-NEXT: fmov x6, d0 +; CHECK-GI-NEXT: fmov x1, d1 +; CHECK-GI-NEXT: fmov x3, d1 +; CHECK-GI-NEXT: fmov x5, d1 +; CHECK-GI-NEXT: fmov x7, d1 +; CHECK-GI-NEXT: ret entry: %a = load i128, ptr %p %b = insertelement <4 x i128> poison, i128 %a, i64 0