diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 471a7f70dd546..a27b507fd38b5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -265,6 +265,9 @@ class LegalizationArtifactCombiner {
     Builder.setInstr(MI);
     Register DstReg = MI.getOperand(0).getReg();
     const LLT DstTy = MRI.getType(DstReg);
+    if (DstTy.isScalableVector())
+      return false;
+
     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
 
     // Try to fold trunc(g_constant) when the smaller constant type is legal.
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index b7541effafe5c..93e716a22814c 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
 
 LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   return [=](const LegalityQuery &Query) {
+    if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
+      return true;
     return !llvm::has_single_bit<uint32_t>(
         Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
   };
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index f162d1c2973cb..581d57bb14bfe 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const LLT v2s64 = LLT::fixed_vector(2, 64);
   const LLT v2p0 = LLT::fixed_vector(2, p0);
 
+  // 128 bit
   const LLT nxv16s8 = LLT::scalable_vector(16, s8);
   const LLT nxv8s16 = LLT::scalable_vector(8, s16);
   const LLT nxv4s32 = LLT::scalable_vector(4, s32);
   const LLT nxv2s64 = LLT::scalable_vector(2, s64);
 
+  // 64 bit
+  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+
+  // 32 bit
+  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+
   std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                         v16s8, v8s16, v4s32,
                                                         v2s64, v2p0,
@@ -385,7 +393,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                                  {v8s16, p0, s128, 8},
                                  {v2s32, p0, s64, 8},
                                  {v4s32, p0, s128, 8},
-                                 {v2s64, p0, s128, 8}})
+                                 {v2s64, p0, s128, 8},
+                                 // SVE vscale x 64 bit base sizes
+                                 {nxv4s16, p0, nxv4s16, 8}})
       // These extends are also legal
       .legalForTypesWithMemDesc(
           {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
@@ -442,16 +452,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
            {p0, p0, s64, 8},    {s128, p0, s128, 8},  {v16s8, p0, s128, 8},
            {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8},  {v8s16, p0, s128, 8},
            {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
-      .legalForTypesWithMemDesc({
-          // SVE vscale x 128 bit base sizes
-          // TODO: Add nxv2p0. Consider bitcastIf.
-          // See #92130
-          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
-          {nxv16s8, p0, nxv16s8, 8},
-          {nxv8s16, p0, nxv8s16, 8},
-          {nxv4s32, p0, nxv4s32, 8},
-          {nxv2s64, p0, nxv2s64, 8},
-      })
+      .legalForTypesWithMemDesc(
+          {// SVE vscale x 128 bit base sizes
+           // TODO: Add nxv2p0. Consider bitcastIf.
+           // See #92130
+           // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
+           {nxv16s8, p0, nxv16s8, 8},
+           {nxv8s16, p0, nxv8s16, 8},
+           {nxv4s32, p0, nxv4s32, 8},
+           {nxv2s64, p0, nxv2s64, 8},
+           // SVE vscale x 64 bit base sizes
+           {nxv2s32, p0, nxv2s32, 8},
+           {nxv4s16, p0, nxv4s16, 8},
+           // SVE vscale x 32 bit base sizes
+           {nxv2s16, p0, nxv2s16, 8}})
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
@@ -639,17 +653,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_TRUNC)
       .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+      .legalFor(HasSVE, {{nxv4s16, nxv4s32}})
      .moreElementsToNextPow2(0)
       .clampMaxNumElements(0, s8, 8)
       .clampMaxNumElements(0, s16, 4)
       .clampMaxNumElements(0, s32, 2)
       .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].isFixedVector();
+          },
           0, s8)
       .lowerIf([=](const LegalityQuery &Query) {
         LLT DstTy = Query.Types[0];
         LLT SrcTy = Query.Types[1];
-        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
+        return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
                DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
       })
       .clampMinNumElements(0, s8, 8)
@@ -1317,6 +1334,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
       .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
+      .legalFor(HasSVE, {{nxv2s16, nxv4s16},
+                         {nxv2s16, nxv8s16},
+                         {nxv4s16, nxv8s16},
+                         {nxv2s32, nxv4s32}})
       .widenScalarOrEltToNextPow2(0)
       .immIdx(0); // Inform verifier imm idx 0 is handled.
 
diff --git a/llvm/test/CodeGen/AArch64/extract_subvector.ll b/llvm/test/CodeGen/AArch64/extract_subvector.ll
new file mode 100644
index 0000000000000..7d35e58923bfb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/extract_subvector.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+define void @extract_nxv2i32_nxv4i32(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
+  store <vscale x 2 x i32> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv4i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+  store <vscale x 4 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv8i16_2(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
+  store <vscale x 2 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+  store <vscale x 2 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv4i16(ptr %p, ptr %p2) {
+; CHECK-LABEL: extract_nxv2i16_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
+  %vector = load <vscale x 4 x i16>, ptr %p
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv4i16(<vscale x 4 x i16> %vector, i64 0)
+  store <vscale x 2 x i16> %ext, ptr %p2
+  ret void
+}