Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ class LegalizationArtifactCombiner {
const LLT DstTy = MRI.getType(DstReg);
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

if (DstTy.isScalableVector())
return false;
Comment on lines +268 to +269
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check can be hoisted up to immediately after the getType call.


// Try to fold trunc(g_constant) when the smaller constant type is legal.
auto *SrcMI = MRI.getVRegDef(SrcReg);
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,

LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
  // Returns a predicate that is true when the memory type described by
  // Query.MMODescrs[MMOIdx] does not have a power-of-two size in bytes.
  return [=](const LegalityQuery &Query) {
    const auto &MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
    // Scalable vectors are always reported as "not a power of two"; the
    // short-circuit keeps getSizeInBytes() from being queried for them.
    const bool HasPow2ByteSize =
        !MemTy.isScalableVector() &&
        llvm::has_single_bit<uint32_t>(MemTy.getSizeInBytes());
    return !HasPow2ByteSize;
  };
Expand Down
47 changes: 34 additions & 13 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT v2s64 = LLT::fixed_vector(2, 64);
const LLT v2p0 = LLT::fixed_vector(2, p0);

// 128 bit
const LLT nxv16s8 = LLT::scalable_vector(16, s8);
const LLT nxv8s16 = LLT::scalable_vector(8, s16);
const LLT nxv4s32 = LLT::scalable_vector(4, s32);
const LLT nxv2s64 = LLT::scalable_vector(2, s64);

// 64 bit
const LLT nxv4s16 = LLT::scalable_vector(4, s16);
const LLT nxv2s32 = LLT::scalable_vector(2, s32);

// 32 bit
const LLT nxv2s16 = LLT::scalable_vector(2, s16);

std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
v16s8, v8s16, v4s32,
v2s64, v2p0,
Expand Down Expand Up @@ -385,7 +393,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v8s16, p0, s128, 8},
{v2s32, p0, s64, 8},
{v4s32, p0, s128, 8},
{v2s64, p0, s128, 8}})
{v2s64, p0, s128, 8},
// SVE vscale x 64 bit base sizes
{nxv4s16, p0, nxv4s16, 8}})
// These extends are also legal
.legalForTypesWithMemDesc(
{{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
Expand Down Expand Up @@ -442,16 +452,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
{v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
{v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
.legalForTypesWithMemDesc({
// SVE vscale x 128 bit base sizes
// TODO: Add nxv2p0. Consider bitcastIf.
// See #92130
// https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
{nxv16s8, p0, nxv16s8, 8},
{nxv8s16, p0, nxv8s16, 8},
{nxv4s32, p0, nxv4s32, 8},
{nxv2s64, p0, nxv2s64, 8},
})
.legalForTypesWithMemDesc(
{// SVE vscale x 128 bit base sizes
// TODO: Add nxv2p0. Consider bitcastIf.
// See #92130
// https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
{nxv16s8, p0, nxv16s8, 8},
{nxv8s16, p0, nxv8s16, 8},
{nxv4s32, p0, nxv4s32, 8},
{nxv2s64, p0, nxv2s64, 8},
// SVE vscale x 64 bit base sizes
{nxv2s32, p0, nxv2s32, 8},
{nxv4s16, p0, nxv4s16, 8},
// SVE vscale x 32 bit base sizes
{nxv2s16, p0, nxv2s16, 8}})
.clampScalar(0, s8, s64)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
Expand Down Expand Up @@ -639,17 +653,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_TRUNC)
.legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
.legalFor(HasSVE, {{nxv4s16, nxv4s32}})
.moreElementsToNextPow2(0)
.clampMaxNumElements(0, s8, 8)
.clampMaxNumElements(0, s16, 4)
.clampMaxNumElements(0, s32, 2)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
[=](const LegalityQuery &Query) {
return Query.Types[0].isFixedVector();
},
0, s8)
.lowerIf([=](const LegalityQuery &Query) {
LLT DstTy = Query.Types[0];
LLT SrcTy = Query.Types[1];
return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
})
.clampMinNumElements(0, s8, 8)
Expand Down Expand Up @@ -1317,6 +1334,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
.legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
.legalFor(HasSVE, {{nxv2s16, nxv4s16},
{nxv2s16, nxv8s16},
{nxv4s16, nxv8s16},
{nxv2s32, nxv4s32}})
.widenScalarOrEltToNextPow2(0)
.immIdx(0); // Inform verifier imm idx 0 is handled.

Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/AArch64/extract_subvector.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s

; Extracts the <vscale x 2 x i32> subvector at index 0 of a
; <vscale x 4 x i32> argument and stores it through %p.
; Expected codegen: one uunpklo (.s -> .d) followed by a predicated st1w.
define void @extract_nxv2i32_nxv4i32(<vscale x 4 x i32> %arg, ptr %p) {
; CHECK-LABEL: extract_nxv2i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
store <vscale x 2 x i32> %ext, ptr %p
ret void
}

; Extracts the <vscale x 4 x i16> subvector at index 0 of a
; <vscale x 8 x i16> argument and stores it through %p.
; Expected codegen: one uunpklo (.h -> .s) followed by a predicated st1h.
define void @extract_nxv4i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
; CHECK-LABEL: extract_nxv4i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
store <vscale x 4 x i16> %ext, ptr %p
ret void
}

; Extracts the <vscale x 2 x i16> subvector at index 2 (a non-zero index) of a
; <vscale x 8 x i16> argument and stores it through %p.
; Expected codegen: uunpklo (.h -> .s), then uunpkhi (.s -> .d), then a
; predicated st1h.
define void @extract_nxv2i16_nxv8i16_2(<vscale x 8 x i16> %arg, ptr %p) {
; CHECK-LABEL: extract_nxv2i16_nxv8i16_2:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
store <vscale x 2 x i16> %ext, ptr %p
ret void
}

; Extracts the <vscale x 2 x i16> subvector at index 0 of a
; <vscale x 8 x i16> argument and stores it through %p.
; Expected codegen: two successive uunpklo steps (.h -> .s -> .d), then a
; predicated st1h.
define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
; CHECK-LABEL: extract_nxv2i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
store <vscale x 2 x i16> %ext, ptr %p
ret void
}

; Loads a <vscale x 4 x i16> from %p, extracts the <vscale x 2 x i16>
; subvector at index 0, and stores it through %p2. Unlike the other tests in
; this file, the source vector comes from memory rather than an argument.
; Expected codegen: predicated ld1h, uunpklo (.s -> .d), predicated st1h.
define void @extract_nxv2i16_nxv4i16(ptr %p, ptr %p2) {
; CHECK-LABEL: extract_nxv2i16_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%vector = load <vscale x 4 x i16>, ptr %p
%ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv4i16(<vscale x 4 x i16> %vector, i64 0)
store <vscale x 2 x i16> %ext, ptr %p2
ret void
}
Loading