Skip to content

Commit 8b77d3c

Browse files
author
Thorsten Schütt
committed
[GlobalISel][AArch64] Legalize G_EXTRACT_SUBVECTOR for SVE
We use stores because return is not supported for smaller granules. {nxv2s16, nxv4s16} fails with: LLVM ERROR: cannot select: %0:zpr(<vscale x 4 x s16>) = G_TRUNC %2:fpr(<vscale x 4 x s32>) (in function: extract_nxv2i16_nxv4i16_1)
1 parent af6ebb7 commit 8b77d3c

File tree

4 files changed

+90
-12
lines changed

4 files changed

+90
-12
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,9 @@ class LegalizationArtifactCombiner {
267267
const LLT DstTy = MRI.getType(DstReg);
268268
Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
269269

270+
if (DstTy.isScalableVector())
271+
return false;
272+
270273
// Try to fold trunc(g_constant) when the smaller constant type is legal.
271274
auto *SrcMI = MRI.getVRegDef(SrcReg);
272275
if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {

llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
196196

197197
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
198198
return [=](const LegalityQuery &Query) {
199+
if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
200+
return true;
199201
return !llvm::has_single_bit<uint32_t>(
200202
Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
201203
};

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
6161
const LLT v2s64 = LLT::fixed_vector(2, 64);
6262
const LLT v2p0 = LLT::fixed_vector(2, p0);
6363

64+
// 128 bit
6465
const LLT nxv16s8 = LLT::scalable_vector(16, s8);
6566
const LLT nxv8s16 = LLT::scalable_vector(8, s16);
6667
const LLT nxv4s32 = LLT::scalable_vector(4, s32);
6768
const LLT nxv2s64 = LLT::scalable_vector(2, s64);
6869

70+
// 64 bit
71+
const LLT nxv4s16 = LLT::scalable_vector(4, s16);
72+
const LLT nxv2s32 = LLT::scalable_vector(2, s32);
73+
74+
// 32 bit
75+
const LLT nxv2s16 = LLT::scalable_vector(2, s16);
76+
6977
std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
7078
v16s8, v8s16, v4s32,
7179
v2s64, v2p0,
@@ -442,16 +450,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
442450
{p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
443451
{v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
444452
{v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
445-
.legalForTypesWithMemDesc({
446-
// SVE vscale x 128 bit base sizes
447-
// TODO: Add nxv2p0. Consider bitcastIf.
448-
// See #92130
449-
// https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
450-
{nxv16s8, p0, nxv16s8, 8},
451-
{nxv8s16, p0, nxv8s16, 8},
452-
{nxv4s32, p0, nxv4s32, 8},
453-
{nxv2s64, p0, nxv2s64, 8},
454-
})
453+
.legalForTypesWithMemDesc(
454+
{// SVE vscale x 128 bit base sizes
455+
// TODO: Add nxv2p0. Consider bitcastIf.
456+
// See #92130
457+
// https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
458+
{nxv16s8, p0, nxv16s8, 8},
459+
{nxv8s16, p0, nxv8s16, 8},
460+
{nxv4s32, p0, nxv4s32, 8},
461+
{nxv2s64, p0, nxv2s64, 8},
462+
// SVE vscale x 64 bit base sizes
463+
{nxv2s32, p0, nxv2s32, 8},
464+
{nxv4s16, p0, nxv4s16, 8},
465+
// SVE vscale x 32 bit base sizes
466+
{nxv2s16, p0, nxv2s16, 8}})
455467
.clampScalar(0, s8, s64)
456468
.lowerIf([=](const LegalityQuery &Query) {
457469
return Query.Types[0].isScalar() &&
@@ -639,17 +651,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
639651

640652
getActionDefinitionsBuilder(G_TRUNC)
641653
.legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
654+
.legalFor(HasSVE, {{nxv4s16, nxv4s32}})
642655
.moreElementsToNextPow2(0)
643656
.clampMaxNumElements(0, s8, 8)
644657
.clampMaxNumElements(0, s16, 4)
645658
.clampMaxNumElements(0, s32, 2)
646659
.minScalarOrEltIf(
647-
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
660+
[=](const LegalityQuery &Query) {
661+
return Query.Types[0].isFixedVector();
662+
},
648663
0, s8)
649664
.lowerIf([=](const LegalityQuery &Query) {
650665
LLT DstTy = Query.Types[0];
651666
LLT SrcTy = Query.Types[1];
652-
return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
667+
return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
653668
DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
654669
})
655670
.clampMinNumElements(0, s8, 8)
@@ -1315,8 +1330,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
13151330

13161331
getActionDefinitionsBuilder({G_SCMP, G_UCMP}).lower();
13171332

1333+
// FIXME: {nxv2s16, nxv4s16}
13181334
getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
13191335
.legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
1336+
.legalFor(HasSVE,
1337+
{{nxv2s16, nxv8s16}, {nxv4s16, nxv8s16}, {nxv2s32, nxv4s32}})
13201338
.widenScalarOrEltToNextPow2(0)
13211339
.immIdx(0); // Inform verifier imm idx 0 is handled.
13221340

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple aarch64 -mattr=+sve | FileCheck %s
3+
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
4+
5+
;; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -aarch64-enable-gisel-sve=1 -stop-after=irtranslator < %s | FileCheck %s
6+
7+
define void @extract_nxv2i32_nxv4i32_1(<vscale x 4 x i32> %arg, ptr %p) {
8+
; CHECK-LABEL: extract_nxv2i32_nxv4i32_1:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: uunpklo z0.d, z0.s
11+
; CHECK-NEXT: ptrue p0.d
12+
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
13+
; CHECK-NEXT: ret
14+
%ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
15+
store <vscale x 2 x i32> %ext, ptr %p
16+
ret void
17+
}
18+
19+
define void @extract_nxv4i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
20+
; CHECK-LABEL: extract_nxv4i16_nxv8i16_1:
21+
; CHECK: // %bb.0:
22+
; CHECK-NEXT: uunpklo z0.s, z0.h
23+
; CHECK-NEXT: ptrue p0.s
24+
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
25+
; CHECK-NEXT: ret
26+
%ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
27+
store <vscale x 4 x i16> %ext, ptr %p
28+
ret void
29+
}
30+
31+
define void @extract_nxv2i16_nxv8i16_1(<vscale x 8 x i16> %arg, ptr %p) {
32+
; CHECK-LABEL: extract_nxv2i16_nxv8i16_1:
33+
; CHECK: // %bb.0:
34+
; CHECK-NEXT: uunpklo z0.s, z0.h
35+
; CHECK-NEXT: ptrue p0.d
36+
; CHECK-NEXT: uunpkhi z0.d, z0.s
37+
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
38+
; CHECK-NEXT: ret
39+
%ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
40+
store <vscale x 2 x i16> %ext, ptr %p
41+
ret void
42+
}
43+
44+
define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
45+
; CHECK-LABEL: extract_nxv2i16_nxv8i16:
46+
; CHECK: // %bb.0:
47+
; CHECK-NEXT: uunpklo z0.s, z0.h
48+
; CHECK-NEXT: ptrue p0.d
49+
; CHECK-NEXT: uunpklo z0.d, z0.s
50+
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
51+
; CHECK-NEXT: ret
52+
%ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
53+
store <vscale x 2 x i16> %ext, ptr %p
54+
ret void
55+
}

0 commit comments

Comments
 (0)