Skip to content

Commit d3ce069

Browse files
authored
[AArch64][GlobalISel] Legalize ptr shuffle vector to s64 (#116013)
This converts all shuffle vectors with ptr elements to use s64 elements, so that the existing vector legalization handling can lower them as needed. This prevents a lot of fallbacks that currently try to generate things like `<2 x ptr> G_EXT`. I'm not sure whether bitcast/inttoptr/ptrtoint is intended to be necessary for vectors of pointers, but the new code uses buildCast for the casts, which now generates a ptrtoint/inttoptr for pointer vectors.
1 parent b0e7383 commit d3ce069

File tree

11 files changed

+138
-53
lines changed

11 files changed

+138
-53
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ class LegalizerHelper {
378378
LLT CastTy);
379379
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
380380
LLT CastTy);
381+
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
382+
LLT CastTy);
381383
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
382384
LLT CastTy);
383385
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,9 @@ LegalityPredicate isPointer(unsigned TypeIdx);
292292
/// True iff the specified type index is a pointer with the specified address
293293
/// space.
294294
LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
295+
/// True iff the specified type index is a vector of pointers (with any address
296+
/// space).
297+
LegalityPredicate isPointerVector(unsigned TypeIdx);
295298

296299
/// True if the type index is a vector with element type \p EltTy
297300
LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy);

llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
101101
};
102102
}
103103

104+
LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) {
105+
return [=](const LegalityQuery &Query) {
106+
return Query.Types[TypeIdx].isPointerVector();
107+
};
108+
}
109+
104110
LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx,
105111
LLT EltTy) {
106112
return [=](const LegalityQuery &Query) {

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
36973697
return Legalized;
36983698
}
36993699

3700+
// This bitcasts a shuffle vector to a different type currently of the same
3701+
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3702+
// will be used instead.
3703+
//
3704+
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3705+
// ===>
3706+
// <4 x s64> = G_PTRTOINT <4 x p0>
3707+
// <4 x s64> = G_PTRTOINT <4 x p0>
3708+
// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3709+
// <16 x p0> = G_INTTOPTR <16 x s64>
3710+
LegalizerHelper::LegalizeResult
3711+
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3712+
LLT CastTy) {
3713+
auto ShuffleMI = cast<GShuffleVector>(&MI);
3714+
LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3715+
LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3716+
3717+
// We currently only handle casts of the result type (TypeIdx 0) that keep
// both the scalar element size and the element count unchanged.
3718+
if (TypeIdx != 0 ||
3719+
CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3720+
CastTy.getElementCount() != DstTy.getElementCount())
3721+
return UnableToLegalize;
3722+
3723+
LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3724+
3725+
auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3726+
auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3727+
auto Shuf =
3728+
MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3729+
MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3730+
3731+
MI.eraseFromParent();
3732+
return Legalized;
3733+
}
3734+
37003735
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
37013736
///
37023737
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
@@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
41334168
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
41344169
case TargetOpcode::G_CONCAT_VECTORS:
41354170
return bitcastConcatVector(MI, TypeIdx, CastTy);
4171+
case TargetOpcode::G_SHUFFLE_VECTOR:
4172+
return bitcastShuffleVector(MI, TypeIdx, CastTy);
41364173
case TargetOpcode::G_EXTRACT_SUBVECTOR:
41374174
return bitcastExtractSubvector(MI, TypeIdx, CastTy);
41384175
case TargetOpcode::G_INSERT_SUBVECTOR:

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
600600
return buildCopy(Dst, Src);
601601

602602
unsigned Opcode;
603-
if (SrcTy.isPointer() && DstTy.isScalar())
603+
if (SrcTy.isPointerOrPointerVector())
604604
Opcode = TargetOpcode::G_PTRTOINT;
605-
else if (DstTy.isPointer() && SrcTy.isScalar())
605+
else if (DstTy.isPointerOrPointerVector())
606606
Opcode = TargetOpcode::G_INTTOPTR;
607607
else {
608-
assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet");
608+
assert(!SrcTy.isPointerOrPointerVector() &&
609+
!DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet");
609610
Opcode = TargetOpcode::G_BITCAST;
610611
}
611612

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -840,13 +840,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
840840
getActionDefinitionsBuilder(G_PTRTOINT)
841841
.legalFor({{s64, p0}, {v2s64, v2p0}})
842842
.widenScalarToNextPow2(0, 64)
843-
.clampScalar(0, s64, s64);
843+
.clampScalar(0, s64, s64)
844+
.clampMaxNumElements(0, s64, 2);
844845

845846
getActionDefinitionsBuilder(G_INTTOPTR)
846847
.unsupportedIf([&](const LegalityQuery &Query) {
847848
return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
848849
})
849-
.legalFor({{p0, s64}, {v2p0, v2s64}});
850+
.legalFor({{p0, s64}, {v2p0, v2s64}})
851+
.clampMaxNumElements(1, s64, 2);
850852

851853
// Casts for 32 and 64-bit width type are just copies.
852854
// Same for 128-bit width type, except they are on the FPR bank.
@@ -1053,7 +1055,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10531055
if (DstTy != SrcTy)
10541056
return false;
10551057
return llvm::is_contained(
1056-
{v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1058+
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
10571059
})
10581060
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
10591061
// just want those lowered into G_BUILD_VECTOR
@@ -1079,7 +1081,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10791081
.clampNumElements(0, v8s8, v16s8)
10801082
.clampNumElements(0, v4s16, v8s16)
10811083
.clampNumElements(0, v4s32, v4s32)
1082-
.clampNumElements(0, v2s64, v2s64);
1084+
.clampNumElements(0, v2s64, v2s64)
1085+
.bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1086+
// Bitcast a vector of pointers to a vector of s64 with the same element count.
1087+
const LLT DstTy = Query.Types[0];
1088+
return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1089+
});
10831090

10841091
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
10851092
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@ body: |
5959
; CHECK-NEXT: {{ $}}
6060
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
6161
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1
62-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0)
63-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>)
62+
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>)
63+
; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
64+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0)
65+
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>)
66+
; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
6467
; CHECK-NEXT: RET_ReallyLR implicit $q0
6568
%0:_(<2 x p0>) = COPY $q0
6669
%1:_(<2 x p0>) = COPY $q1

llvm/test/CodeGen/AArch64/arm64-ext.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0
3+
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) {
86
; CHECK-LABEL: test_vextd:

llvm/test/CodeGen/AArch64/neon-perm.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vuzp2q_p0
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
114

125
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
136
%struct.int16x4x2_t = type { [2 x <4 x i16>] }

llvm/test/CodeGen/AArch64/neon-vector-splat.ll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for shuffle8
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <2 x i32> @shuffle(ptr %P) {
86
; CHECK-SD-LABEL: shuffle:
@@ -116,10 +114,16 @@ define <2 x i64> @shuffle7(ptr %P) {
116114
}
117115

118116
define <2 x ptr> @shuffle8(ptr %P) {
119-
; CHECK-LABEL: shuffle8:
120-
; CHECK: // %bb.0:
121-
; CHECK-NEXT: ld1r { v0.2d }, [x0]
122-
; CHECK-NEXT: ret
117+
; CHECK-SD-LABEL: shuffle8:
118+
; CHECK-SD: // %bb.0:
119+
; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
120+
; CHECK-SD-NEXT: ret
121+
;
122+
; CHECK-GI-LABEL: shuffle8:
123+
; CHECK-GI: // %bb.0:
124+
; CHECK-GI-NEXT: ldr q0, [x0]
125+
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
126+
; CHECK-GI-NEXT: ret
123127
%lv2ptr = load <2 x ptr>, ptr %P
124128
%sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer
125129
ret <2 x ptr> %sv2ptr

0 commit comments

Comments
 (0)