Skip to content

Commit a73e0c0

Browse files
committed
[AArch64][GlobalISel] Legalize ptr shuffle vector to s64
This converts all ptr element shuffle vectors to s64, so that the existing vector legalization handling can lower them as needed. I'm not sure if bitcast/inttoptr/ptrtoint is intended to be necessary for vectors of pointers, but it uses buildCast for the casts, which now generates a ptrtoint/inttoptr.
1 parent 174899f commit a73e0c0

File tree

11 files changed

+138
-53
lines changed

11 files changed

+138
-53
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ class LegalizerHelper {
378378
LLT CastTy);
379379
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
380380
LLT CastTy);
381+
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
382+
LLT CastTy);
381383
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
382384
LLT CastTy);
383385
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,9 @@ LegalityPredicate isPointer(unsigned TypeIdx);
292292
/// True iff the specified type index is a pointer with the specified address
293293
/// space.
294294
LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
295+
/// True iff the specified type index is a vector of pointers (with any address
296+
/// space).
297+
LegalityPredicate isPointerVector(unsigned TypeIdx);
295298

296299
/// True if the type index is a vector with element type \p EltTy
297300
LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy);

llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
101101
};
102102
}
103103

104+
LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) {
105+
return [=](const LegalityQuery &Query) {
106+
return Query.Types[TypeIdx].isPointerVector();
107+
};
108+
}
109+
104110
LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx,
105111
LLT EltTy) {
106112
return [=](const LegalityQuery &Query) {

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
36973697
return Legalized;
36983698
}
36993699

3700+
// This bitcasts a shuffle vector to a different type, currently only one with
3701+
// the same element size and element count. Mostly used to legalize ptr vectors,
3702+
// where ptrtoint/inttoptr will be used instead of a bitcast.
3703+
//
3704+
// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3705+
// ===>
3706+
// <4 x s64> = G_PTRTOINT <4 x p0>
3707+
// <4 x s64> = G_PTRTOINT <4 x p0>
3708+
// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3709+
// <16 x p0> = G_INTTOPTR <16 x s64>
3710+
LegalizerHelper::LegalizeResult
3711+
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3712+
LLT CastTy) {
3713+
auto ShuffleMI = cast<GShuffleVector>(&MI);
3714+
LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3715+
LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3716+
3717+
// We currently only handle casts that keep the element size and element count.
3718+
if (TypeIdx != 0 ||
3719+
CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3720+
CastTy.getElementCount() != DstTy.getElementCount())
3721+
return UnableToLegalize;
3722+
3723+
LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3724+
3725+
auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3726+
auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3727+
auto Shuf =
3728+
MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3729+
MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3730+
3731+
MI.eraseFromParent();
3732+
return Legalized;
3733+
}
3734+
37003735
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
37013736
///
37023737
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
@@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
41334168
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
41344169
case TargetOpcode::G_CONCAT_VECTORS:
41354170
return bitcastConcatVector(MI, TypeIdx, CastTy);
4171+
case TargetOpcode::G_SHUFFLE_VECTOR:
4172+
return bitcastShuffleVector(MI, TypeIdx, CastTy);
41364173
case TargetOpcode::G_EXTRACT_SUBVECTOR:
41374174
return bitcastExtractSubvector(MI, TypeIdx, CastTy);
41384175
case TargetOpcode::G_INSERT_SUBVECTOR:

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
600600
return buildCopy(Dst, Src);
601601

602602
unsigned Opcode;
603-
if (SrcTy.isPointer() && DstTy.isScalar())
603+
if (SrcTy.isPointerOrPointerVector())
604604
Opcode = TargetOpcode::G_PTRTOINT;
605-
else if (DstTy.isPointer() && SrcTy.isScalar())
605+
else if (DstTy.isPointerOrPointerVector())
606606
Opcode = TargetOpcode::G_INTTOPTR;
607607
else {
608-
assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet");
608+
assert(!SrcTy.isPointerOrPointerVector() &&
609+
!DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet");
609610
Opcode = TargetOpcode::G_BITCAST;
610611
}
611612

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -840,13 +840,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
840840
getActionDefinitionsBuilder(G_PTRTOINT)
841841
.legalFor({{s64, p0}, {v2s64, v2p0}})
842842
.widenScalarToNextPow2(0, 64)
843-
.clampScalar(0, s64, s64);
843+
.clampScalar(0, s64, s64)
844+
.clampMaxNumElements(0, s64, 2);
844845

845846
getActionDefinitionsBuilder(G_INTTOPTR)
846847
.unsupportedIf([&](const LegalityQuery &Query) {
847848
return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
848849
})
849-
.legalFor({{p0, s64}, {v2p0, v2s64}});
850+
.legalFor({{p0, s64}, {v2p0, v2s64}})
851+
.clampMaxNumElements(1, s64, 2);
850852

851853
// Casts for 32 and 64-bit width type are just copies.
852854
// Same for 128-bit width type, except they are on the FPR bank.
@@ -1053,7 +1055,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10531055
if (DstTy != SrcTy)
10541056
return false;
10551057
return llvm::is_contained(
1056-
{v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1058+
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
10571059
})
10581060
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
10591061
// just want those lowered into G_BUILD_VECTOR
@@ -1079,7 +1081,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10791081
.clampNumElements(0, v8s8, v16s8)
10801082
.clampNumElements(0, v4s16, v8s16)
10811083
.clampNumElements(0, v4s32, v4s32)
1082-
.clampNumElements(0, v2s64, v2s64);
1084+
.clampNumElements(0, v2s64, v2s64)
1085+
.bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
1086+
// Bitcast a vector of pointers to a vector of s64 with the same element count.
1087+
const LLT DstTy = Query.Types[0];
1088+
return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1089+
});
10831090

10841091
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
10851092
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@ body: |
5959
; CHECK-NEXT: {{ $}}
6060
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
6161
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1
62-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0)
63-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>)
62+
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>)
63+
; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
64+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0)
65+
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>)
66+
; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
6467
; CHECK-NEXT: RET_ReallyLR implicit $q0
6568
%0:_(<2 x p0>) = COPY $q0
6669
%1:_(<2 x p0>) = COPY $q1

llvm/test/CodeGen/AArch64/arm64-ext.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0
3+
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) {
86
; CHECK-LABEL: test_vextd:

llvm/test/CodeGen/AArch64/neon-perm.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vuzp2q_p0
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
114

125
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
136
%struct.int16x4x2_t = type { [2 x <4 x i16>] }

llvm/test/CodeGen/AArch64/neon-vector-splat.ll

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for shuffle8
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <2 x i32> @shuffle(ptr %P) {
86
; CHECK-SD-LABEL: shuffle:
@@ -116,10 +114,16 @@ define <2 x i64> @shuffle7(ptr %P) {
116114
}
117115

118116
define <2 x ptr> @shuffle8(ptr %P) {
119-
; CHECK-LABEL: shuffle8:
120-
; CHECK: // %bb.0:
121-
; CHECK-NEXT: ld1r { v0.2d }, [x0]
122-
; CHECK-NEXT: ret
117+
; CHECK-SD-LABEL: shuffle8:
118+
; CHECK-SD: // %bb.0:
119+
; CHECK-SD-NEXT: ld1r { v0.2d }, [x0]
120+
; CHECK-SD-NEXT: ret
121+
;
122+
; CHECK-GI-LABEL: shuffle8:
123+
; CHECK-GI: // %bb.0:
124+
; CHECK-GI-NEXT: ldr q0, [x0]
125+
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
126+
; CHECK-GI-NEXT: ret
123127
%lv2ptr = load <2 x ptr>, ptr %P
124128
%sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer
125129
ret <2 x ptr> %sv2ptr

0 commit comments

Comments
 (0)