Skip to content

Commit 2806fd0

Browse files
committed
[AArch64][GlobalISel] Fix a crash when selecting shufflevectors with an undef mask element.
If a shufflevector's mask vector has an "undef" element, then the generic instruction defining that element's register is a G_IMPLICIT_DEF instead of a G_CONSTANT. This fixes the selector to handle this case, and for now assumes that undef just means zero. In future we'll optimize this case properly. llvm-svn: 358312
1 parent b6e6d3c commit 2806fd0

File tree

2 files changed

+68
-7
lines changed

2 files changed

+68
-7
lines changed

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ class AArch64InstructionSelector : public InstructionSelector {
9393
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
9494

9595
void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
96-
SmallVectorImpl<int> &Idxs) const;
96+
SmallVectorImpl<Optional<int>> &Idxs) const;
9797
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
9898
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
9999
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
@@ -2430,7 +2430,7 @@ bool AArch64InstructionSelector::selectConcatVectors(
24302430

24312431
void AArch64InstructionSelector::collectShuffleMaskIndices(
24322432
MachineInstr &I, MachineRegisterInfo &MRI,
2433-
SmallVectorImpl<int> &Idxs) const {
2433+
SmallVectorImpl<Optional<int>> &Idxs) const {
24342434
MachineInstr *MaskDef = MRI.getVRegDef(I.getOperand(3).getReg());
24352435
assert(
24362436
MaskDef->getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
@@ -2444,8 +2444,13 @@ void AArch64InstructionSelector::collectShuffleMaskIndices(
24442444
ScalarDef = MRI.getVRegDef(ScalarDef->getOperand(1).getReg());
24452445
assert(ScalarDef && "Could not find def of copy operand");
24462446
}
2447-
assert(ScalarDef->getOpcode() == TargetOpcode::G_CONSTANT);
2448-
Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2447+
if (ScalarDef->getOpcode() != TargetOpcode::G_CONSTANT) {
2448+
// This must be an undef if not a constant.
2449+
assert(ScalarDef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF);
2450+
Idxs.push_back(None);
2451+
} else {
2452+
Idxs.push_back(ScalarDef->getOperand(1).getCImm()->getSExtValue());
2453+
}
24492454
}
24502455
}
24512456

@@ -2692,8 +2697,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
26922697

26932698
// G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
26942699
// operand, it comes in as a normal vector value which we have to analyze to
2695-
// find the mask indices.
2696-
SmallVector<int, 8> Mask;
2700+
// find the mask indices. If the mask element is undef, then
2701+
// collectShuffleMaskIndices() will add a None entry for that index into
2702+
// the list.
2703+
SmallVector<Optional<int>, 8> Mask;
26972704
collectShuffleMaskIndices(I, MRI, Mask);
26982705
assert(!Mask.empty() && "Expected to find mask indices");
26992706

@@ -2708,7 +2715,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
27082715
unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
27092716

27102717
SmallVector<Constant *, 64> CstIdxs;
2711-
for (int Val : Mask) {
2718+
for (auto &MaybeVal : Mask) {
2719+
// For now, any undef indexes we'll just assume to be 0. This should be
2720+
// optimized in future, e.g. to select DUP etc.
2721+
int Val = MaybeVal.hasValue() ? *MaybeVal : 0;
27122722
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
27132723
unsigned Offset = Byte + Val * BytesPerElt;
27142724
CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=aarch64-- -O0 -run-pass=instruction-select -verify-machineinstrs %s -global-isel-abort=1 -o - | FileCheck %s
3+
4+
# This test checks that a shuffle mask with an undef value, instead of a constant,
5+
# doesn't crash. The code generated definitely isn't optimal.
6+
...
7+
---
8+
name: shuffle_undef_mask_elt
9+
alignment: 2
10+
legalized: true
11+
regBankSelected: true
12+
tracksRegLiveness: true
13+
machineFunctionInfo: {}
14+
body: |
15+
bb.1:
16+
liveins: $d0
17+
18+
; CHECK-LABEL: name: shuffle_undef_mask_elt
19+
; CHECK: liveins: $d0
20+
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
21+
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
22+
; CHECK: [[DEF1:%[0-9]+]]:gpr32 = IMPLICIT_DEF
23+
; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
24+
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[DEF]], %subreg.ssub
25+
; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, [[DEF1]]
26+
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
27+
; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
28+
; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
29+
; CHECK: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF
30+
; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], [[COPY]], %subreg.dsub
31+
; CHECK: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF
32+
; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF4]], [[COPY1]], %subreg.dsub
33+
; CHECK: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG2]], 0
34+
; CHECK: [[DEF5:%[0-9]+]]:fpr128 = IMPLICIT_DEF
35+
; CHECK: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF5]], [[LDRDui]], %subreg.dsub
36+
; CHECK: [[TBLv16i8One:%[0-9]+]]:fpr128 = TBLv16i8One [[INSvi64lane]], [[INSERT_SUBREG3]]
37+
; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[TBLv16i8One]].dsub
38+
; CHECK: $d0 = COPY [[COPY2]]
39+
; CHECK: RET_ReallyLR implicit $d0
40+
%0:fpr(<2 x s32>) = COPY $d0
41+
%6:gpr(s32) = G_IMPLICIT_DEF
42+
%7:gpr(s32) = G_IMPLICIT_DEF
43+
%2:fpr(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32)
44+
%4:gpr(s32) = G_CONSTANT i32 1
45+
%5:gpr(s32) = G_IMPLICIT_DEF
46+
%3:fpr(<2 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32)
47+
%1:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, %3(<2 x s32>)
48+
$d0 = COPY %1(<2 x s32>)
49+
RET_ReallyLR implicit $d0
50+
51+
...

0 commit comments

Comments
 (0)