@@ -93,7 +93,7 @@ class AArch64InstructionSelector : public InstructionSelector {
93
93
bool selectUnmergeValues (MachineInstr &I, MachineRegisterInfo &MRI) const ;
94
94
95
95
void collectShuffleMaskIndices (MachineInstr &I, MachineRegisterInfo &MRI,
96
- SmallVectorImpl<int > &Idxs) const ;
96
+ SmallVectorImpl<Optional< int > > &Idxs) const ;
97
97
bool selectShuffleVector (MachineInstr &I, MachineRegisterInfo &MRI) const ;
98
98
bool selectExtractElt (MachineInstr &I, MachineRegisterInfo &MRI) const ;
99
99
bool selectConcatVectors (MachineInstr &I, MachineRegisterInfo &MRI) const ;
@@ -2430,7 +2430,7 @@ bool AArch64InstructionSelector::selectConcatVectors(
2430
2430
2431
2431
void AArch64InstructionSelector::collectShuffleMaskIndices (
2432
2432
MachineInstr &I, MachineRegisterInfo &MRI,
2433
- SmallVectorImpl<int > &Idxs) const {
2433
+ SmallVectorImpl<Optional< int > > &Idxs) const {
2434
2434
MachineInstr *MaskDef = MRI.getVRegDef (I.getOperand (3 ).getReg ());
2435
2435
assert (
2436
2436
MaskDef->getOpcode () == TargetOpcode::G_BUILD_VECTOR &&
@@ -2444,8 +2444,13 @@ void AArch64InstructionSelector::collectShuffleMaskIndices(
2444
2444
ScalarDef = MRI.getVRegDef (ScalarDef->getOperand (1 ).getReg ());
2445
2445
assert (ScalarDef && " Could not find def of copy operand" );
2446
2446
}
2447
- assert (ScalarDef->getOpcode () == TargetOpcode::G_CONSTANT);
2448
- Idxs.push_back (ScalarDef->getOperand (1 ).getCImm ()->getSExtValue ());
2447
+ if (ScalarDef->getOpcode () != TargetOpcode::G_CONSTANT) {
2448
+ // This must be an undef if not a constant.
2449
+ assert (ScalarDef->getOpcode () == TargetOpcode::G_IMPLICIT_DEF);
2450
+ Idxs.push_back (None);
2451
+ } else {
2452
+ Idxs.push_back (ScalarDef->getOperand (1 ).getCImm ()->getSExtValue ());
2453
+ }
2449
2454
}
2450
2455
}
2451
2456
@@ -2692,8 +2697,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
2692
2697
2693
2698
// G_SHUFFLE_VECTOR doesn't really have a strictly enforced constant mask
2694
2699
// operand, it comes in as a normal vector value which we have to analyze to
2695
- // find the mask indices.
2696
- SmallVector<int , 8 > Mask;
2700
+ // find the mask indices. If the mask element is undef, then
2701
+ // collectShuffleMaskIndices() will add a None entry for that index into
2702
+ // the list.
2703
+ SmallVector<Optional<int >, 8 > Mask;
2697
2704
collectShuffleMaskIndices (I, MRI, Mask);
2698
2705
assert (!Mask.empty () && " Expected to find mask indices" );
2699
2706
@@ -2708,7 +2715,10 @@ bool AArch64InstructionSelector::selectShuffleVector(
2708
2715
unsigned BytesPerElt = DstTy.getElementType ().getSizeInBits () / 8 ;
2709
2716
2710
2717
SmallVector<Constant *, 64 > CstIdxs;
2711
- for (int Val : Mask) {
2718
+ for (auto &MaybeVal : Mask) {
2719
+ // For now, any undef indexes we'll just assume to be 0. This should be
2720
+ // optimized in future, e.g. to select DUP etc.
2721
+ int Val = MaybeVal.hasValue () ? *MaybeVal : 0 ;
2712
2722
for (unsigned Byte = 0 ; Byte < BytesPerElt; ++Byte) {
2713
2723
unsigned Offset = Byte + Val * BytesPerElt;
2714
2724
CstIdxs.emplace_back (ConstantInt::get (Type::getInt8Ty (Ctx), Offset));
0 commit comments