@@ -719,18 +719,6 @@ static bool selectSupportsSourceMods(const SDNode *N) {
719
719
return N->getValueType (0 ) == MVT::f32 ;
720
720
}
721
721
722
- LLVM_READONLY
723
- static bool buildVectorSupportsSourceMods (const SDNode *N) {
724
- if (N->getValueType (0 ) != MVT::v2f32)
725
- return true ;
726
-
727
- if (N->getOperand (0 )->getOpcode () != ISD::SELECT ||
728
- N->getOperand (1 )->getOpcode () != ISD::SELECT)
729
- return true ;
730
-
731
- return false ;
732
- }
733
-
734
722
// Most FP instructions support source modifiers, but this could be refined
735
723
// slightly.
736
724
LLVM_READONLY
@@ -764,8 +752,6 @@ static bool hasSourceMods(const SDNode *N) {
764
752
return true ;
765
753
}
766
754
}
767
- case ISD::BUILD_VECTOR:
768
- return buildVectorSupportsSourceMods (N);
769
755
case ISD::SELECT:
770
756
return selectSupportsSourceMods (N);
771
757
default :
@@ -4062,6 +4048,59 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
4062
4048
return DAG.getNode (ISD::BITCAST, SL, MVT::i64 , Vec);
4063
4049
}
4064
4050
4051
+ // The shift combines narrow e.g. a shl64 to a shl32 when the shift amount is
4052
+ // in [32, 63]: DST = shl i64 X, Y becomes [0, shl i32 X, (Y & 31)], and the
4053
+ // '&' is then elided by ISel. The vector form used to be fully scalarised by
4054
+ // the vector legalizer, but when v2i32 is legal it is only partially
4055
+ // scalarised and the AND is not elided. This helper scalarises that AND: for
4056
+ // RHS == extract_vector_elt (and V, build_vector 0x1f, 0x1f), C with C in
4057
+ // {0, 1} it returns ShiftOpc i32 (trunc LHS), (and (extract V, C), 0x1f) with
4058
+ // RHS's flags; otherwise it returns an empty SDValue.
4059
+ static SDValue getShiftForReduction (unsigned ShiftOpc, SDValue LHS, SDValue RHS,
4060
+ SelectionDAG &DAG) {
4061
+ assert (
4062
+ (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
4063
+ " Expected shift Opcode." );
4064
+
4065
+ SDLoc SL = SDLoc (RHS);
4066
+ if (RHS->getOpcode () != ISD::EXTRACT_VECTOR_ELT)
4067
+ return SDValue ();
4068
+
4069
+ SDValue VAND = RHS.getOperand (0 );
4070
+ if (VAND->getOpcode () != ISD::AND)
4071
+ return SDValue ();
4072
+
4073
+ ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ));
4074
+ if (!CRRHS)
4075
+ return SDValue ();
4076
+
4077
+ SDValue LHSAND = VAND.getOperand (0 );
4078
+ SDValue RHSAND = VAND.getOperand (1 );
4079
+ if (RHSAND->getOpcode () != ISD::BUILD_VECTOR)
4080
+ return SDValue ();
4081
+
4082
+ ConstantSDNode *CANDL = dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4083
+ ConstantSDNode *CANDR = dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4084
+ if (!CANDL || !CANDR || RHSAND->getConstantOperandVal (0 ) != 0x1f ||
4085
+ RHSAND->getConstantOperandVal (1 ) != 0x1f )
4086
+ return SDValue ();
4087
+ // Extract both lanes of the non-constant AND operand and mask each with 0x1f.
4088
+ const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4089
+ const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4090
+ SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, Zero);
4091
+ SDValue Hi = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4092
+ SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4093
+ SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4094
+ SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4095
+ SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4096
+ uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4097
+ if (AndIndex == 0 || AndIndex == 1 )
4098
+ return DAG.getNode (ShiftOpc, SL, MVT::i32 , Trunc,
4099
+ AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags ()); // NOTE(review): the removed inline code passed N->getFlags() (the shift's flags) here - confirm RHS's flags are intended.
4100
+
4101
+ return SDValue ();
4102
+ }
4103
+
4065
4104
SDValue AMDGPUTargetLowering::performShlCombine (SDNode *N,
4066
4105
DAGCombinerInfo &DCI) const {
4067
4106
EVT VT = N->getValueType (0 );
@@ -4071,49 +4110,8 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4071
4110
SDLoc SL (N);
4072
4111
SelectionDAG &DAG = DCI.DAG ;
4073
4112
4074
- if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4075
- SDValue VAND = RHS.getOperand (0 );
4076
- if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4077
- uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4078
- if (VAND->getOpcode () == ISD::AND && CRRHS) {
4079
- SDValue LHSAND = VAND.getOperand (0 );
4080
- SDValue RHSAND = VAND.getOperand (1 );
4081
- if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4082
- // Part of shlcombine is to optimise for the case where its possible
4083
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4084
- // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4085
- // '&' is then elided by ISel. The vector code for this was being
4086
- // completely scalarised by the vector legalizer, but now v2i32 is
4087
- // made legal the vector legaliser only partially scalarises the
4088
- // vector operations and the and was not elided. This check enables us
4089
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4090
- // the and instruction.
4091
- ConstantSDNode *CANDL =
4092
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4093
- ConstantSDNode *CANDR =
4094
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4095
- if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4096
- RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4097
- // Get the non-const AND operands and produce scalar AND
4098
- const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4099
- const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4100
- SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4101
- LHSAND, Zero);
4102
- SDValue Hi =
4103
- DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4104
- SDValue LoAnd =
4105
- DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, RHSAND->getOperand (0 ));
4106
- SDValue HiAnd =
4107
- DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, RHSAND->getOperand (0 ));
4108
- SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4109
- if (AndIndex == 0 || AndIndex == 1 )
4110
- return DAG.getNode (ISD::SHL, SL, MVT::i32 , Trunc,
4111
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4112
- }
4113
- }
4114
- }
4115
- }
4116
- }
4113
+ if (SDValue SS = getShiftForReduction (ISD::SHL, LHS, RHS, DAG))
4114
+ return SS;
4117
4115
4118
4116
unsigned RHSVal;
4119
4117
if (CRHS) {
@@ -4215,6 +4213,9 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4215
4213
SelectionDAG &DAG = DCI.DAG ;
4216
4214
SDLoc SL (N);
4217
4215
4216
+ if (SDValue SS = getShiftForReduction (ISD::SRA, LHS, RHS, DAG))
4217
+ return SS;
4218
+
4218
4219
if (VT.getScalarType () != MVT::i64 )
4219
4220
return SDValue ();
4220
4221
@@ -4245,12 +4246,12 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4245
4246
(ElementType.getSizeInBits () - 1 )) {
4246
4247
ShiftAmt = ShiftFullAmt;
4247
4248
} else {
4248
- SDValue truncShiftAmt = DAG.getNode (ISD::TRUNCATE, SL, TargetType, RHS);
4249
+ SDValue TruncShiftAmt = DAG.getNode (ISD::TRUNCATE, SL, TargetType, RHS);
4249
4250
const SDValue ShiftMask =
4250
4251
DAG.getConstant (TargetScalarType.getSizeInBits () - 1 , SL, TargetType);
4251
4252
// This AND instruction will clamp out of bounds shift values.
4252
4253
// It will also be removed during later instruction selection.
4253
- ShiftAmt = DAG.getNode (ISD::AND, SL, TargetType, truncShiftAmt , ShiftMask);
4254
+ ShiftAmt = DAG.getNode (ISD::AND, SL, TargetType, TruncShiftAmt , ShiftMask);
4254
4255
}
4255
4256
4256
4257
EVT ConcatType;
@@ -4317,48 +4318,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
4317
4318
SDLoc SL (N);
4318
4319
unsigned RHSVal;
4319
4320
4320
- if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4321
- SDValue VAND = RHS.getOperand (0 );
4322
- if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4323
- uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4324
- if (VAND->getOpcode () == ISD::AND && CRRHS) {
4325
- SDValue LHSAND = VAND.getOperand (0 );
4326
- SDValue RHSAND = VAND.getOperand (1 );
4327
- if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4328
- // Part of srlcombine is to optimise for the case where its possible
4329
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4330
- // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4331
- // '&' is then elided by ISel. The vector code for this was being
4332
- // completely scalarised by the vector legalizer, but now v2i32 is
4333
- // made legal the vector legaliser only partially scalarises the
4334
- // vector operations and the and was not elided. This check enables us
4335
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4336
- // the and instruction.
4337
- ConstantSDNode *CANDL =
4338
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4339
- ConstantSDNode *CANDR =
4340
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4341
- if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4342
- RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4343
- // Get the non-const AND operands and produce scalar AND
4344
- const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4345
- const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4346
- SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4347
- LHSAND, Zero);
4348
- SDValue Hi =
4349
- DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4350
- SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4351
- SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4352
- SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4353
- SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4354
- if (AndIndex == 0 || AndIndex == 1 )
4355
- return DAG.getNode (ISD::SRL, SL, MVT::i32 , Trunc,
4356
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4357
- }
4358
- }
4359
- }
4360
- }
4361
- }
4321
+ if (SDValue SS = getShiftForReduction (ISD::SRL, LHS, RHS, DAG))
4322
+ return SS;
4362
4323
4363
4324
if (CRHS) {
4364
4325
RHSVal = CRHS->getZExtValue ();
@@ -4873,8 +4834,8 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
4873
4834
if (!AMDGPUTargetLowering::allUsesHaveSourceMods (N.getNode ()))
4874
4835
return SDValue ();
4875
4836
4876
- return distributeOpThroughSelect (DCI, LHS.getOpcode (), SDLoc (N), Cond, LHS,
4877
- RHS);
4837
+ return distributeOpThroughSelect (DCI, LHS.getOpcode (),
4838
+ SDLoc (N), Cond, LHS, RHS);
4878
4839
}
4879
4840
4880
4841
bool Inv = false ;
0 commit comments