@@ -4222,6 +4222,49 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4222
4222
SelectionDAG &DAG = DCI.DAG ;
4223
4223
SDLoc SL (N);
4224
4224
4225
+ if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4226
+ SDValue VAND = RHS.getOperand (0 );
4227
+ if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4228
+ uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4229
+ if (VAND->getOpcode () == ISD::AND && CRRHS) {
4230
+ SDValue LHSAND = VAND.getOperand (0 );
4231
+ SDValue RHSAND = VAND.getOperand (1 );
4232
+ if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4233
+ // Part of sracombine is to optimise for the case where it's possible
4234
+ // to reduce shl64 to shl32 if shift range is [63-32]. This
4235
+ // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4236
+ // '&' is then elided by ISel. The vector code for this was being
4237
+ // completely scalarised by the vector legalizer, but now v2i32 is
4238
+ // made legal the vector legaliser only partially scalarises the
4239
+ // vector operations and the AND was not elided. This check enables us
4240
+ // to locate and scalarise the v2i32 AND and re-enable ISel to elide
4241
+ // the AND instruction.
4242
+ ConstantSDNode *CANDL =
4243
+ dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4244
+ ConstantSDNode *CANDR =
4245
+ dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4246
+ if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4247
+ RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4248
+ // Get the non-const AND operands and produce scalar AND
4249
+ const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4250
+ const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4251
+ SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4252
+ LHSAND, Zero);
4253
+ SDValue Hi =
4254
+ DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4255
+ SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4256
+ SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4257
+ SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4258
+ SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4259
+ if (AndIndex == 0 || AndIndex == 1 )
4260
+ return DAG.getNode (ISD::SRA, SL, MVT::i32 , Trunc,
4261
+ AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4262
+ }
4263
+ }
4264
+ }
4265
+ }
4266
+ }
4267
+
4225
4268
if (VT.getScalarType () != MVT::i64 )
4226
4269
return SDValue ();
4227
4270
@@ -4314,8 +4357,63 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4314
4357
return DAG.getNode (ISD::BITCAST, SL, VT, Vec);
4315
4358
}
4316
4359
4317
- static SDValue getScalarisedShift (SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4318
- SDLoc SL = SDLoc (RHS);
4360
+ // static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4361
+ // SDLoc SL = SDLoc(RHS);
4362
+ // if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4363
+ // SDValue VAND = RHS.getOperand(0);
4364
+ // if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4365
+ // uint64_t AndIndex = RHS->getConstantOperandVal(1);
4366
+ // if (VAND->getOpcode() == ISD::AND && CRRHS) {
4367
+ // SDValue LHSAND = VAND.getOperand(0);
4368
+ // SDValue RHSAND = VAND.getOperand(1);
4369
+ // if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4370
+ // // Part of srlcombine is to optimise for the case where its possible
4371
+ // // to reduce shl64 to shl32 if shift range is [63-32]. This
4372
+ // // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4373
+ // // '&' is then elided by ISel. The vector code for this was being
4374
+ // // completely scalarised by the vector legalizer, but now v2i32 is
4375
+ // // made legal the vector legaliser only partially scalarises the
4376
+ // // vector operations and the and was not elided. This check enables us
4377
+ // // to locate and scalarise the v2i32 and and re-enable ISel to elide
4378
+ // // the and instruction.
4379
+ // ConstantSDNode *CANDL =
4380
+ // dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4381
+ // ConstantSDNode *CANDR =
4382
+ // dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4383
+ // if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4384
+ // RHSAND->getConstantOperandVal(1) == 0x1f) {
4385
+ // // Get the non-const AND operands and produce scalar AND
4386
+ // const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4387
+ // const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4388
+ // SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4389
+ // LHSAND, Zero);
4390
+ // SDValue Hi =
4391
+ // DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4392
+ // SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4393
+ // SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4394
+ // SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4395
+ // SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4396
+ // if (AndIndex == 0 || AndIndex == 1)
4397
+ // return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4398
+ // AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
4399
+ // }
4400
+ // }
4401
+ // }
4402
+ // }
4403
+ // }
4404
+ // return SDValue();
4405
+ // }
4406
+
4407
+ SDValue AMDGPUTargetLowering::performSrlCombine (SDNode *N,
4408
+ DAGCombinerInfo &DCI) const {
4409
+ SDValue RHS = N->getOperand (1 );
4410
+ ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4411
+ EVT VT = N->getValueType (0 );
4412
+ SDValue LHS = N->getOperand (0 );
4413
+ SelectionDAG &DAG = DCI.DAG ;
4414
+ SDLoc SL (N);
4415
+ unsigned RHSVal;
4416
+
4319
4417
if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4320
4418
SDValue VAND = RHS.getOperand (0 );
4321
4419
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
@@ -4352,25 +4450,12 @@ static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4352
4450
SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4353
4451
if (AndIndex == 0 || AndIndex == 1 )
4354
4452
return DAG.getNode (ISD::SRL, SL, MVT::i32 , Trunc,
4355
- AndIndex == 0 ? LoAnd : HiAnd, RHS ->getFlags ());
4453
+ AndIndex == 0 ? LoAnd : HiAnd, N ->getFlags ());
4356
4454
}
4357
4455
}
4358
4456
}
4359
4457
}
4360
4458
}
4361
- return SDValue ();
4362
- }
4363
-
4364
- SDValue AMDGPUTargetLowering::performSrlCombine (SDNode *N,
4365
- DAGCombinerInfo &DCI) const {
4366
- SDValue RHS = N->getOperand (1 );
4367
- ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4368
- EVT VT = N->getValueType (0 );
4369
- SDValue LHS = N->getOperand (0 );
4370
- SelectionDAG &DAG = DCI.DAG ;
4371
- SDLoc SL (N);
4372
- unsigned RHSVal;
4373
-
4374
4459
4375
4460
4376
4461
if (CRHS) {
0 commit comments