@@ -4220,6 +4220,49 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4220
4220
SelectionDAG &DAG = DCI.DAG ;
4221
4221
SDLoc SL (N);
4222
4222
4223
+ if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4224
+ SDValue VAND = RHS.getOperand (0 );
4225
+ if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4226
+ uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4227
+ if (VAND->getOpcode () == ISD::AND && CRRHS) {
4228
+ SDValue LHSAND = VAND.getOperand (0 );
4229
+ SDValue RHSAND = VAND.getOperand (1 );
4230
+ if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4231
+ // Part of sracombine is to optimise for the case where it's possible
4232
+ // to reduce shl64 to shl32 if shift range is [63-32]. This
4233
+ // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4234
+ // '&' is then elided by ISel. The vector code for this was being
4235
+ // completely scalarised by the vector legalizer, but now that v2i32 is
4236
+ // legal, the vector legaliser only partially scalarises the
4237
+ // vector operations and the AND was not elided. This check enables us
4238
+ // to locate and scalarise the v2i32 AND and re-enable ISel to elide
4239
+ // the AND instruction.
4240
+ ConstantSDNode *CANDL =
4241
+ dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4242
+ ConstantSDNode *CANDR =
4243
+ dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4244
+ if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4245
+ RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4246
+ // Get the non-const AND operands and produce scalar AND
4247
+ const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4248
+ const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4249
+ SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4250
+ LHSAND, Zero);
4251
+ SDValue Hi =
4252
+ DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4253
+ SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4254
+ SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4255
+ SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4256
+ SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4257
+ if (AndIndex == 0 || AndIndex == 1 )
4258
+ return DAG.getNode (ISD::SRA, SL, MVT::i32 , Trunc,
4259
+ AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4260
+ }
4261
+ }
4262
+ }
4263
+ }
4264
+ }
4265
+
4223
4266
if (VT.getScalarType () != MVT::i64 )
4224
4267
return SDValue ();
4225
4268
@@ -4312,8 +4355,63 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4312
4355
return DAG.getNode (ISD::BITCAST, SL, VT, Vec);
4313
4356
}
4314
4357
4315
- static SDValue getScalarisedShift (SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4316
- SDLoc SL = SDLoc (RHS);
4358
+ // static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4359
+ // SDLoc SL = SDLoc(RHS);
4360
+ // if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4361
+ // SDValue VAND = RHS.getOperand(0);
4362
+ // if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4363
+ // uint64_t AndIndex = RHS->getConstantOperandVal(1);
4364
+ // if (VAND->getOpcode() == ISD::AND && CRRHS) {
4365
+ // SDValue LHSAND = VAND.getOperand(0);
4366
+ // SDValue RHSAND = VAND.getOperand(1);
4367
+ // if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4368
+ // // Part of srlcombine is to optimise for the case where its possible
4369
+ // // to reduce shl64 to shl32 if shift range is [63-32]. This
4370
+ // // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4371
+ // // '&' is then elided by ISel. The vector code for this was being
4372
+ // // completely scalarised by the vector legalizer, but now v2i32 is
4373
+ // // made legal the vector legaliser only partially scalarises the
4374
+ // // vector operations and the and was not elided. This check enables us
4375
+ // // to locate and scalarise the v2i32 and and re-enable ISel to elide
4376
+ // // the and instruction.
4377
+ // ConstantSDNode *CANDL =
4378
+ // dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4379
+ // ConstantSDNode *CANDR =
4380
+ // dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4381
+ // if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4382
+ // RHSAND->getConstantOperandVal(1) == 0x1f) {
4383
+ // // Get the non-const AND operands and produce scalar AND
4384
+ // const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4385
+ // const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4386
+ // SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4387
+ // LHSAND, Zero);
4388
+ // SDValue Hi =
4389
+ // DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4390
+ // SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4391
+ // SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4392
+ // SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4393
+ // SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4394
+ // if (AndIndex == 0 || AndIndex == 1)
4395
+ // return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4396
+ // AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
4397
+ // }
4398
+ // }
4399
+ // }
4400
+ // }
4401
+ // }
4402
+ // return SDValue();
4403
+ // }
4404
+
4405
+ SDValue AMDGPUTargetLowering::performSrlCombine (SDNode *N,
4406
+ DAGCombinerInfo &DCI) const {
4407
+ SDValue RHS = N->getOperand (1 );
4408
+ ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4409
+ EVT VT = N->getValueType (0 );
4410
+ SDValue LHS = N->getOperand (0 );
4411
+ SelectionDAG &DAG = DCI.DAG ;
4412
+ SDLoc SL (N);
4413
+ unsigned RHSVal;
4414
+
4317
4415
if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4318
4416
SDValue VAND = RHS.getOperand (0 );
4319
4417
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
@@ -4350,25 +4448,12 @@ static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4350
4448
SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4351
4449
if (AndIndex == 0 || AndIndex == 1 )
4352
4450
return DAG.getNode (ISD::SRL, SL, MVT::i32 , Trunc,
4353
- AndIndex == 0 ? LoAnd : HiAnd, RHS ->getFlags ());
4451
+ AndIndex == 0 ? LoAnd : HiAnd, N ->getFlags ());
4354
4452
}
4355
4453
}
4356
4454
}
4357
4455
}
4358
4456
}
4359
- return SDValue ();
4360
- }
4361
-
4362
- SDValue AMDGPUTargetLowering::performSrlCombine (SDNode *N,
4363
- DAGCombinerInfo &DCI) const {
4364
- SDValue RHS = N->getOperand (1 );
4365
- ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4366
- EVT VT = N->getValueType (0 );
4367
- SDValue LHS = N->getOperand (0 );
4368
- SelectionDAG &DAG = DCI.DAG ;
4369
- SDLoc SL (N);
4370
- unsigned RHSVal;
4371
-
4372
4457
4373
4458
4374
4459
if (CRHS) {
0 commit comments