@@ -4067,15 +4067,22 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
4067
4067
return DAG.getNode (ISD::BITCAST, SL, MVT::i64 , Vec);
4068
4068
}
4069
4069
4070
- SDValue AMDGPUTargetLowering::performShlCombine (SDNode *N,
4071
- DAGCombinerInfo &DCI) const {
4072
- EVT VT = N->getValueType (0 );
4073
- SDValue LHS = N->getOperand (0 );
4074
- SDValue RHS = N->getOperand (1 );
4075
- ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4076
- SDLoc SL (N);
4077
- SelectionDAG &DAG = DCI.DAG ;
4078
-
4070
+ // Part of the shift combines is to optimise for the case where it's possible
4071
+ // to reduce e.g. shl64 to shl32 if the shift range is [32-63]. This
4072
+ // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4073
+ // '&' is then elided by ISel. The vector code for this was being
4074
+ // completely scalarised by the vector legalizer, but when v2i32 is
4075
+ // legal the vector legalizer only partially scalarises the
4076
+ // vector operations and the AND is not elided. This function
4077
+ // scalarises the AND for this optimisation case.
4078
+ static SDValue getShiftForReduction (unsigned ShiftOpc, SDValue LHS, SDValue RHS,
4079
+ SelectionDAG &DAG) {
4080
+
4081
+ assert (
4082
+ (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
4083
+ " Expected shift Opcode." );
4084
+
4085
+ SDLoc SL = SDLoc (RHS);
4079
4086
if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4080
4087
SDValue VAND = RHS.getOperand (0 );
4081
4088
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
@@ -4084,15 +4091,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4084
4091
SDValue LHSAND = VAND.getOperand (0 );
4085
4092
SDValue RHSAND = VAND.getOperand (1 );
4086
4093
if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4087
- // Part of shlcombine is to optimise for the case where its possible
4088
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4089
- // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4090
- // '&' is then elided by ISel. The vector code for this was being
4091
- // completely scalarised by the vector legalizer, but now v2i32 is
4092
- // made legal the vector legaliser only partially scalarises the
4093
- // vector operations and the and was not elided. This check enables us
4094
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4095
- // the and instruction.
4096
4094
ConstantSDNode *CANDL =
4097
4095
dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4098
4096
ConstantSDNode *CANDR =
@@ -4106,19 +4104,33 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4106
4104
LHSAND, Zero);
4107
4105
SDValue Hi =
4108
4106
DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4109
- SDValue LoAnd =
4110
- DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, RHSAND->getOperand (0 ));
4111
- SDValue HiAnd =
4112
- DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, RHSAND->getOperand (0 ));
4107
+ SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4108
+ SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4109
+ SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4113
4110
SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4114
4111
if (AndIndex == 0 || AndIndex == 1 )
4115
- return DAG.getNode (ISD::SHL, SL, MVT::i32 , Trunc,
4116
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4112
+ return DAG.getNode (ShiftOpc, SL, MVT::i32 , Trunc,
4113
+ AndIndex == 0 ? LoAnd : HiAnd,
4114
+ RHS->getFlags ());
4117
4115
}
4118
4116
}
4119
4117
}
4120
4118
}
4121
4119
}
4120
+ return SDValue ();
4121
+ }
4122
+
4123
+ SDValue AMDGPUTargetLowering::performShlCombine (SDNode *N,
4124
+ DAGCombinerInfo &DCI) const {
4125
+ EVT VT = N->getValueType (0 );
4126
+ SDValue LHS = N->getOperand (0 );
4127
+ SDValue RHS = N->getOperand (1 );
4128
+ ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4129
+ SDLoc SL (N);
4130
+ SelectionDAG &DAG = DCI.DAG ;
4131
+
4132
+ if (SDValue SS = getShiftForReduction (ISD::SHL, LHS, RHS, DAG))
4133
+ return SS;
4122
4134
4123
4135
unsigned RHSVal;
4124
4136
if (CRHS) {
@@ -4220,48 +4232,8 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4220
4232
SelectionDAG &DAG = DCI.DAG ;
4221
4233
SDLoc SL (N);
4222
4234
4223
- if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4224
- SDValue VAND = RHS.getOperand (0 );
4225
- if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4226
- uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4227
- if (VAND->getOpcode () == ISD::AND && CRRHS) {
4228
- SDValue LHSAND = VAND.getOperand (0 );
4229
- SDValue RHSAND = VAND.getOperand (1 );
4230
- if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4231
- // Part of sracombine is to optimise for the case where its possible
4232
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4233
- // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4234
- // '&' is then elided by ISel. The vector code for this was being
4235
- // completely scalarised by the vector legalizer, but now v2i32 is
4236
- // made legal the vector legaliser only partially scalarises the
4237
- // vector operations and the and was not elided. This check enables us
4238
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4239
- // the and instruction.
4240
- ConstantSDNode *CANDL =
4241
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4242
- ConstantSDNode *CANDR =
4243
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4244
- if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4245
- RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4246
- // Get the non-const AND operands and produce scalar AND
4247
- const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4248
- const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4249
- SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4250
- LHSAND, Zero);
4251
- SDValue Hi =
4252
- DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4253
- SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4254
- SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4255
- SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4256
- SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4257
- if (AndIndex == 0 || AndIndex == 1 )
4258
- return DAG.getNode (ISD::SRA, SL, MVT::i32 , Trunc,
4259
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4260
- }
4261
- }
4262
- }
4263
- }
4264
- }
4235
+ if (SDValue SS = getShiftForReduction (ISD::SRA, LHS, RHS, DAG))
4236
+ return SS;
4265
4237
4266
4238
if (VT.getScalarType () != MVT::i64 )
4267
4239
return SDValue ();
@@ -4355,52 +4327,6 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4355
4327
return DAG.getNode (ISD::BITCAST, SL, VT, Vec);
4356
4328
}
4357
4329
4358
- // static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4359
- // SDLoc SL = SDLoc(RHS);
4360
- // if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4361
- // SDValue VAND = RHS.getOperand(0);
4362
- // if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4363
- // uint64_t AndIndex = RHS->getConstantOperandVal(1);
4364
- // if (VAND->getOpcode() == ISD::AND && CRRHS) {
4365
- // SDValue LHSAND = VAND.getOperand(0);
4366
- // SDValue RHSAND = VAND.getOperand(1);
4367
- // if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4368
- // // Part of srlcombine is to optimise for the case where its possible
4369
- // // to reduce shl64 to shl32 if shift range is [63-32]. This
4370
- // // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4371
- // // '&' is then elided by ISel. The vector code for this was being
4372
- // // completely scalarised by the vector legalizer, but now v2i32 is
4373
- // // made legal the vector legaliser only partially scalarises the
4374
- // // vector operations and the and was not elided. This check enables us
4375
- // // to locate and scalarise the v2i32 and and re-enable ISel to elide
4376
- // // the and instruction.
4377
- // ConstantSDNode *CANDL =
4378
- // dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4379
- // ConstantSDNode *CANDR =
4380
- // dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4381
- // if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4382
- // RHSAND->getConstantOperandVal(1) == 0x1f) {
4383
- // // Get the non-const AND operands and produce scalar AND
4384
- // const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4385
- // const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4386
- // SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4387
- // LHSAND, Zero);
4388
- // SDValue Hi =
4389
- // DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4390
- // SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4391
- // SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4392
- // SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4393
- // SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4394
- // if (AndIndex == 0 || AndIndex == 1)
4395
- // return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4396
- // AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
4397
- // }
4398
- // }
4399
- // }
4400
- // }
4401
- // }
4402
- // return SDValue();
4403
- // }
4404
4330
4405
4331
SDValue AMDGPUTargetLowering::performSrlCombine (SDNode *N,
4406
4332
DAGCombinerInfo &DCI) const {
@@ -4412,49 +4338,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
4412
4338
SDLoc SL (N);
4413
4339
unsigned RHSVal;
4414
4340
4415
- if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4416
- SDValue VAND = RHS.getOperand (0 );
4417
- if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4418
- uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4419
- if (VAND->getOpcode () == ISD::AND && CRRHS) {
4420
- SDValue LHSAND = VAND.getOperand (0 );
4421
- SDValue RHSAND = VAND.getOperand (1 );
4422
- if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4423
- // Part of srlcombine is to optimise for the case where its possible
4424
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4425
- // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4426
- // '&' is then elided by ISel. The vector code for this was being
4427
- // completely scalarised by the vector legalizer, but now v2i32 is
4428
- // made legal the vector legaliser only partially scalarises the
4429
- // vector operations and the and was not elided. This check enables us
4430
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4431
- // the and instruction.
4432
- ConstantSDNode *CANDL =
4433
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4434
- ConstantSDNode *CANDR =
4435
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4436
- if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4437
- RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4438
- // Get the non-const AND operands and produce scalar AND
4439
- const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4440
- const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4441
- SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4442
- LHSAND, Zero);
4443
- SDValue Hi =
4444
- DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4445
- SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4446
- SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4447
- SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4448
- SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4449
- if (AndIndex == 0 || AndIndex == 1 )
4450
- return DAG.getNode (ISD::SRL, SL, MVT::i32 , Trunc,
4451
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4452
- }
4453
- }
4454
- }
4455
- }
4456
- }
4457
-
4341
+ if (SDValue SS = getShiftForReduction (ISD::SRL, LHS, RHS, DAG))
4342
+ return SS;
4458
4343
4459
4344
if (CRHS) {
4460
4345
RHSVal = CRHS->getZExtValue ();
0 commit comments