@@ -4069,15 +4069,22 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
4069
4069
return DAG.getNode (ISD::BITCAST, SL, MVT::i64 , Vec);
4070
4070
}
4071
4071
4072
- SDValue AMDGPUTargetLowering::performShlCombine (SDNode *N,
4073
- DAGCombinerInfo &DCI) const {
4074
- EVT VT = N->getValueType (0 );
4075
- SDValue LHS = N->getOperand (0 );
4076
- SDValue RHS = N->getOperand (1 );
4077
- ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4078
- SDLoc SL (N);
4079
- SelectionDAG &DAG = DCI.DAG ;
4080
-
4072
+ // Part of the shift combines is to optimise for the case where it's possible
4073
+ // to reduce e.g. shl64 to shl32 if shift range is [63-32]. This
4074
+ // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4075
+ // '&' is then elided by ISel. The vector code for this was being
4076
+ // completely scalarised by the vector legalizer, but when v2i32 is
4077
+ // legal the vector legaliser only partially scalarises the
4078
+ // vector operations and the AND is not elided. This function
4079
+ // scalarises the AND for this optimisation case.
4080
+ static SDValue getShiftForReduction (unsigned ShiftOpc, SDValue LHS, SDValue RHS,
4081
+ SelectionDAG &DAG) {
4082
+
4083
+ assert (
4084
+ (ShiftOpc == ISD::SRA || ShiftOpc == ISD::SRL || ShiftOpc == ISD::SHL) &&
4085
+ " Expected shift Opcode." );
4086
+
4087
+ SDLoc SL = SDLoc (RHS);
4081
4088
if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4082
4089
SDValue VAND = RHS.getOperand (0 );
4083
4090
if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
@@ -4086,15 +4093,6 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4086
4093
SDValue LHSAND = VAND.getOperand (0 );
4087
4094
SDValue RHSAND = VAND.getOperand (1 );
4088
4095
if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4089
- // Part of shlcombine is to optimise for the case where its possible
4090
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4091
- // transforms: DST = shl i64 X, Y to [0, shl i32 X, (Y & 31) ]. The
4092
- // '&' is then elided by ISel. The vector code for this was being
4093
- // completely scalarised by the vector legalizer, but now v2i32 is
4094
- // made legal the vector legaliser only partially scalarises the
4095
- // vector operations and the and was not elided. This check enables us
4096
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4097
- // the and instruction.
4098
4096
ConstantSDNode *CANDL =
4099
4097
dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4100
4098
ConstantSDNode *CANDR =
@@ -4108,19 +4106,33 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
4108
4106
LHSAND, Zero);
4109
4107
SDValue Hi =
4110
4108
DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4111
- SDValue LoAnd =
4112
- DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, RHSAND->getOperand (0 ));
4113
- SDValue HiAnd =
4114
- DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, RHSAND->getOperand (0 ));
4109
+ SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4110
+ SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4111
+ SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4115
4112
SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4116
4113
if (AndIndex == 0 || AndIndex == 1 )
4117
- return DAG.getNode (ISD::SHL, SL, MVT::i32 , Trunc,
4118
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4114
+ return DAG.getNode (ShiftOpc, SL, MVT::i32 , Trunc,
4115
+ AndIndex == 0 ? LoAnd : HiAnd,
4116
+ RHS->getFlags ());
4119
4117
}
4120
4118
}
4121
4119
}
4122
4120
}
4123
4121
}
4122
+ return SDValue ();
4123
+ }
4124
+
4125
+ SDValue AMDGPUTargetLowering::performShlCombine (SDNode *N,
4126
+ DAGCombinerInfo &DCI) const {
4127
+ EVT VT = N->getValueType (0 );
4128
+ SDValue LHS = N->getOperand (0 );
4129
+ SDValue RHS = N->getOperand (1 );
4130
+ ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
4131
+ SDLoc SL (N);
4132
+ SelectionDAG &DAG = DCI.DAG ;
4133
+
4134
+ if (SDValue SS = getShiftForReduction (ISD::SHL, LHS, RHS, DAG))
4135
+ return SS;
4124
4136
4125
4137
unsigned RHSVal;
4126
4138
if (CRHS) {
@@ -4222,48 +4234,8 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4222
4234
SelectionDAG &DAG = DCI.DAG ;
4223
4235
SDLoc SL (N);
4224
4236
4225
- if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4226
- SDValue VAND = RHS.getOperand (0 );
4227
- if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4228
- uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4229
- if (VAND->getOpcode () == ISD::AND && CRRHS) {
4230
- SDValue LHSAND = VAND.getOperand (0 );
4231
- SDValue RHSAND = VAND.getOperand (1 );
4232
- if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4233
- // Part of sracombine is to optimise for the case where its possible
4234
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4235
- // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4236
- // '&' is then elided by ISel. The vector code for this was being
4237
- // completely scalarised by the vector legalizer, but now v2i32 is
4238
- // made legal the vector legaliser only partially scalarises the
4239
- // vector operations and the and was not elided. This check enables us
4240
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4241
- // the and instruction.
4242
- ConstantSDNode *CANDL =
4243
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4244
- ConstantSDNode *CANDR =
4245
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4246
- if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4247
- RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4248
- // Get the non-const AND operands and produce scalar AND
4249
- const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4250
- const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4251
- SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4252
- LHSAND, Zero);
4253
- SDValue Hi =
4254
- DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4255
- SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4256
- SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4257
- SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4258
- SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4259
- if (AndIndex == 0 || AndIndex == 1 )
4260
- return DAG.getNode (ISD::SRA, SL, MVT::i32 , Trunc,
4261
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4262
- }
4263
- }
4264
- }
4265
- }
4266
- }
4237
+ if (SDValue SS = getShiftForReduction (ISD::SRA, LHS, RHS, DAG))
4238
+ return SS;
4267
4239
4268
4240
if (VT.getScalarType () != MVT::i64 )
4269
4241
return SDValue ();
@@ -4357,52 +4329,6 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
4357
4329
return DAG.getNode (ISD::BITCAST, SL, VT, Vec);
4358
4330
}
4359
4331
4360
- // static SDValue getScalarisedShift(SDValue LHS, SDValue RHS, SelectionDAG &DAG) {
4361
- // SDLoc SL = SDLoc(RHS);
4362
- // if (RHS->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
4363
- // SDValue VAND = RHS.getOperand(0);
4364
- // if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand(1))) {
4365
- // uint64_t AndIndex = RHS->getConstantOperandVal(1);
4366
- // if (VAND->getOpcode() == ISD::AND && CRRHS) {
4367
- // SDValue LHSAND = VAND.getOperand(0);
4368
- // SDValue RHSAND = VAND.getOperand(1);
4369
- // if (RHSAND->getOpcode() == ISD::BUILD_VECTOR) {
4370
- // // Part of srlcombine is to optimise for the case where its possible
4371
- // // to reduce shl64 to shl32 if shift range is [63-32]. This
4372
- // // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4373
- // // '&' is then elided by ISel. The vector code for this was being
4374
- // // completely scalarised by the vector legalizer, but now v2i32 is
4375
- // // made legal the vector legaliser only partially scalarises the
4376
- // // vector operations and the and was not elided. This check enables us
4377
- // // to locate and scalarise the v2i32 and and re-enable ISel to elide
4378
- // // the and instruction.
4379
- // ConstantSDNode *CANDL =
4380
- // dyn_cast<ConstantSDNode>(RHSAND->getOperand(0));
4381
- // ConstantSDNode *CANDR =
4382
- // dyn_cast<ConstantSDNode>(RHSAND->getOperand(1));
4383
- // if (CANDL && CANDR && RHSAND->getConstantOperandVal(0) == 0x1f &&
4384
- // RHSAND->getConstantOperandVal(1) == 0x1f) {
4385
- // // Get the non-const AND operands and produce scalar AND
4386
- // const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
4387
- // const SDValue One = DAG.getConstant(1, SL, MVT::i32);
4388
- // SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32,
4389
- // LHSAND, Zero);
4390
- // SDValue Hi =
4391
- // DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4392
- // SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4393
- // SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4394
- // SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4395
- // SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
4396
- // if (AndIndex == 0 || AndIndex == 1)
4397
- // return DAG.getNode(ISD::SRL, SL, MVT::i32, Trunc,
4398
- // AndIndex == 0 ? LoAnd : HiAnd, RHS->getFlags());
4399
- // }
4400
- // }
4401
- // }
4402
- // }
4403
- // }
4404
- // return SDValue();
4405
- // }
4406
4332
4407
4333
SDValue AMDGPUTargetLowering::performSrlCombine (SDNode *N,
4408
4334
DAGCombinerInfo &DCI) const {
@@ -4414,49 +4340,8 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
4414
4340
SDLoc SL (N);
4415
4341
unsigned RHSVal;
4416
4342
4417
- if (RHS->getOpcode () == ISD::EXTRACT_VECTOR_ELT) {
4418
- SDValue VAND = RHS.getOperand (0 );
4419
- if (ConstantSDNode *CRRHS = dyn_cast<ConstantSDNode>(RHS->getOperand (1 ))) {
4420
- uint64_t AndIndex = RHS->getConstantOperandVal (1 );
4421
- if (VAND->getOpcode () == ISD::AND && CRRHS) {
4422
- SDValue LHSAND = VAND.getOperand (0 );
4423
- SDValue RHSAND = VAND.getOperand (1 );
4424
- if (RHSAND->getOpcode () == ISD::BUILD_VECTOR) {
4425
- // Part of srlcombine is to optimise for the case where its possible
4426
- // to reduce shl64 to shl32 if shift range is [63-32]. This
4427
- // transforms: DST = shl i64 X, Y to [0, srl i32 X, (Y & 31) ]. The
4428
- // '&' is then elided by ISel. The vector code for this was being
4429
- // completely scalarised by the vector legalizer, but now v2i32 is
4430
- // made legal the vector legaliser only partially scalarises the
4431
- // vector operations and the and was not elided. This check enables us
4432
- // to locate and scalarise the v2i32 and and re-enable ISel to elide
4433
- // the and instruction.
4434
- ConstantSDNode *CANDL =
4435
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (0 ));
4436
- ConstantSDNode *CANDR =
4437
- dyn_cast<ConstantSDNode>(RHSAND->getOperand (1 ));
4438
- if (CANDL && CANDR && RHSAND->getConstantOperandVal (0 ) == 0x1f &&
4439
- RHSAND->getConstantOperandVal (1 ) == 0x1f ) {
4440
- // Get the non-const AND operands and produce scalar AND
4441
- const SDValue Zero = DAG.getConstant (0 , SL, MVT::i32 );
4442
- const SDValue One = DAG.getConstant (1 , SL, MVT::i32 );
4443
- SDValue Lo = DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 ,
4444
- LHSAND, Zero);
4445
- SDValue Hi =
4446
- DAG.getNode (ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32 , LHSAND, One);
4447
- SDValue AndMask = DAG.getConstant (0x1f , SL, MVT::i32 );
4448
- SDValue LoAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Lo, AndMask);
4449
- SDValue HiAnd = DAG.getNode (ISD::AND, SL, MVT::i32 , Hi, AndMask);
4450
- SDValue Trunc = DAG.getNode (ISD::TRUNCATE, SL, MVT::i32 , LHS);
4451
- if (AndIndex == 0 || AndIndex == 1 )
4452
- return DAG.getNode (ISD::SRL, SL, MVT::i32 , Trunc,
4453
- AndIndex == 0 ? LoAnd : HiAnd, N->getFlags ());
4454
- }
4455
- }
4456
- }
4457
- }
4458
- }
4459
-
4343
+ if (SDValue SS = getShiftForReduction (ISD::SRL, LHS, RHS, DAG))
4344
+ return SS;
4460
4345
4461
4346
if (CRHS) {
4462
4347
RHSVal = CRHS->getZExtValue ();
0 commit comments