@@ -13361,43 +13361,31 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

-  // Fold the fneg of a vselect into the v2 vselect operands.
-  // xor (vselect c, a, b), 0x80000000 ->
-  //   bitcast (vselect c, (fneg (bitcast a)), (fneg (bitcast b)))
-  if (VT == MVT::v2i32 && LHS.getNumOperands() > 1) {
-
-    const ConstantSDNode *CRHS0 = dyn_cast<ConstantSDNode>(RHS.getOperand(0));
-    const ConstantSDNode *CRHS1 = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
-    SDValue LHS_0 = LHS.getOperand(0);
-    SDValue LHS_1 = LHS.getOperand(1);
-
-    if (LHS.getOpcode() == ISD::VSELECT && CRHS0 &&
-        CRHS0->getAPIntValue().isSignMask() &&
-        shouldFoldFNegIntoSrc(N, LHS_0) && CRHS1 &&
-        CRHS1->getAPIntValue().isSignMask() &&
-        shouldFoldFNegIntoSrc(N, LHS_1)) {
-
-      SDLoc DL(N);
-      SDValue CastLHS =
-          DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(1));
-      SDValue CastRHS =
-          DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(2));
-      SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastLHS);
-      SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastRHS);
-      SDValue NewSelect = DAG.getNode(ISD::VSELECT, DL, MVT::v2f32,
-                                      LHS->getOperand(0), FNegLHS, FNegRHS);
-      return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect);
-    }
-  }
-
-  const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+  const ConstantSDNode *CRHS = isConstOrConstSplat(RHS);

  if (CRHS && VT == MVT::i64) {
    if (SDValue Split =
            splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
      return Split;
  }

+  // Fold the xor of a (v)select with a sign-mask constant into the select
+  // operands:
+  //   (xor (vselect cc, x, y), SignMask) ->
+  //     (vselect cc, (xor x, SignMask), (xor y, SignMask))
+  // and likewise for i64 (xor (select cc, x, y), SignMask). This enables the
+  // xor to be replaced with source modifiers when the select is lowered to
+  // CNDMASK.
+  // TODO: Remove this; it only prevents regressions in
+  // fneg-modifier-casting.ll.
+  unsigned Opc = LHS.getOpcode();
+  if (((Opc == ISD::VSELECT && VT == MVT::v2i32) ||
+       (Opc == ISD::SELECT && VT == MVT::i64)) &&
+      CRHS && CRHS->getAPIntValue().isSignMask()) {
+    SDLoc SL(N);
+    SDValue Cond = LHS.getOperand(0);
+    SDValue TVal = LHS.getOperand(1);
+    SDValue FVal = LHS.getOperand(2);
+    SDValue XTrue = DAG.getNode(ISD::XOR, SL, VT, TVal, RHS);
+    SDValue XFalse = DAG.getNode(ISD::XOR, SL, VT, FVal, RHS);
+    return DAG.getNode(Opc, SL, VT, Cond, XTrue, XFalse);
+  }
+
  // Make sure to apply the 64-bit constant splitting fold before trying to fold
  // fneg-like xors into 64-bit select.
  if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
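Editor's note: both the removed fold and the new one in this hunk rest on the same premise: XOR with a constant whose only set bit is the MSB (the pattern `APInt::isSignMask()` tests for) flips exactly the IEEE-754 sign bit, i.e. it is a bitwise fneg, which is why it can later become a source modifier. A minimal standalone C++20 sketch of that premise, illustration only and not part of the patch:

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // Only the most-significant bit set: the pattern APInt::isSignMask() checks.
  const uint32_t SignMask = 0x80000000u;
  for (float F : {1.0f, -2.5f, 42.0f}) {
    uint32_t Bits = std::bit_cast<uint32_t>(F);
    // Flipping just the sign bit is a bitwise fneg.
    float Negated = std::bit_cast<float>(Bits ^ SignMask);
    assert(Negated == -F);
  }
}
```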
@@ -14362,125 +14350,165 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
      EltSize, NumElem, Idx->isDivergent(), getSubtarget());
}

-SDValue
-SITargetLowering::performExtractVectorEltCombine(SDNode *N,
-                                                 DAGCombinerInfo &DCI) const {
-  SDValue Vec = N->getOperand(0);
-  SelectionDAG &DAG = DCI.DAG;
+// SDValue
+// SITargetLowering::performBuildVectorCombine(SDNode *N,
+//                                             DAGCombinerInfo &DCI) const {
+//   // if (N->use_empty())
+//   //   return SDValue();

-  EVT VecVT = Vec.getValueType();
-  EVT VecEltVT = VecVT.getVectorElementType();
-  EVT ResVT = N->getValueType(0);
+//   // if (!N->getValueType(0).isFloatingPoint())
+//   //   return SDValue();

-  unsigned VecSize = VecVT.getSizeInBits();
-  unsigned VecEltSize = VecEltVT.getSizeInBits();
+//   // SelectionDAG &DAG = DCI.DAG;

-  if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
-      allUsesHaveSourceMods(N)) {
-    SDLoc SL(N);
-    SDValue Idx = N->getOperand(1);
-    SDValue Elt =
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx);
-    return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
-  }
-
-  // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
-  // =>
-  // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
-  // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
-  // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
-  if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
-    SDLoc SL(N);
-    SDValue Idx = N->getOperand(1);
-    unsigned Opc = Vec.getOpcode();
+//   // // Iterate the operands and check for a source modifier. If present,
+//   // // propagate the source modifier to the user and drop the srcmod
+//   // // from the BUILD_VECTOR element.
+//   // for (unsigned I = 0; I < N->getNumOperands(); I++) {
+//   //   SDValue E = N->getOperand(I);
+//   //   if (E->getOpcode() != ISD::FNEG && E->getOpcode() != ISD::FABS)
+//   //     continue;

-    switch (Opc) {
-    default:
-      break;
-    // TODO: Support other binary operations.
-    case ISD::FADD:
-    case ISD::FSUB:
-    case ISD::FMUL:
-    case ISD::ADD:
-    case ISD::UMIN:
-    case ISD::UMAX:
-    case ISD::SMIN:
-    case ISD::SMAX:
-    case ISD::FMAXNUM:
-    case ISD::FMINNUM:
-    case ISD::FMAXNUM_IEEE:
-    case ISD::FMINNUM_IEEE:
-    case ISD::FMAXIMUM:
-    case ISD::FMINIMUM: {
-      SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
-                                 Vec.getOperand(0), Idx);
-      SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
-                                 Vec.getOperand(1), Idx);
-
-      DCI.AddToWorklist(Elt0.getNode());
-      DCI.AddToWorklist(Elt1.getNode());
-      return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
-    }
-    }
-  }
-
-  // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
-  if (shouldExpandVectorDynExt(N)) {
-    SDLoc SL(N);
-    SDValue Idx = N->getOperand(1);
-    SDValue V;
-    for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
-      SDValue IC = DAG.getVectorIdxConstant(I, SL);
-      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
-      if (I == 0)
-        V = Elt;
-      else
-        V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+//   //   // Users through which we can propagate will include users of
+//   //   // extract_element on this vector, so we need to peek through them.
+//   // }
+
+//   // SmallVector<SDNode *, 4> UsersToModify;
+
+//   // // If a use of the BUILD_VECTOR supports source mods, the modifier
+//   // // can be propagated.
+//   // for (SDNode *U : N->users()) {
+//   //   if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+//   //     if (!allUsesHaveSourceMods(U))
+//   //       continue;
+//   //   UsersToModify.push_back(U);
+//   // }
+
+//   // for (auto Node : UsersToModify) {
+
+//   // }
+
+//   return SDValue();
+// }
+
+SDValue SITargetLowering::performExtractVectorEltCombine(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SDValue Vec = N->getOperand(0);
+  SelectionDAG &DAG = DCI.DAG;
+
+  EVT VecVT = Vec.getValueType();
+  EVT VecEltVT = VecVT.getVectorElementType();
+  EVT ResVT = N->getValueType(0);
+
+  unsigned VecSize = VecVT.getSizeInBits();
+  unsigned VecEltSize = VecEltVT.getSizeInBits();
+
+  if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
+      allUsesHaveSourceMods(N)) {
+    SDLoc SL(N);
+    SDValue Idx = N->getOperand(1);
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+                              Vec.getOperand(0), Idx);
+    return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
+  }
+
+  // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
+  // =>
+  // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
+  // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
+  // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
+  if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
+    SDLoc SL(N);
+    SDValue Idx = N->getOperand(1);
+    unsigned Opc = Vec.getOpcode();
+
+    switch (Opc) {
+    default:
+      break;
+    // TODO: Support other binary operations.
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::ADD:
+    case ISD::UMIN:
+    case ISD::UMAX:
+    case ISD::SMIN:
+    case ISD::SMAX:
+    case ISD::FMAXNUM:
+    case ISD::FMINNUM:
+    case ISD::FMAXNUM_IEEE:
+    case ISD::FMINNUM_IEEE:
+    case ISD::FMAXIMUM:
+    case ISD::FMINIMUM: {
+      SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+                                 Vec.getOperand(0), Idx);
+      SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+                                 Vec.getOperand(1), Idx);
+
+      DCI.AddToWorklist(Elt0.getNode());
+      DCI.AddToWorklist(Elt1.getNode());
+      return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
+    }
+    }
  }
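Editor's note: the scalarization fold above relies on the listed binary opcodes being elementwise, so extracting a lane after the vector op equals applying the scalar op to the extracted lanes. A small standalone illustration, using std::min as a stand-in for one of the listed opcodes; not part of the patch:

```cpp
#include <algorithm>
#include <array>
#include <cassert>

// extract(umin(Vec1, Vec2), Idx) == umin(extract(Vec1, Idx), extract(Vec2, Idx))
int main() {
  std::array<unsigned, 4> Vec1{7, 2, 9, 4};
  std::array<unsigned, 4> Vec2{3, 8, 1, 6};
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    std::array<unsigned, 4> Min;
    for (unsigned I = 0; I < 4; ++I)
      Min[I] = std::min(Vec1[I], Vec2[I]);            // vector-BINOP, then extract
    unsigned Scalar = std::min(Vec1[Idx], Vec2[Idx]); // extract, then scalar-BINOP
    assert(Min[Idx] == Scalar);
  }
}
```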
-    return V;
-  }

-  if (!DCI.isBeforeLegalize())
-    return SDValue();
+  // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
+  if (shouldExpandVectorDynExt(N)) {
+    SDLoc SL(N);
+    SDValue Idx = N->getOperand(1);
+    SDValue V;
+    for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+      SDValue IC = DAG.getVectorIdxConstant(I, SL);
+      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
+      if (I == 0)
+        V = Elt;
+      else
+        V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+    }
+    return V;
+  }

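Editor's note: the dynamic-index expansion above builds a compare/select chain seeded with lane 0 and overwritten whenever the index matches a later lane. A standalone sketch of the shape of that expansion; the helper name ExtractViaSelects is hypothetical and not from the patch:

```cpp
#include <cassert>

// extract_vector_elt(<4 x float> Vec, Idx) lowered as a chain of selects.
static float ExtractViaSelects(const float (&Vec)[4], unsigned Idx) {
  float V = Vec[0];              // I == 0: V = Elt
  for (unsigned I = 1; I < 4; ++I)
    V = (Idx == I) ? Vec[I] : V; // I > 0: V = select(Idx == I, Elt, V)
  return V;
}

int main() {
  const float Vec[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    assert(ExtractViaSelects(Vec, Idx) == Vec[Idx]);
}
```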
-  // Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
-  // elements. This exposes more load reduction opportunities by replacing
-  // multiple small extract_vector_elements with a single 32-bit extract.
-  auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
-  if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
-      VecSize > 32 && VecSize % 32 == 0 && Idx) {
-    EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
-
-    unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
-    unsigned EltIdx = BitIndex / 32;
-    unsigned LeftoverBitIdx = BitIndex % 32;
-    SDLoc SL(N);
+  if (!DCI.isBeforeLegalize())
+    return SDValue();

-    SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
-    DCI.AddToWorklist(Cast.getNode());
+  // Try to turn sub-dword accesses of vectors into accesses of the same
+  // 32-bit elements. This exposes more load reduction opportunities by
+  // replacing multiple small extract_vector_elements with a single 32-bit
+  // extract.
+  auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
+      VecSize > 32 && VecSize % 32 == 0 && Idx) {
+    EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
+
+    unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
+    unsigned EltIdx = BitIndex / 32;
+    unsigned LeftoverBitIdx = BitIndex % 32;
+    SDLoc SL(N);

-    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
-                              DAG.getConstant(EltIdx, SL, MVT::i32));
-    DCI.AddToWorklist(Elt.getNode());
-    SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
-                              DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
-    DCI.AddToWorklist(Srl.getNode());
+    SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
+    DCI.AddToWorklist(Cast.getNode());

-    EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
-    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
-    DCI.AddToWorklist(Trunc.getNode());
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
+                              DAG.getConstant(EltIdx, SL, MVT::i32));
+    DCI.AddToWorklist(Elt.getNode());
+    SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
+                              DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
+    DCI.AddToWorklist(Srl.getNode());

-    if (VecEltVT == ResVT) {
-      return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
+    EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
+    DCI.AddToWorklist(Trunc.getNode());
+
+    if (VecEltVT == ResVT) {
+      return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
+    }
+
+    assert(ResVT.isScalarInteger());
+    return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
  }

-    assert(ResVT.isScalarInteger());
-    return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
+  return SDValue();
}

-  return SDValue();
-}
-
SDValue
SITargetLowering::performInsertVectorEltCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
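Editor's note: a worked instance of the BitIndex/EltIdx/LeftoverBitIdx arithmetic in the sub-dword fold above, assuming little-endian element packing as on AMDGPU; this is an illustration only and not part of the patch:

```cpp
#include <cassert>

// Extracting element 5 of a loaded <8 x i16> (128 bits) becomes: take 32-bit
// word 2 of the bitcast <4 x i32>, shift right by 16, truncate to i16.
int main() {
  const unsigned VecEltSize = 16;                    // i16 elements
  const unsigned ExtractIdx = 5;                     // element being extracted
  const unsigned BitIndex = ExtractIdx * VecEltSize; // 80
  const unsigned EltIdx = BitIndex / 32;             // 32-bit word 2
  const unsigned LeftoverBitIdx = BitIndex % 32;     // shift amount 16

  // Check against a direct bit-level model of the same vector.
  unsigned short Elts[8] = {0x1111, 0x2222, 0x3333, 0x4444,
                            0x5555, 0x6666, 0x7777, 0x8888};
  unsigned Words[4];
  for (unsigned I = 0; I < 4; ++I) // little-endian packing of i16 pairs
    Words[I] = Elts[2 * I] | (static_cast<unsigned>(Elts[2 * I + 1]) << 16);

  unsigned short Extracted =
      static_cast<unsigned short>(Words[EltIdx] >> LeftoverBitIdx);
  assert(EltIdx == 2 && LeftoverBitIdx == 16 && Extracted == Elts[ExtractIdx]);
}
```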