@@ -13331,43 +13331,31 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- // Fold the fneg of a vselect into the v2 vselect operands.
- // xor (vselect c, a, b), 0x80000000 ->
- // bitcast (vselect c, (fneg (bitcast a)), (fneg (bitcast b)))
- if (VT == MVT::v2i32 && LHS.getNumOperands() > 1) {
-
- const ConstantSDNode *CRHS0 = dyn_cast<ConstantSDNode>(RHS.getOperand(0));
- const ConstantSDNode *CRHS1 = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
- SDValue LHS_0 = LHS.getOperand(0);
- SDValue LHS_1 = LHS.getOperand(1);
-
- if (LHS.getOpcode() == ISD::VSELECT && CRHS0 &&
- CRHS0->getAPIntValue().isSignMask() &&
- shouldFoldFNegIntoSrc(N, LHS_0) && CRHS1 &&
- CRHS1->getAPIntValue().isSignMask() &&
- shouldFoldFNegIntoSrc(N, LHS_1)) {
-
- SDLoc DL(N);
- SDValue CastLHS =
- DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(1));
- SDValue CastRHS =
- DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, LHS->getOperand(2));
- SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastLHS);
- SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::v2f32, CastRHS);
- SDValue NewSelect = DAG.getNode(ISD::VSELECT, DL, MVT::v2f32,
- LHS->getOperand(0), FNegLHS, FNegRHS);
- return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect);
- }
- }
-
- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ const ConstantSDNode *CRHS = isConstOrConstSplat(RHS);
if (CRHS && VT == MVT::i64) {
if (SDValue Split =
splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
return Split;
}
+ // v2i32 (xor (vselect cc, x, y), K) ->
+ //   (v2i32 vselect cc, (xor x, K), (xor y, K)). This enables the xor to be
+ //   replaced with source modifiers when the select is lowered to CNDMASK.
+ // TODO: Remove; this prevents regressions in fneg-modifier-casting.ll.
+ unsigned Opc = LHS.getOpcode();
+ if (((Opc == ISD::VSELECT && VT == MVT::v2i32) || (Opc == ISD::SELECT && VT == MVT::i64)) && CRHS && CRHS->getAPIntValue().isSignMask()) {
+ SDValue CC = LHS->getOperand(0);
+ SDValue TrueVal = LHS->getOperand(1);
+ SDValue FalseVal = LHS->getOperand(2);
+ SDValue XTrue = DAG.getNode(ISD::XOR, SDLoc(N), VT, TrueVal, RHS);
+ SDValue XFalse = DAG.getNode(ISD::XOR, SDLoc(N), VT, FalseVal, RHS);
+ SDValue XSelect = DAG.getNode(Opc, SDLoc(N), VT, CC, XTrue, XFalse);
+ return XSelect;
+ }
+
+
+
// Make sure to apply the 64-bit constant splitting fold before trying to fold
// fneg-like xors into 64-bit select.
if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
@@ -14332,125 +14320,165 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
EltSize, NumElem, Idx->isDivergent(), getSubtarget());
}
- SDValue
- SITargetLowering::performExtractVectorEltCombine (SDNode *N,
- DAGCombinerInfo &DCI) const {
- SDValue Vec = N->getOperand(0);
- SelectionDAG &DAG = DCI.DAG ;
+ // SDValue
+ // SITargetLowering::performBuildVectorCombine (SDNode *N,
+ // DAGCombinerInfo &DCI) const {
+ // // if ( N->use_empty())
+ // // return SDValue() ;
- EVT VecVT = Vec.getValueType();
- EVT VecEltVT = VecVT.getVectorElementType();
- EVT ResVT = N->getValueType(0);
+ // // if(!N->getValueType(0).isFloatingPoint())
+ // // return SDValue();
- unsigned VecSize = VecVT.getSizeInBits();
- unsigned VecEltSize = VecEltVT.getSizeInBits();
+ // // SelectionDAG &DAG = DCI.DAG;
- if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
- allUsesHaveSourceMods(N)) {
- SDLoc SL(N);
- SDValue Idx = N->getOperand(1);
- SDValue Elt =
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec.getOperand(0), Idx);
- return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
- }
-
- // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
- // =>
- // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
- // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
- // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
- if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
- SDLoc SL(N);
- SDValue Idx = N->getOperand(1);
- unsigned Opc = Vec.getOpcode();
+ // // // Iterate the operands. Check if source modifier. If so, propagate
+ // // // the source modifier to the user and the srcmod from the
+ // // // BUILD_VECTOR element.
+ // // for (unsigned I = 0; I < N->getNumOperands(); I++) {
+ // // SDValue E = N->getOperand(I);
+ // // if (E->getOpcode() != ISD::FNEG && E->getOpcode() != ISD::FABS)
+ // // continue;
- switch (Opc) {
- default:
- break;
- // TODO: Support other binary operations.
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::ADD:
- case ISD::UMIN:
- case ISD::UMAX:
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::FMAXNUM:
- case ISD::FMINNUM:
- case ISD::FMAXNUM_IEEE:
- case ISD::FMINNUM_IEEE:
- case ISD::FMAXIMUM:
- case ISD::FMINIMUM: {
- SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
- Vec.getOperand(0), Idx);
- SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
- Vec.getOperand(1), Idx);
-
- DCI.AddToWorklist(Elt0.getNode());
- DCI.AddToWorklist(Elt1.getNode());
- return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
- }
- }
- }
-
- // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
- if (shouldExpandVectorDynExt(N)) {
- SDLoc SL(N);
- SDValue Idx = N->getOperand(1);
- SDValue V;
- for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
- SDValue IC = DAG.getVectorIdxConstant(I, SL);
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
- if (I == 0)
- V = Elt;
- else
- V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+ // // // Users through which we can propagate will include users of
+ // // // extract_element on this vector, so need to peek-through.
+ // // }
+
+ // // SmallVector<SDNode*, 4> UsersToModify;
+
+ // // // If the use of the BUILD_VECTOR supports source mods it can be
+ // // // propagated.
+ // // for (SDNode *U : N->users()) {
+ // // if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ // // if (!allUsesHaveSourceMods(U))
+ // // continue;
+ // // UsersToModify.push_back(U);
+ // // }
+
+ // // for (auto Node : UsersToModify) {
+
+ // // }
+
+ // return SDValue();
+ // }
+
+ SDValue SITargetLowering::performExtractVectorEltCombine(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SDValue Vec = N->getOperand(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VecVT = Vec.getValueType();
+ EVT VecEltVT = VecVT.getVectorElementType();
+ EVT ResVT = N->getValueType(0);
+
+ unsigned VecSize = VecVT.getSizeInBits();
+ unsigned VecEltSize = VecEltVT.getSizeInBits();
+
+ if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) &&
+ allUsesHaveSourceMods(N)) {
+ SDLoc SL(N);
+ SDValue Idx = N->getOperand(1);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+ Vec.getOperand(0), Idx);
+ return DAG.getNode(Vec.getOpcode(), SL, ResVT, Elt);
+ }
+
+ // ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
+ // =>
+ // Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
+ // Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
+ // ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
+ if (Vec.hasOneUse() && DCI.isBeforeLegalize() && VecEltVT == ResVT) {
+ SDLoc SL(N);
+ SDValue Idx = N->getOperand(1);
+ unsigned Opc = Vec.getOpcode();
+
+ switch (Opc) {
+ default:
+ break;
+ // TODO: Support other binary operations.
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::ADD:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM_IEEE:
+ case ISD::FMINNUM_IEEE:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM: {
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+ Vec.getOperand(0), Idx);
+ SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT,
+ Vec.getOperand(1), Idx);
+
+ DCI.AddToWorklist(Elt0.getNode());
+ DCI.AddToWorklist(Elt1.getNode());
+ return DAG.getNode(Opc, SL, ResVT, Elt0, Elt1, Vec->getFlags());
+ }
+ }
}
- return V;
- }
- if (!DCI.isBeforeLegalize())
- return SDValue();
+ // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
+ if (shouldExpandVectorDynExt(N)) {
+ SDLoc SL(N);
+ SDValue Idx = N->getOperand(1);
+ SDValue V;
+ for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
+ SDValue IC = DAG.getVectorIdxConstant(I, SL);
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, ResVT, Vec, IC);
+ if (I == 0)
+ V = Elt;
+ else
+ V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
+ }
+ return V;
+ }
- // Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
- // elements. This exposes more load reduction opportunities by replacing
- // multiple small extract_vector_elements with a single 32-bit extract.
- auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
- VecSize > 32 && VecSize % 32 == 0 && Idx) {
- EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
-
- unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
- unsigned EltIdx = BitIndex / 32;
- unsigned LeftoverBitIdx = BitIndex % 32;
- SDLoc SL(N);
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
- SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
- DCI.AddToWorklist(Cast.getNode());
+ // Try to turn sub-dword accesses of vectors into accesses of the same
+ // 32-bit elements. This exposes more load reduction opportunities by
+ // replacing multiple small extract_vector_elements with a single 32-bit
+ // extract.
+ auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (isa<MemSDNode>(Vec) && VecEltSize <= 16 && VecEltVT.isByteSized() &&
+ VecSize > 32 && VecSize % 32 == 0 && Idx) {
+ EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
+
+ unsigned BitIndex = Idx->getZExtValue() * VecEltSize;
+ unsigned EltIdx = BitIndex / 32;
+ unsigned LeftoverBitIdx = BitIndex % 32;
+ SDLoc SL(N);
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
- DAG.getConstant(EltIdx, SL, MVT::i32));
- DCI.AddToWorklist(Elt.getNode());
- SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
- DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
- DCI.AddToWorklist(Srl.getNode());
+ SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
+ DCI.AddToWorklist(Cast.getNode());
- EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
- DCI.AddToWorklist(Trunc.getNode());
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
+ DAG.getConstant(EltIdx, SL, MVT::i32));
+ DCI.AddToWorklist(Elt.getNode());
+ SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
+ DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
+ DCI.AddToWorklist(Srl.getNode());
- if (VecEltVT == ResVT) {
- return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
+ EVT VecEltAsIntVT = VecEltVT.changeTypeToInteger();
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VecEltAsIntVT, Srl);
+ DCI.AddToWorklist(Trunc.getNode());
+
+ if (VecEltVT == ResVT) {
+ return DAG.getNode(ISD::BITCAST, SL, VecEltVT, Trunc);
+ }
+
+ assert(ResVT.isScalarInteger());
+ return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
}
- assert(ResVT.isScalarInteger());
- return DAG.getAnyExtOrTrunc(Trunc, SL, ResVT);
+ return SDValue();
}
- return SDValue();
- }
-
SDValue
SITargetLowering::performInsertVectorEltCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
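
As an aside, here is a minimal standalone sketch (illustration only, not part of this patch; all names are local to the example, and it assumes C++20 for std::bit_cast) of the identity the new xor-of-select fold in performXorCombine relies on: xoring the sign mask 0x80000000 into the select result gives the same bits as xoring it into both select operands, and that xor is exactly an fneg on the float bit pattern, which is why it can later be folded into a source modifier on the CNDMASK.

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t SignMask = 0x80000000u; // the constant isSignMask() matches
  for (bool C : {false, true}) {
    uint32_t A = std::bit_cast<uint32_t>(1.5f);
    uint32_t B = std::bit_cast<uint32_t>(-2.25f);
    // xor applied after the select (the original DAG shape)...
    uint32_t XorOfSelect = (C ? A : B) ^ SignMask;
    // ...equals the select of xor'd operands (the rewritten shape).
    uint32_t SelectOfXors = C ? (A ^ SignMask) : (B ^ SignMask);
    assert(XorOfSelect == SelectOfXors);
    // xor with the sign mask is fneg on the float's bit pattern.
    assert(std::bit_cast<float>(XorOfSelect) == -(C ? 1.5f : -2.25f));
  }
}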