@@ -20467,6 +20467,69 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
20467
20467
}
20468
20468
}
20469
20469
20470
+ // Given an extract(load) or extract(extend(load)), produce a scalar load
20471
+ // instead to avoid the cross-register-bank copies.
20472
+ if (DCI.isAfterLegalizeDAG() && Subtarget->isLittleEndian() &&
20473
+ VT.isInteger() && isa<ConstantSDNode>(N1)) {
20474
+ SDValue LoadN0 = N0;
20475
+ // Look through sext/zext and extract_subvector / insert_subvector if
20476
+ // required.
20477
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
20478
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
20479
+ N0.getOpcode() == ISD::ANY_EXTEND) &&
20480
+ N0.getOperand(0).hasOneUse())
20481
+ LoadN0 = N0.getOperand(0);
20482
+ unsigned OffsetElts = 0;
20483
+ if (LoadN0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
20484
+ OffsetElts = LoadN0.getConstantOperandVal(1);
20485
+ LoadN0 = LoadN0.getOperand(0);
20486
+ }
20487
+ if (LoadN0.getOpcode() == ISD::INSERT_SUBVECTOR &&
20488
+ LoadN0.getOperand(0).isUndef() &&
20489
+ isNullConstant(LoadN0.getOperand(2)) &&
20490
+ LoadN0.getOperand(1).hasOneUse())
20491
+ LoadN0 = LoadN0.getOperand(1);
20492
+
20493
+ // Check all the uses are valid and can be scalarized. We check that all the
20494
+ // uses are extracts and those extracts are not re-inserted into an
20495
+ // operation best treated as a vector register.
20496
+ auto Load = dyn_cast<LoadSDNode>(LoadN0);
20497
+ if (Load && Load->isSimple() && ISD::isNormalLoad(Load) &&
20498
+ Load->getMemoryVT().isByteSized() &&
20499
+ all_of(N0->uses(), [&](const SDUse &U) {
20500
+ return U.getResNo() != N0.getResNo() ||
20501
+ (U.getUser()->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20502
+ !any_of(U.getUser()->uses(), [](const SDUse &U2) {
20503
+ return U2.getUser()->getOpcode() ==
20504
+ ISD::INSERT_VECTOR_ELT ||
20505
+ U2.getUser()->getOpcode() == ISD::BUILD_VECTOR ||
20506
+ U2.getUser()->getOpcode() == ISD::SCALAR_TO_VECTOR;
20507
+ }));
20508
+ })) {
20509
+
20510
+ SDLoc DL(Load);
20511
+
20512
+ // Generate a new scalar load.
20513
+ unsigned Offset = (OffsetElts + N->getConstantOperandVal(1)) *
20514
+ Load->getValueType(0).getScalarSizeInBits() / 8;
20515
+ SDValue BasePtr = DAG.getObjectPtrOffset(
20516
+ DL, Load->getBasePtr(), DAG.getConstant(Offset, DL, MVT::i64));
20517
+ ISD::LoadExtType ExtType =
20518
+ N0.getOpcode() == ISD::ZERO_EXTEND
20519
+ ? ISD::ZEXTLOAD
20520
+ : (N0.getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD
20521
+ : ISD::EXTLOAD);
20522
+ SDValue ScalarLoad =
20523
+ DAG.getExtLoad(ExtType, DL, VT, Load->getChain(), BasePtr,
20524
+ Load->getPointerInfo().getWithOffset(Offset),
20525
+ Load->getValueType(0).getScalarType(),
20526
+ commonAlignment(Load->getAlign(), Offset),
20527
+ Load->getMemOperand()->getFlags(), Load->getAAInfo());
20528
+ DAG.makeEquivalentMemoryOrdering(Load, ScalarLoad);
20529
+ return ScalarLoad;
20530
+ }
20531
+ }
20532
+
20470
20533
return SDValue();
20471
20534
}
20472
20535
0 commit comments