@@ -894,6 +894,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
894894 ISD::UADDO_CARRY,
895895 ISD::SUB,
896896 ISD::USUBO_CARRY,
897+ ISD::MUL,
897898 ISD::FADD,
898899 ISD::FSUB,
899900 ISD::FDIV,
@@ -909,9 +910,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
909910 ISD::UMIN,
910911 ISD::UMAX,
911912 ISD::SETCC,
913+ ISD::SELECT,
914+ ISD::SMIN,
915+ ISD::SMAX,
916+ ISD::UMIN,
917+ ISD::UMAX,
912918 ISD::AND,
913919 ISD::OR,
914920 ISD::XOR,
921+ ISD::SHL,
922+ ISD::SRL,
923+ ISD::SRA,
915924 ISD::FSHR,
916925 ISD::SINT_TO_FP,
917926 ISD::UINT_TO_FP,
@@ -1942,13 +1951,6 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
19421951 switch (Op) {
19431952 case ISD::LOAD:
19441953 case ISD::STORE:
1945-
1946- // These operations are done with 32-bit instructions anyway.
1947- case ISD::AND:
1948- case ISD::OR:
1949- case ISD::XOR:
1950- case ISD::SELECT:
1951- // TODO: Extensions?
19521954 return true;
19531955 default:
19541956 return false;
@@ -6731,6 +6733,93 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
67316733 return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(0), TruncExp);
67326734}
67336735
6736+ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
6737+ switch (Op->getOpcode()) {
6738+ case ISD::SRA:
6739+ case ISD::SMIN:
6740+ case ISD::SMAX:
6741+ return ISD::SIGN_EXTEND;
6742+ case ISD::SRL:
6743+ case ISD::UMIN:
6744+ case ISD::UMAX:
6745+ return ISD::ZERO_EXTEND;
6746+ case ISD::ADD:
6747+ case ISD::SUB:
6748+ case ISD::AND:
6749+ case ISD::OR:
6750+ case ISD::XOR:
6751+ case ISD::SHL:
6752+ case ISD::SELECT:
6753+ case ISD::MUL:
6754+ // operation result won't be influenced by garbage high bits.
6755+ // TODO: are all of those cases correct, and are there more?
6756+ return ISD::ANY_EXTEND;
6757+ case ISD::SETCC: {
6758+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6759+ return ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6760+ }
6761+ default:
6762+ llvm_unreachable("unexpected opcode!");
6763+ }
6764+ }
6765+
6766+ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
6767+ DAGCombinerInfo &DCI) const {
6768+ const unsigned Opc = Op.getOpcode();
6769+ assert(Opc == ISD::ADD || Opc == ISD::SUB || Opc == ISD::SHL ||
6770+ Opc == ISD::SRL || Opc == ISD::SRA || Opc == ISD::AND ||
6771+ Opc == ISD::OR || Opc == ISD::XOR || Opc == ISD::MUL ||
6772+ Opc == ISD::SETCC || Opc == ISD::SELECT || Opc == ISD::SMIN ||
6773+ Opc == ISD::SMAX || Opc == ISD::UMIN || Opc == ISD::UMAX);
6774+
6775+ EVT OpTy = (Opc != ISD::SETCC) ? Op.getValueType()
6776+ : Op->getOperand(0).getValueType();
6777+ auto ExtTy = OpTy.changeElementType(MVT::i32);
6778+
6779+ if (DCI.isBeforeLegalizeOps() ||
6780+ isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
6781+ return SDValue();
6782+
6783+ auto &DAG = DCI.DAG;
6784+
6785+ SDLoc DL(Op);
6786+ SDValue LHS;
6787+ SDValue RHS;
6788+ if (Opc == ISD::SELECT) {
6789+ LHS = Op->getOperand(1);
6790+ RHS = Op->getOperand(2);
6791+ } else {
6792+ LHS = Op->getOperand(0);
6793+ RHS = Op->getOperand(1);
6794+ }
6795+
6796+ const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
6797+ LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});
6798+
6799+ // Special case: for shifts, the RHS always needs a zext.
6800+ if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL ||
6801+ Op.getOpcode() == ISD::SRA)
6802+ RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
6803+ else
6804+ RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
6805+
6806+ // setcc always return i1/i1 vec so no need to truncate after.
6807+ if (Opc == ISD::SETCC) {
6808+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6809+ return DAG.getSetCC(DL, Op.getValueType(), LHS, RHS, CC);
6810+ }
6811+
6812+ // For other ops, we extend the operation's return type as well so we need to
6813+ // truncate back to the original type.
6814+ SDValue NewVal;
6815+ if (Opc == ISD::SELECT)
6816+ NewVal = DAG.getNode(ISD::SELECT, DL, ExtTy, {Op->getOperand(0), LHS, RHS});
6817+ else
6818+ NewVal = DAG.getNode(Opc, DL, ExtTy, {LHS, RHS});
6819+
6820+ return DAG.getZExtOrTrunc(NewVal, DL, OpTy);
6821+ }
6822+
67346823// Custom lowering for vector multiplications and s_mul_u64.
67356824SDValue SITargetLowering::lowerMUL(SDValue Op, SelectionDAG &DAG) const {
67366825 EVT VT = Op.getValueType();
@@ -14623,8 +14712,32 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
1462314712
1462414713SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1462514714 DAGCombinerInfo &DCI) const {
14715+ switch (N->getOpcode()) {
14716+ case ISD::ADD:
14717+ case ISD::SUB:
14718+ case ISD::SHL:
14719+ case ISD::SRL:
14720+ case ISD::SRA:
14721+ case ISD::AND:
14722+ case ISD::OR:
14723+ case ISD::XOR:
14724+ case ISD::MUL:
14725+ case ISD::SETCC:
14726+ case ISD::SELECT:
14727+ case ISD::SMIN:
14728+ case ISD::SMAX:
14729+ case ISD::UMIN:
14730+ case ISD::UMAX:
14731+ if (auto Res = promoteUniformOpToI32(SDValue(N, 0), DCI))
14732+ return Res;
14733+ break;
14734+ default:
14735+ break;
14736+ }
14737+
1462614738 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
1462714739 return SDValue();
14740+
1462814741 switch (N->getOpcode()) {
1462914742 case ISD::ADD:
1463014743 return performAddCombine(N, DCI);
0 commit comments