@@ -1143,6 +1143,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11431143 ISD::SIGN_EXTEND_INREG, ISD::CONCAT_VECTORS,
11441144 ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
11451145 ISD::STORE, ISD::BUILD_VECTOR});
1146+ setTargetDAGCombine(ISD::SMIN);
11461147 setTargetDAGCombine(ISD::TRUNCATE);
11471148 setTargetDAGCombine(ISD::LOAD);
11481149
@@ -20998,23 +20999,18 @@ static SDValue performBuildVectorCombine(SDNode *N,
2099820999
2099921000// A special combine for the sqdmulh family of instructions.
2100021001// smin( sra ( mul( sext v0, sext v1 ), SHIFT_AMOUNT ),
21001- // SATURATING_VAL ) can be reduced to sext( sqdmulh(...) )
21002+ // SATURATING_VAL ) can be reduced to sqdmulh(...)
2100221003static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
2100321004
21004- if (N->getOpcode() != ISD::TRUNCATE )
21005+ if (N->getOpcode() != ISD::SMIN )
2100521006 return SDValue();
2100621007
2100721008 EVT VT = N->getValueType(0);
2100821009
2100921010 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
2101021011 return SDValue();
2101121012
21012- SDValue SMin = N->getOperand(0);
21013-
21014- if (SMin.getOpcode() != ISD::SMIN)
21015- return SDValue();
21016-
21017- ConstantSDNode *Clamp = isConstOrConstSplat(SMin.getOperand(1));
21013+ ConstantSDNode *Clamp = isConstOrConstSplat(N->getOperand(1));
2101821014
2101921015 if (!Clamp)
2102021016 return SDValue();
@@ -21034,8 +21030,8 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
2103421030 return SDValue();
2103521031 }
2103621032
21037- SDValue Sra = SMin. getOperand(0);
21038- if (Sra.getOpcode() != ISD::SRA)
21033+ SDValue Sra = N-> getOperand(0);
21034+ if (Sra.getOpcode() != ISD::SRA || !Sra.hasOneUse() )
2103921035 return SDValue();
2104021036
2104121037 ConstantSDNode *RightShiftVec = isConstOrConstSplat(Sra.getOperand(1));
@@ -21062,11 +21058,27 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
2106221058 SExt0Type.getFixedSizeInBits() > 128)
2106321059 return SDValue();
2106421060
21065- SDValue V0 = SExt0.getOperand(0);
21066- SDValue V1 = SExt1.getOperand(0);
21061+ // Source vectors with width < 64 are illegal and will need to be extended
21062+ unsigned SourceVectorWidth = SExt0Type.getFixedSizeInBits();
21063+ SDValue V0 = (SourceVectorWidth < 64) ? SExt0 : SExt0.getOperand(0);
21064+ SDValue V1 = (SourceVectorWidth < 64) ? SExt1 : SExt1.getOperand(0);
21065+
21066+ SDLoc DL(N);
21067+ SDValue SQDMULH =
21068+ DAG.getNode(AArch64ISD::SQDMULH, DL, V0.getValueType(), V0, V1);
21069+ EVT DestVT = N->getValueType(0);
21070+ if (DestVT.getScalarSizeInBits() > SExt0Type.getScalarSizeInBits())
21071+ return DAG.getNode(ISD::SIGN_EXTEND, DL, DestVT, SQDMULH);
21072+
21073+ return SQDMULH;
21074+ }
21075+
21076+ static SDValue performSMINCombine(SDNode *N, SelectionDAG &DAG) {
21077+ if (SDValue V = trySQDMULHCombine(N, DAG)) {
21078+ return V;
21079+ }
2106721080
21068- SDLoc DL(SMin);
21069- return DAG.getNode(AArch64ISD::SQDMULH, DL, SExt0Type, V0, V1);
21081+ return SDValue();
2107021082}
2107121083
2107221084static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
@@ -21083,10 +21095,6 @@ static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
2108321095 return DAG.getNode(N0.getOpcode(), DL, VT, Op);
2108421096 }
2108521097
21086- if (SDValue V = trySQDMULHCombine(N, DAG)) {
21087- return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
21088- }
21089-
2109021098 // Performing the following combine produces a preferable form for ISEL.
2109121099 // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2))
2109221100 if (DCI.isAfterLegalizeDAG() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -26824,6 +26832,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
2682426832 return performAddSubCombine(N, DCI);
2682526833 case ISD::BUILD_VECTOR:
2682626834 return performBuildVectorCombine(N, DCI, DAG);
26835+ case ISD::SMIN:
26836+ return performSMINCombine(N, DAG);
2682726837 case ISD::TRUNCATE:
2682826838 return performTruncateCombine(N, DAG, DCI);
2682926839 case AArch64ISD::ANDS:
0 commit comments