-
Notifications
You must be signed in to change notification settings - Fork 15k
[AArch64][CostModel] Add constraints on which partial reductions are #163728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5721,6 +5721,38 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost( | |
| return Cost; | ||
| } | ||
|
|
||
| // FIXME: | ||
| // 1. Do cost modelling for USDOT. | ||
| // 2. Refactor the whole code here. | ||
| if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) { | ||
| if (AccumLT.second.getScalarType() == MVT::i32 && | ||
| InputLT.second.getScalarType() == MVT::i16) { | ||
| // i16 -> i32 is supported in SVE 2.1 | ||
| if (ST->hasSVE2p1()) | ||
| return Cost; | ||
| // umlalt + umlalb. Same goes for signed types. | ||
| return Cost + 1; | ||
| } | ||
| if (AccumLT.second.getScalarType() == MVT::i64 && | ||
| InputLT.second.getScalarType() == MVT::i32) | ||
| return Cost + 1; | ||
| } | ||
| if (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() && | ||
| ST->hasDotProd() && !IsUSDot) { | ||
| // umull + umull2 + (2 * uaddw) + (2 * uaddw2). Same goes for signed types. | ||
| if (AccumLT.second.getScalarType() == MVT::i64 && | ||
| InputLT.second.getScalarType() == MVT::i16) | ||
| return Cost + 5; | ||
|
||
| // umlal + umlal2. Same goes for signed types. | ||
| if ((AccumLT.second.getScalarType() == MVT::i32 && | ||
| InputLT.second.getScalarType() == MVT::i16) || | ||
| (AccumLT.second.getScalarType() == MVT::i64 && | ||
| InputLT.second.getScalarType() == MVT::i32)) | ||
| return Cost + 1; | ||
| } | ||
|
|
||
| // FIXME: This should be more expensive for NEON as we see fmov instructions | ||
| // with very low throughput. | ||
| // Add additional cost for the extends that would need to be inserted. | ||
| return Cost + 4; | ||
|
||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This implements a new requirement and should go in a separate PR. Also, this code should be added just below line 5712, which is above this point in the file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Resolved.