-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Analysis][AArch64] Add cost model for loop.dependence.{war/raw}.mask #167551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1032,6 +1032,40 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | |
| } | ||
| break; | ||
| } | ||
| case Intrinsic::loop_dependence_raw_mask: | ||
| case Intrinsic::loop_dependence_war_mask: { | ||
| auto *EltSize = cast<ConstantInt>(ICA.getArgs()[2]); | ||
| EVT VecVT = getTLI()->getValueType(DL, RetTy); | ||
| // An invalid element size and return type combination must be expanded. | ||
| bool MustBeExpanded = false; | ||
| switch (EltSize->getSExtValue()) { | ||
| case 1: | ||
| if (VecVT != MVT::v16i1 && VecVT != MVT::nxv16i1) | ||
| MustBeExpanded = true; | ||
| break; | ||
| case 2: | ||
| if (VecVT != MVT::v8i1 && VecVT != MVT::nxv8i1) | ||
| MustBeExpanded = true; | ||
| break; | ||
| case 4: | ||
| if (VecVT != MVT::v4i1 && VecVT != MVT::nxv4i1) | ||
| MustBeExpanded = true; | ||
| break; | ||
| case 8: | ||
| if (VecVT != MVT::v2i1 && VecVT != MVT::nxv2i1) | ||
| MustBeExpanded = true; | ||
| break; | ||
| default: | ||
| MustBeExpanded = true; | ||
| // Other element sizes are incompatible with whilewr/whilerw, so expand instead. | ||
| break; | ||
| } | ||
|
||
|
|
||
| // The whilewr/whilerw instructions require SVE2 or SME. | ||
| if (MustBeExpanded || (!ST->hasSVE2() && !ST->hasSME())) | ||
| break; | ||
| return 1; | ||
| } | ||
| case Intrinsic::experimental_vector_extract_last_active: | ||
| if (ST->isSVEorStreamingSVEAvailable()) { | ||
| auto [LegalCost, _] = getTypeLegalizationCost(ICA.getArgTypes()[0]); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 | ||
| ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-EXPANDED | ||
| ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s --check-prefix=CHECK | ||
| ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+sme | FileCheck %s --check-prefix=CHECK | ||
|
|
||
| ; loop.dependence.{war,raw}.mask can be lowered to while{wr,rw} if SVE2 or SME is enabled. | ||
| define void @loop_dependence_war_mask(ptr %a, ptr %b) { | ||
| ; CHECK-EXPANDED-LABEL: 'loop_dependence_war_mask' | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| ; CHECK-LABEL: 'loop_dependence_war_mask' | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| entry: | ||
| %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) | ||
| %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) | ||
| %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) | ||
| %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) | ||
| ret void | ||
| } | ||
|
|
||
| define void @loop_dependence_raw_mask(ptr %a, ptr %b) { | ||
| ; CHECK-EXPANDED-LABEL: 'loop_dependence_raw_mask' | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| ; CHECK-LABEL: 'loop_dependence_raw_mask' | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| entry: | ||
| %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) | ||
| %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) | ||
| %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) | ||
| %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) | ||
| ret void | ||
| } | ||
|
|
||
| ; Invalid element size and return type combinations must be expanded, even with SVE2 or SME enabled. | ||
| define void @loop_dependence_war_mask_invalid(ptr %a, ptr %b) { | ||
| ; CHECK-EXPANDED-LABEL: 'loop_dependence_war_mask_invalid' | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res5 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res6 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res7 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res9 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| ; CHECK-LABEL: 'loop_dependence_war_mask_invalid' | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res5 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %res6 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res7 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res9 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| entry: | ||
| %res5 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 8) | ||
| %res6 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 4) | ||
| %res7 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 2) | ||
| %res8 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 1) | ||
| %res9 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 10) | ||
| ret void | ||
| } | ||
|
|
||
| define void @loop_dependence_raw_mask_invalid(ptr %a, ptr %b) { | ||
| ; CHECK-EXPANDED-LABEL: 'loop_dependence_raw_mask_invalid' | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %res5 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res6 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res7 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res9 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) | ||
| ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| ; CHECK-LABEL: 'loop_dependence_raw_mask_invalid' | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %res5 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %res6 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res7 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res9 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
| ; | ||
| entry: | ||
| %res5 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 8) | ||
| %res6 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 4) | ||
| %res7 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 2) | ||
| %res8 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 1) | ||
| %res9 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 10) | ||
| ret void | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a comment that explains why the cost is broken down like this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I hope that's comprehensive enough.