-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Refactor insertWaveSizeFeature #154850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rampitec
merged 3 commits into
main
from
users/rampitec/08-21-_amdgpu_refactor_insertwavesizefeature
Aug 27, 2025
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,10 @@ | ||
| // RUN: not %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s | ||
| // RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s | ||
| // RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx900 -target-feature +wavefrontsize32 -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=GFX9 | ||
| // RUN: not %clang_cc1 -triple amdgcn -target-cpu gfx1250 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=GFX1250 | ||
|
|
||
| // CHECK: error: invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive | ||
| // GFX9: error: option 'wavefrontsize32' cannot be specified on this target | ||
| // GFX1250: error: option 'wavefrontsize64' cannot be specified on this target | ||
|
|
||
| kernel void test() {} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -471,6 +471,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, | |
| Features["setprio-inc-wg-inst"] = true; | ||
| Features["atomic-fmin-fmax-global-f32"] = true; | ||
| Features["atomic-fmin-fmax-global-f64"] = true; | ||
| Features["wavefrontsize32"] = true; | ||
| break; | ||
| case GK_GFX1201: | ||
| case GK_GFX1200: | ||
|
|
@@ -638,6 +639,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, | |
| Features["gws"] = true; | ||
| Features["vmem-to-lds-load-insts"] = true; | ||
| Features["atomic-fmin-fmax-global-f64"] = true; | ||
| Features["wavefrontsize64"] = true; | ||
| break; | ||
| case GK_GFX90A: | ||
| Features["gfx90a-insts"] = true; | ||
|
|
@@ -681,6 +683,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, | |
| Features["image-insts"] = true; | ||
| Features["s-memtime-inst"] = true; | ||
| Features["gws"] = true; | ||
| Features["wavefrontsize64"] = true; | ||
| break; | ||
| case GK_GFX705: | ||
| case GK_GFX704: | ||
|
|
@@ -698,6 +701,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, | |
| Features["gws"] = true; | ||
| Features["atomic-fmin-fmax-global-f32"] = true; | ||
| Features["atomic-fmin-fmax-global-f64"] = true; | ||
| Features["wavefrontsize64"] = true; | ||
| break; | ||
| case GK_NONE: | ||
| break; | ||
|
|
@@ -734,68 +738,37 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, | |
| } | ||
| } | ||
|
|
||
| static bool isWave32Capable(StringRef GPU, const Triple &T) { | ||
| bool IsWave32Capable = false; | ||
| // XXX - What does the member GPU mean if device name string passed here? | ||
| if (T.isAMDGCN()) { | ||
| switch (parseArchAMDGCN(GPU)) { | ||
| case GK_GFX1250: | ||
| case GK_GFX1201: | ||
| case GK_GFX1200: | ||
| case GK_GFX1153: | ||
| case GK_GFX1152: | ||
| case GK_GFX1151: | ||
| case GK_GFX1150: | ||
| case GK_GFX1103: | ||
| case GK_GFX1102: | ||
| case GK_GFX1101: | ||
| case GK_GFX1100: | ||
| case GK_GFX1036: | ||
| case GK_GFX1035: | ||
| case GK_GFX1034: | ||
| case GK_GFX1033: | ||
| case GK_GFX1032: | ||
| case GK_GFX1031: | ||
| case GK_GFX1030: | ||
| case GK_GFX1012: | ||
| case GK_GFX1011: | ||
| case GK_GFX1013: | ||
| case GK_GFX1010: | ||
| case GK_GFX12_GENERIC: | ||
| case GK_GFX11_GENERIC: | ||
| case GK_GFX10_3_GENERIC: | ||
| case GK_GFX10_1_GENERIC: | ||
| IsWave32Capable = true; | ||
| break; | ||
| default: | ||
| break; | ||
| } | ||
| } | ||
| return IsWave32Capable; | ||
| } | ||
|
|
||
| std::pair<FeatureError, StringRef> | ||
| AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, | ||
| StringMap<bool> &Features) { | ||
| bool IsWave32Capable = isWave32Capable(GPU, T); | ||
| StringMap<bool> DefaultFeatures; | ||
| fillAMDGPUFeatureMap(GPU, T, DefaultFeatures); | ||
|
|
||
| const bool IsNullGPU = GPU.empty(); | ||
| const bool TargetHasWave32 = DefaultFeatures.count("wavefrontsize32"); | ||
| const bool TargetHasWave64 = DefaultFeatures.count("wavefrontsize64"); | ||
| const bool HaveWave32 = Features.count("wavefrontsize32"); | ||
| const bool HaveWave64 = Features.count("wavefrontsize64"); | ||
| if (HaveWave32 && HaveWave64) { | ||
| return {AMDGPU::INVALID_FEATURE_COMBINATION, | ||
| "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"}; | ||
| } | ||
| if (HaveWave32 && !IsNullGPU && !IsWave32Capable) { | ||
| if (HaveWave32 && !IsNullGPU && TargetHasWave64) { | ||
| return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize32"}; | ||
| } | ||
| if (HaveWave64 && !IsNullGPU && TargetHasWave32) { | ||
| return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "wavefrontsize64"}; | ||
| } | ||
| // Don't assume any wavesize with an unknown subtarget. | ||
| if (!IsNullGPU) { | ||
| // Default to wave32 if available, or wave64 if not | ||
| if (!HaveWave32 && !HaveWave64) { | ||
| StringRef DefaultWaveSizeFeature = | ||
| IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64"; | ||
| Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); | ||
| } | ||
| // Default to wave32 if target supports both. | ||
| if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 && | ||
| !TargetHasWave64) | ||
| Features.insert(std::make_pair("wavefrontsize32", true)); | ||
|
|
||
| for (const auto &Entry : DefaultFeatures) { | ||
| if (!Features.count(Entry.getKey())) | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wish there is a method to detach entry from one map and add it to another, but I didn't find one. |
||
| Features[Entry.getKey()] = Entry.getValue(); | ||
| } | ||
|
|
||
| return {NO_ERROR, StringRef()}; | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am a bit concerned moving fillAMDGPUFeatureMap from here to insertWaveSizeFeature. Since we are supposed to initialize the feature map with the target defaults, then override them with command line options.
How about we keep fillAMDGPUFeatureMap here and make a copy of default target features and pass it to insertWaveSizeFeature as an extra argument. In insertWaveSizeFeature, we only use the default target feature for checking target support of wave32/64.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check the next #155222. Here I have only done what you have asked for. There I have refactored the whole thing.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you want I can squash both into a single commit, but I thought that way it is better reviewable.