@@ -471,6 +471,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
471471 Features[" setprio-inc-wg-inst" ] = true ;
472472 Features[" atomic-fmin-fmax-global-f32" ] = true ;
473473 Features[" atomic-fmin-fmax-global-f64" ] = true ;
474+ Features[" wavefrontsize32" ] = true ;
474475 break ;
475476 case GK_GFX1201:
476477 case GK_GFX1200:
@@ -638,6 +639,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
638639 Features[" gws" ] = true ;
639640 Features[" vmem-to-lds-load-insts" ] = true ;
640641 Features[" atomic-fmin-fmax-global-f64" ] = true ;
642+ Features[" wavefrontsize64" ] = true ;
641643 break ;
642644 case GK_GFX90A:
643645 Features[" gfx90a-insts" ] = true ;
@@ -681,6 +683,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
681683 Features[" image-insts" ] = true ;
682684 Features[" s-memtime-inst" ] = true ;
683685 Features[" gws" ] = true ;
686+ Features[" wavefrontsize64" ] = true ;
684687 break ;
685688 case GK_GFX705:
686689 case GK_GFX704:
@@ -698,6 +701,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
698701 Features[" gws" ] = true ;
699702 Features[" atomic-fmin-fmax-global-f32" ] = true ;
700703 Features[" atomic-fmin-fmax-global-f64" ] = true ;
704+ Features[" wavefrontsize64" ] = true ;
701705 break ;
702706 case GK_NONE:
703707 break ;
@@ -734,68 +738,37 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
734738 }
735739}
736740
// isWave32Capable: file-local helper that appears here only as deleted lines
// (every line below carries the minus marker of the diff).
// It returns true only when T.isAMDGCN() holds and parseArchAMDGCN(GPU) yields
// one of the GPU kinds enumerated in the switch. Any other kind, and any
// non-AMDGCN triple, leaves the result at its initial value of false.
737- static bool isWave32Capable (StringRef GPU, const Triple &T) {
738- bool IsWave32Capable = false ;
739- // XXX - What does the member GPU mean if device name string passed here?
740- if (T.isAMDGCN ()) {
741- switch (parseArchAMDGCN (GPU)) {
742- case GK_GFX1250:
743- case GK_GFX1201:
744- case GK_GFX1200:
745- case GK_GFX1153:
746- case GK_GFX1152:
747- case GK_GFX1151:
748- case GK_GFX1150:
749- case GK_GFX1103:
750- case GK_GFX1102:
751- case GK_GFX1101:
752- case GK_GFX1100:
753- case GK_GFX1036:
754- case GK_GFX1035:
755- case GK_GFX1034:
756- case GK_GFX1033:
757- case GK_GFX1032:
758- case GK_GFX1031:
759- case GK_GFX1030:
760- case GK_GFX1012:
761- case GK_GFX1011:
762- case GK_GFX1013:
763- case GK_GFX1010:
764- case GK_GFX12_GENERIC:
765- case GK_GFX11_GENERIC:
766- case GK_GFX10_3_GENERIC:
767- case GK_GFX10_1_GENERIC:
// All kinds listed above fall through to this single assignment.
768- IsWave32Capable = true ;
769- break ;
770- default :
771- break ;
772- }
773- }
774- return IsWave32Capable;
775- }
776-
// insertWaveSizeFeature, shown with the old and new lines of the change
// interleaved (minus marker = removed, plus marker = added).
// New behavior: checks any wavefrontsize32 / wavefrontsize64 key already
// present in Features against the defaults that fillAMDGPUFeatureMap reports
// for GPU, then updates Features in place.
// Returns:
//   {INVALID_FEATURE_COMBINATION, message} when both keys are present;
//   {UNSUPPORTED_TARGET_FEATURE, key} when GPU is non-empty and the requested
//     key is the opposite of the wave-size key found in the defaults;
//   {NO_ERROR, empty StringRef} otherwise.
// Presence is tested with count(), so a key counts as requested or as a
// default regardless of the bool stored under it.
777741std::pair<FeatureError, StringRef>
778742AMDGPU::insertWaveSizeFeature (StringRef GPU, const Triple &T,
779743 StringMap<bool > &Features) {
780- bool IsWave32Capable = isWave32Capable (GPU, T);
// The per-GPU default map replaces the removed capability switch as the source
// of wave-size information.
744+ StringMap<bool > DefaultFeatures;
745+ fillAMDGPUFeatureMap (GPU, T, DefaultFeatures);
746+
781747 const bool IsNullGPU = GPU.empty ();
// A wave-size key in DefaultFeatures is treated below as the only size the
// target accepts: a request for the other size is rejected.
748+ const bool TargetHasWave32 = DefaultFeatures.count (" wavefrontsize32" );
749+ const bool TargetHasWave64 = DefaultFeatures.count (" wavefrontsize64" );
782750 const bool HaveWave32 = Features.count (" wavefrontsize32" );
783751 const bool HaveWave64 = Features.count (" wavefrontsize64" );
784752 if (HaveWave32 && HaveWave64) {
785753 return {AMDGPU::INVALID_FEATURE_COMBINATION,
786754 " 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive" };
787755 }
788- if (HaveWave32 && !IsNullGPU && !IsWave32Capable ) {
756+ if (HaveWave32 && !IsNullGPU && TargetHasWave64 ) {
789757 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, " wavefrontsize32" };
790758 }
// Added symmetric check: wavefrontsize64 is now rejected when the defaults
// carry wavefrontsize32. The removed code had no such rejection.
759+ if (HaveWave64 && !IsNullGPU && TargetHasWave32) {
760+ return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, " wavefrontsize64" };
761+ }
791762 // Don't assume any wavesize with an unknown subtarget.
792- if (!IsNullGPU) {
793- // Default to wave32 if available, or wave64 if not
794- if (!HaveWave32 && !HaveWave64) {
795- StringRef DefaultWaveSizeFeature =
796- IsWave32Capable ? " wavefrontsize32" : " wavefrontsize64" ;
797- Features.insert (std::make_pair (DefaultWaveSizeFeature, true ));
798- }
// Here, supports both means that the defaults carry neither wave-size key.
// NOTE(review): the removed code picked wavefrontsize64 for any GPU outside its
// wave32 list. The new branch picks wavefrontsize32 whenever the defaults have
// no wave-size key. A non-empty GPU name for which fillAMDGPUFeatureMap sets
// nothing (the GK_NONE case breaks without adding features) would presumably
// land here too -- confirm that this change of default is intended.
763+ // Default to wave32 if target supports both.
764+ if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 &&
765+ !TargetHasWave64)
766+ Features.insert (std::make_pair (" wavefrontsize32" , true ));
767+
// Copy every default whose key the caller did not already set; entries that
// are already in Features are left untouched.
768+ for (const auto &Entry : DefaultFeatures) {
769+ if (!Features.count (Entry.getKey ()))
770+ Features[Entry.getKey ()] = Entry.getValue ();
799771 }
772+
800773 return {NO_ERROR, StringRef ()};
801774}
0 commit comments