@@ -471,6 +471,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
471
471
Features[" setprio-inc-wg-inst" ] = true ;
472
472
Features[" atomic-fmin-fmax-global-f32" ] = true ;
473
473
Features[" atomic-fmin-fmax-global-f64" ] = true ;
474
+ Features[" wavefrontsize32" ] = true ;
474
475
break ;
475
476
case GK_GFX1201:
476
477
case GK_GFX1200:
@@ -638,6 +639,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
638
639
Features[" gws" ] = true ;
639
640
Features[" vmem-to-lds-load-insts" ] = true ;
640
641
Features[" atomic-fmin-fmax-global-f64" ] = true ;
642
+ Features[" wavefrontsize64" ] = true ;
641
643
break ;
642
644
case GK_GFX90A:
643
645
Features[" gfx90a-insts" ] = true ;
@@ -681,6 +683,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
681
683
Features[" image-insts" ] = true ;
682
684
Features[" s-memtime-inst" ] = true ;
683
685
Features[" gws" ] = true ;
686
+ Features[" wavefrontsize64" ] = true ;
684
687
break ;
685
688
case GK_GFX705:
686
689
case GK_GFX704:
@@ -698,6 +701,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
698
701
Features[" gws" ] = true ;
699
702
Features[" atomic-fmin-fmax-global-f32" ] = true ;
700
703
Features[" atomic-fmin-fmax-global-f64" ] = true ;
704
+ Features[" wavefrontsize64" ] = true ;
701
705
break ;
702
706
case GK_NONE:
703
707
break ;
@@ -734,68 +738,37 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
734
738
}
735
739
}
736
740
737
- static bool isWave32Capable (StringRef GPU, const Triple &T) {
738
- bool IsWave32Capable = false ;
739
- // XXX - What does the member GPU mean if device name string passed here?
740
- if (T.isAMDGCN ()) {
741
- switch (parseArchAMDGCN (GPU)) {
742
- case GK_GFX1250:
743
- case GK_GFX1201:
744
- case GK_GFX1200:
745
- case GK_GFX1153:
746
- case GK_GFX1152:
747
- case GK_GFX1151:
748
- case GK_GFX1150:
749
- case GK_GFX1103:
750
- case GK_GFX1102:
751
- case GK_GFX1101:
752
- case GK_GFX1100:
753
- case GK_GFX1036:
754
- case GK_GFX1035:
755
- case GK_GFX1034:
756
- case GK_GFX1033:
757
- case GK_GFX1032:
758
- case GK_GFX1031:
759
- case GK_GFX1030:
760
- case GK_GFX1012:
761
- case GK_GFX1011:
762
- case GK_GFX1013:
763
- case GK_GFX1010:
764
- case GK_GFX12_GENERIC:
765
- case GK_GFX11_GENERIC:
766
- case GK_GFX10_3_GENERIC:
767
- case GK_GFX10_1_GENERIC:
768
- IsWave32Capable = true ;
769
- break ;
770
- default :
771
- break ;
772
- }
773
- }
774
- return IsWave32Capable;
775
- }
776
-
777
741
std::pair<FeatureError, StringRef>
778
742
AMDGPU::insertWaveSizeFeature (StringRef GPU, const Triple &T,
779
743
StringMap<bool > &Features) {
780
- bool IsWave32Capable = isWave32Capable (GPU, T);
744
+ StringMap<bool > DefaultFeatures;
745
+ fillAMDGPUFeatureMap (GPU, T, DefaultFeatures);
746
+
781
747
const bool IsNullGPU = GPU.empty ();
748
+ const bool TargetHasWave32 = DefaultFeatures.count (" wavefrontsize32" );
749
+ const bool TargetHasWave64 = DefaultFeatures.count (" wavefrontsize64" );
782
750
const bool HaveWave32 = Features.count (" wavefrontsize32" );
783
751
const bool HaveWave64 = Features.count (" wavefrontsize64" );
784
752
if (HaveWave32 && HaveWave64) {
785
753
return {AMDGPU::INVALID_FEATURE_COMBINATION,
786
754
" 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive" };
787
755
}
788
- if (HaveWave32 && !IsNullGPU && !IsWave32Capable ) {
756
+ if (HaveWave32 && !IsNullGPU && TargetHasWave64 ) {
789
757
return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, " wavefrontsize32" };
790
758
}
759
+ if (HaveWave64 && !IsNullGPU && TargetHasWave32) {
760
+ return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, " wavefrontsize64" };
761
+ }
791
762
// Don't assume any wavesize with an unknown subtarget.
792
- if (!IsNullGPU) {
793
- // Default to wave32 if available, or wave64 if not
794
- if (!HaveWave32 && !HaveWave64) {
795
- StringRef DefaultWaveSizeFeature =
796
- IsWave32Capable ? " wavefrontsize32" : " wavefrontsize64" ;
797
- Features.insert (std::make_pair (DefaultWaveSizeFeature, true ));
798
- }
763
+ // Default to wave32 if target supports both.
764
+ if (!IsNullGPU && !HaveWave32 && !HaveWave64 && !TargetHasWave32 &&
765
+ !TargetHasWave64)
766
+ Features.insert (std::make_pair (" wavefrontsize32" , true ));
767
+
768
+ for (const auto &Entry : DefaultFeatures) {
769
+ if (!Features.count (Entry.getKey ()))
770
+ Features[Entry.getKey ()] = Entry.getValue ();
799
771
}
772
+
800
773
return {NO_ERROR, StringRef ()};
801
774
}
0 commit comments