18
18
#include " gcenv.structs.h"
19
19
#include " gcenv.base.h"
20
20
#include " gcenv.os.h"
21
+ #include " gcenv.ee.h"
21
22
#include " gcenv.unix.inl"
22
23
#include " volatile.h"
24
+ #include " gcconfig.h"
23
25
#include " numasupport.h"
24
26
25
27
#if HAVE_SWAPCTL
@@ -792,101 +794,125 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
792
794
return result;
793
795
}
794
796
795
- #define UPDATE_CACHE_SIZE_AND_LEVEL (NEW_CACHE_SIZE, NEW_CACHE_LEVEL ) if (NEW_CACHE_SIZE > ((long )cacheSize)) { cacheSize = NEW_CACHE_SIZE; cacheLevel = NEW_CACHE_LEVEL; }
796
797
797
- static size_t GetLogicalProcessorCacheSizeFromOS ( )
798
// Queries sysconf() for the size of the highest-level data/unified cache it reports.
//
// Parameters:
//   cacheLevel - out; set to the level (1..4) of the cache that was found. Left untouched
//                when sysconf reports no usable cache size.
//   cacheSize  - out; set to that cache's size as returned by sysconf. Left untouched
//                when sysconf reports no usable cache size.
//
// sysconf can return -1 if the cache size is unavailable in some distributions and 0 in
// others; both are treated as "not available", so the next lower level is tried.
static void GetLogicalProcessorCacheSizeFromSysConf(size_t* cacheLevel, size_t* cacheSize)
{
    assert(cacheLevel != nullptr);
    assert(cacheSize != nullptr);

#if defined(_SC_LEVEL1_DCACHE_SIZE) || defined(_SC_LEVEL2_CACHE_SIZE) || defined(_SC_LEVEL3_CACHE_SIZE) || defined(_SC_LEVEL4_CACHE_SIZE)
    struct CacheLevelName
    {
        int name;       // sysconf() selector
        size_t level;   // cache level the selector refers to
    };

    // Ordered from the highest level to the lowest so the first usable entry wins.
    // Each entry is guarded on its own: a libc that defines only a subset of the
    // selectors still compiles, and the level is stored explicitly so a missing
    // entry cannot shift the level reported for the others.
    const CacheLevelName cacheLevelNames[] =
    {
#ifdef _SC_LEVEL4_CACHE_SIZE
        { _SC_LEVEL4_CACHE_SIZE, 4 },
#endif
#ifdef _SC_LEVEL3_CACHE_SIZE
        { _SC_LEVEL3_CACHE_SIZE, 3 },
#endif
#ifdef _SC_LEVEL2_CACHE_SIZE
        { _SC_LEVEL2_CACHE_SIZE, 2 },
#endif
#ifdef _SC_LEVEL1_DCACHE_SIZE
        { _SC_LEVEL1_DCACHE_SIZE, 1 },
#endif
    };

    for (const CacheLevelName& entry : cacheLevelNames)
    {
        const long size = sysconf(entry.name);
        if (size > 0)
        {
            *cacheSize = (size_t)size;
            *cacheLevel = entry.level;
            break;
        }
    }
#endif
}
840
824
841
- uint64_t cache_size_from_sys_file = 0 ;
825
// Determines the cache size by reading /sys/devices/system/cpu/cpu0/cache/index{0..4}/size
// and keeps the largest value found, together with the level read from the matching
// .../index{N}/level file.
//
// Parameters:
//   cacheLevel - out; set to the level of the largest cache found, when that cache's level
//                file could be read.
//   cacheSize  - in/out; only ever raised: an index replaces it when its size is strictly
//                larger than the current value.
//
// Compiled to a no-op (apart from the argument asserts) unless targeting Linux on a host
// other than ARM32 / x86.
static void GetLogicalProcessorCacheSizeFromSysFs(size_t* cacheLevel, size_t* cacheSize)
{
    assert(cacheLevel != nullptr);
    assert(cacheSize != nullptr);

#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
    //
    // Retrieve the cache size via sysfs. Currently musl and arm64 should be the only
    // cases that need this method to determine the cache size.
    //
    char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
    char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";

    // Position of the '-' placeholder that gets replaced with the index digit.
    const int index = 40;
    assert(path_to_size_file[index] == '-');
    assert(path_to_level_file[index] == '-');

    for (int i = 0; i < 5; i++)
    {
        const char digit = (char)('0' + i);
        path_to_size_file[index] = digit;

        uint64_t cache_size_from_sys_file = 0;
        if (!ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
        {
            continue;
        }

        // Only a strictly larger cache replaces the current candidate, and the level is
        // updated in the same step, so the reported level always describes the cache
        // whose size is reported.
        const size_t size = (size_t)cache_size_from_sys_file;
        if (size <= *cacheSize)
        {
            continue;
        }

        *cacheSize = size;

        // ReadMemoryValueFromFile takes a uint64_t*, so read into a uint64_t and convert.
        uint64_t level = 0;
        path_to_level_file[index] = digit;
        if (ReadMemoryValueFromFile(path_to_level_file, &level))
        {
            *cacheLevel = (size_t)level;
        }
    }
#endif
}
862
862
863
- #if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
864
- if (cacheSize == 0 )
863
// Estimates the cache size from the number of logical CPUs in the process affinity set.
//
// Parameters:
//   cacheLevel - must be non-null; not modified by this function.
//   cacheSize  - out; overwritten with the estimate, in bytes.
//
// Mapping from CPU count to estimate:
//    1 ~  4 :  4 MB
//    5 ~ 16 :  8 MB
//   17 ~ 64 : 16 MB
//   65+     : 32 MB
//
// Outside of the platform guard below, only the argument asserts run.
static void GetLogicalProcessorCacheSizeFromHeuristic(size_t* cacheLevel, size_t* cacheSize)
{
    assert(cacheLevel != nullptr);
    assert(cacheSize != nullptr);

#if (defined(TARGET_LINUX) && !defined(TARGET_APPLE))
    {
        const DWORD cpuCount = g_processAffinitySet.Count();

        // Walk the buckets from the largest machine class down.
        size_t megabytes;
        if (cpuCount >= 65)
        {
            megabytes = 32;
        }
        else if (cpuCount >= 17)
        {
            megabytes = 16;
        }
        else if (cpuCount >= 5)
        {
            megabytes = 8;
        }
        else
        {
            megabytes = 4;
        }

        *cacheSize = megabytes * (1024 * 1024);
    }
#endif
}
897
+
898
+ static size_t GetLogicalProcessorCacheSizeFromOS ()
899
+ {
900
+ size_t cacheLevel = 0 ;
901
+ size_t cacheSize = 0 ;
902
+
903
+ if (GCConfig::GetGCCacheSizeFromSysConf ())
904
+ {
905
+ GetLogicalProcessorCacheSizeFromSysConf (&cacheLevel, &cacheSize);
906
+ }
907
+
908
+ if (cacheSize == 0 )
909
+ {
910
+ GetLogicalProcessorCacheSizeFromSysFs (&cacheLevel, &cacheSize);
911
+ if (cacheSize == 0 )
912
+ {
913
+ GetLogicalProcessorCacheSizeFromHeuristic (&cacheLevel, &cacheSize);
914
+ }
915
+ }
890
916
891
917
#if HAVE_SYSCTLBYNAME
892
918
if (cacheSize == 0 )
@@ -905,40 +931,15 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
905
931
if (success)
906
932
{
907
933
assert (cacheSizeFromSysctl > 0 );
908
- cacheSize = ( size_t ) cacheSizeFromSysctl;
934
+ cacheSize = (size_t ) cacheSizeFromSysctl;
909
935
}
910
936
}
911
937
#endif
912
938
913
939
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
914
940
if (cacheLevel != 3 )
915
941
{
916
- // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info
917
- // from most of the machines.
918
- // Hence, just use the following heuristics at best depending on the CPU count
919
- // 1 ~ 4 : 4 MB
920
- // 5 ~ 16 : 8 MB
921
- // 17 ~ 64 : 16 MB
922
- // 65+ : 32 MB
923
- DWORD logicalCPUs = g_processAffinitySet.Count ();
924
- if (logicalCPUs < 5 )
925
- {
926
- cacheSize = 4 ;
927
- }
928
- else if (logicalCPUs < 17 )
929
- {
930
- cacheSize = 8 ;
931
- }
932
- else if (logicalCPUs < 65 )
933
- {
934
- cacheSize = 16 ;
935
- }
936
- else
937
- {
938
- cacheSize = 32 ;
939
- }
940
-
941
- cacheSize *= (1024 * 1024 );
942
+ GetLogicalProcessorCacheSizeFromHeuristic (&cacheLevel, &cacheSize);
942
943
}
943
944
#endif
944
945
0 commit comments