Skip to content

Commit 3989aac

Browse files
authored
Fix an issue with sysconf returning the wrong last level cache values on Linux running on certain AMD Processors (#109567)
* Added fix back for Last Level Cache for Unix * Fix up build related issues
1 parent f569844 commit 3989aac

File tree

3 files changed

+108
-104
lines changed

3 files changed

+108
-104
lines changed

src/coreclr/gc/gcconfig.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,9 @@ class GCConfigStringHolder
139139
INT_CONFIG (GCWriteBarrier, "GCWriteBarrier", NULL, 0, "Specifies whether GC should use more precise but slower write barrier") \
140140
STRING_CONFIG(GCName, "GCName", "System.GC.Name", "Specifies the path of the standalone GC implementation.") \
141141
INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", 0, 0, "Specifies the spin count unit used by the GC.") \
142-
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 0, "Enable the GC to dynamically adapt to application sizes.")
142+
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 0, "Enable the GC to dynamically adapt to application sizes.") \
143+
BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.")
144+
143145
// This class is responsible for retrieving configuration information
144146
// for how the GC should operate.
145147
class GCConfig

src/coreclr/gc/unix/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
set(CMAKE_INCLUDE_CURRENT_DIR ON)
22
include_directories("../env")
3+
include_directories("..")
34

45
include(configure.cmake)
56

src/coreclr/gc/unix/gcenv.unix.cpp

Lines changed: 104 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
#include "gcenv.structs.h"
1919
#include "gcenv.base.h"
2020
#include "gcenv.os.h"
21+
#include "gcenv.ee.h"
2122
#include "gcenv.unix.inl"
2223
#include "volatile.h"
24+
#include "gcconfig.h"
2325
#include "numasupport.h"
2426

2527
#if HAVE_SWAPCTL
@@ -792,101 +794,125 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
792794
return result;
793795
}
794796

795-
#define UPDATE_CACHE_SIZE_AND_LEVEL(NEW_CACHE_SIZE, NEW_CACHE_LEVEL) if (NEW_CACHE_SIZE > ((long)cacheSize)) { cacheSize = NEW_CACHE_SIZE; cacheLevel = NEW_CACHE_LEVEL; }
796797

797-
static size_t GetLogicalProcessorCacheSizeFromOS()
798+
static void GetLogicalProcessorCacheSizeFromSysConf(size_t* cacheLevel, size_t* cacheSize)
798799
{
799-
size_t cacheLevel = 0;
800-
size_t cacheSize = 0;
801-
long size;
800+
assert (cacheLevel != nullptr);
801+
assert (cacheSize != nullptr);
802802

803-
// sysconf can return -1 if the cache size is unavailable in some distributions and 0 in others.
804-
// UPDATE_CACHE_SIZE_AND_LEVEL should handle both the cases by not updating cacheSize if either of cases are met.
805-
#ifdef _SC_LEVEL1_DCACHE_SIZE
806-
size = sysconf(_SC_LEVEL1_DCACHE_SIZE);
807-
UPDATE_CACHE_SIZE_AND_LEVEL(size, 1)
808-
#endif
809-
#ifdef _SC_LEVEL2_CACHE_SIZE
810-
size = sysconf(_SC_LEVEL2_CACHE_SIZE);
811-
UPDATE_CACHE_SIZE_AND_LEVEL(size, 2)
812-
#endif
813-
#ifdef _SC_LEVEL3_CACHE_SIZE
814-
size = sysconf(_SC_LEVEL3_CACHE_SIZE);
815-
UPDATE_CACHE_SIZE_AND_LEVEL(size, 3)
816-
#endif
817-
#ifdef _SC_LEVEL4_CACHE_SIZE
818-
size = sysconf(_SC_LEVEL4_CACHE_SIZE);
819-
UPDATE_CACHE_SIZE_AND_LEVEL(size, 4)
820-
#endif
803+
#if defined(_SC_LEVEL1_DCACHE_SIZE) || defined(_SC_LEVEL2_CACHE_SIZE) || defined(_SC_LEVEL3_CACHE_SIZE) || defined(_SC_LEVEL4_CACHE_SIZE)
804+
const int cacheLevelNames[] =
805+
{
806+
_SC_LEVEL1_DCACHE_SIZE,
807+
_SC_LEVEL2_CACHE_SIZE,
808+
_SC_LEVEL3_CACHE_SIZE,
809+
_SC_LEVEL4_CACHE_SIZE,
810+
};
821811

822-
#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
823-
if (cacheSize == 0)
812+
for (int i = ARRAY_SIZE(cacheLevelNames) - 1; i >= 0; i--)
824813
{
825-
//
826-
// Fallback to retrieve cachesize via /sys/.. if sysconf was not available
827-
// for the platform. Currently musl and arm64 should be only cases to use
828-
// this method to determine cache size.
829-
//
830-
size_t level;
831-
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
832-
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
833-
int index = 40;
834-
assert(path_to_size_file[index] == '-');
835-
assert(path_to_level_file[index] == '-');
836-
837-
for (int i = 0; i < 5; i++)
814+
long size = sysconf(cacheLevelNames[i]);
815+
if (size > 0)
838816
{
839-
path_to_size_file[index] = (char)(48 + i);
817+
*cacheSize = (size_t)size;
818+
*cacheLevel = i + 1;
819+
break;
820+
}
821+
}
822+
#endif
823+
}
840824

841-
uint64_t cache_size_from_sys_file = 0;
825+
static void GetLogicalProcessorCacheSizeFromSysFs(size_t* cacheLevel, size_t* cacheSize)
826+
{
827+
assert (cacheLevel != nullptr);
828+
assert (cacheSize != nullptr);
842829

843-
if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
844-
{
845-
// uint64_t to long conversion as ReadMemoryValueFromFile takes a uint64_t* as an argument for the val argument.
846-
size = (long)cache_size_from_sys_file;
847-
path_to_level_file[index] = (char)(48 + i);
830+
#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
831+
//
832+
// Retrieve cachesize via sysfs by reading the file /sys/devices/system/cpu/cpu0/cache/index{LastLevelCache}/size
833+
// for the platform. Currently musl and arm64 should be the only cases to use
834+
// this method to determine cache size.
835+
//
836+
size_t level;
837+
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
838+
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
839+
int index = 40;
840+
assert(path_to_size_file[index] == '-');
841+
assert(path_to_level_file[index] == '-');
842+
843+
for (int i = 0; i < 5; i++)
844+
{
845+
path_to_size_file[index] = (char)(48 + i);
848846

849-
if (ReadMemoryValueFromFile(path_to_level_file, &level))
850-
{
851-
UPDATE_CACHE_SIZE_AND_LEVEL(size, level)
852-
}
847+
uint64_t cache_size_from_sys_file = 0;
853848

854-
else
855-
{
856-
cacheSize = std::max((long)cacheSize, size);
857-
}
849+
if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
850+
{
851+
*cacheSize = std::max(*cacheSize, (size_t)cache_size_from_sys_file);
852+
853+
path_to_level_file[index] = (char)(48 + i);
854+
if (ReadMemoryValueFromFile(path_to_level_file, &level))
855+
{
856+
*cacheLevel = level;
858857
}
859858
}
860859
}
861-
#endif
860+
#endif
861+
}
862862

863-
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
864-
if (cacheSize == 0)
863+
static void GetLogicalProcessorCacheSizeFromHeuristic(size_t* cacheLevel, size_t* cacheSize)
864+
{
865+
assert (cacheLevel != nullptr);
866+
assert (cacheSize != nullptr);
867+
868+
#if (defined(TARGET_LINUX) && !defined(TARGET_APPLE))
865869
{
866-
// We expect to get the L3 cache size for Arm64 but currently expected to be missing that info
867-
// from most of the machines.
868-
//
869-
// _SC_LEVEL*_*CACHE_SIZE is not yet present. Work is in progress to enable this for arm64
870-
//
871-
// /sys/devices/system/cpu/cpu*/cache/index*/ is also not yet present in most systems.
872-
// Arm64 patch is in Linux kernel tip.
873-
//
874-
// midr_el1 is available in "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
875-
// but without an exhaustive list of ARM64 processors any decode of midr_el1
876-
// Would likely be incomplete
877-
878-
// Published information on ARM64 architectures is limited.
879-
// If we use recent high core count chips as a guide for state of the art, we find
880-
// total L3 cache to be 1-2MB/core. As always, there are exceptions.
881-
882-
// Estimate cache size based on CPU count
883-
// Assume lower core count are lighter weight parts which are likely to have smaller caches
884-
// Assume L3$/CPU grows linearly from 256K to 1.5M/CPU as logicalCPUs grows from 2 to 12 CPUs
870+
// Use the following heuristics at best depending on the CPU count
871+
// 1 ~ 4 : 4 MB
872+
// 5 ~ 16 : 8 MB
873+
// 17 ~ 64 : 16 MB
874+
// 65+ : 32 MB
885875
DWORD logicalCPUs = g_processAffinitySet.Count();
876+
if (logicalCPUs < 5)
877+
{
878+
*cacheSize = 4;
879+
}
880+
else if (logicalCPUs < 17)
881+
{
882+
*cacheSize = 8;
883+
}
884+
else if (logicalCPUs < 65)
885+
{
886+
*cacheSize = 16;
887+
}
888+
else
889+
{
890+
*cacheSize = 32;
891+
}
886892

887-
cacheSize = logicalCPUs * std::min(1536, std::max(256, (int)logicalCPUs * 128)) * 1024;
893+
*cacheSize *= (1024 * 1024);
888894
}
889895
#endif
896+
}
897+
898+
static size_t GetLogicalProcessorCacheSizeFromOS()
899+
{
900+
size_t cacheLevel = 0;
901+
size_t cacheSize = 0;
902+
903+
if (GCConfig::GetGCCacheSizeFromSysConf())
904+
{
905+
GetLogicalProcessorCacheSizeFromSysConf(&cacheLevel, &cacheSize);
906+
}
907+
908+
if (cacheSize == 0)
909+
{
910+
GetLogicalProcessorCacheSizeFromSysFs(&cacheLevel, &cacheSize);
911+
if (cacheSize == 0)
912+
{
913+
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
914+
}
915+
}
890916

891917
#if HAVE_SYSCTLBYNAME
892918
if (cacheSize == 0)
@@ -905,40 +931,15 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
905931
if (success)
906932
{
907933
assert(cacheSizeFromSysctl > 0);
908-
cacheSize = ( size_t) cacheSizeFromSysctl;
934+
cacheSize = (size_t) cacheSizeFromSysctl;
909935
}
910936
}
911937
#endif
912938

913939
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
914940
if (cacheLevel != 3)
915941
{
916-
// We expect to get the L3 cache size for Arm64 but currently expected to be missing that info
917-
// from most of the machines.
918-
// Hence, just use the following heuristics at best depending on the CPU count
919-
// 1 ~ 4 : 4 MB
920-
// 5 ~ 16 : 8 MB
921-
// 17 ~ 64 : 16 MB
922-
// 65+ : 32 MB
923-
DWORD logicalCPUs = g_processAffinitySet.Count();
924-
if (logicalCPUs < 5)
925-
{
926-
cacheSize = 4;
927-
}
928-
else if (logicalCPUs < 17)
929-
{
930-
cacheSize = 8;
931-
}
932-
else if (logicalCPUs < 65)
933-
{
934-
cacheSize = 16;
935-
}
936-
else
937-
{
938-
cacheSize = 32;
939-
}
940-
941-
cacheSize *= (1024 * 1024);
942+
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
942943
}
943944
#endif
944945

0 commit comments

Comments
 (0)