Skip to content

Commit 11584f1

Browse files
committed
More refinements on guessing cache sizes for Linux
1 parent e60b4f4 commit 11584f1

File tree

1 file changed

+34
-26
lines changed

1 file changed

+34
-26
lines changed

src/blosc2/core.py

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,50 +1164,58 @@ def apple_silicon_cache_size(cache_level: int) -> int:
11641164
return size.value
11651165

11661166

1167-
def get_cache_info(cache_level: int, lscpu_output: "str | None" = None) -> tuple:
    """Return the size and the number of instances of a cache as reported by lscpu.

    Parameters
    ----------
    cache_level: int
        The cache level to look for. Level 0 is looked up as the ``L1d cache:``
        entry; any other value ``N`` is looked up as ``L{N} cache:``.
    lscpu_output: str or None
        JSON text in the format printed by ``lscpu --json``. If None (the
        default), ``lscpu --json`` is executed to obtain it.

    Returns
    -------
    tuple
        A ``(size, instances)`` pair: the size in bytes stated in the matching
        entry and the number of instances stated next to it.

    Raises
    ------
    FileNotFoundError
        If ``lscpu`` has to be executed but cannot be found.
    ValueError
        If the output is not valid JSON, the entry for the requested level is
        missing, its description cannot be parsed, or its unit is unknown.
    """
    if lscpu_output is None:
        result = subprocess.run(["lscpu", "--json"], capture_output=True, text=True)
        lscpu_output = result.stdout
    # json.JSONDecodeError derives from ValueError, so callers that catch
    # ValueError are also covered when lscpu prints nothing usable.
    lscpu_info = json.loads(lscpu_output)
    if cache_level == 0:
        cache_level = "1d"
    wanted_field = f"L{cache_level} cache:"
    multipliers = {"KiB": 2**10, "MiB": 2**20, "GiB": 2**30}

    entries = lscpu_info.get("lscpu", []) if isinstance(lscpu_info, dict) else []
    for entry in entries:
        if not isinstance(entry, dict) or entry.get("field") != wanted_field:
            continue
        data = entry.get("data")
        try:
            # Expected shape: "<number> <unit> (<count> instance[s])"
            size_str, instances_str = data.split(" (")
            number, units = size_str.split()
            instances = int(instances_str.split()[0])
            # The number can be fractional (e.g. "1.5 MiB"), so int() is not enough
            value = float(number)
        except (AttributeError, IndexError, ValueError) as err:
            raise ValueError(f"Cannot parse lscpu cache description: {data!r}") from err
        if units not in multipliers:
            raise ValueError("Unrecognized unit when guessing cache units")
        return int(value * multipliers[units]), instances

    raise ValueError(f"L{cache_level} cache not found in lscpu output")
11781189

11791190

11801191
def linux_cache_size(cache_level: int, default_size: int) -> int:
    """Get the data cache_level size in bytes for Linux.

    The size is first read from
    ``/sys/devices/system/cpu/cpu0/cache/index{cache_level}/size``. If that
    file cannot be read or holds a non-numeric value, ``get_cache_info()``
    (based on lscpu) is tried instead.

    Parameters
    ----------
    cache_level: int
        The cache level, used both as the sysfs ``index`` number and as the
        level passed to ``get_cache_info()``.
    default_size: int
        The value to return when the size cannot be determined.

    Returns
    -------
    int
        The guessed cache size in bytes, or ``default_size``.
    """
    multipliers = {"K": 2**10, "M": 2**20, "G": 2**30}
    try:
        # Try to read the cache size from sysfs
        with open(f"/sys/devices/system/cpu/cpu0/cache/index{cache_level}/size") as f:
            text = f.read().strip()
    except OSError:
        # Missing or unreadable entry: fall back to lscpu below
        text = None

    if text is not None:
        factor = multipliers.get(text[-1:])
        if factor is None:
            # Unknown (or missing) unit suffix: keep the default size
            return default_size
        try:
            return int(text[:-1]) * factor
        except ValueError:
            # Non-numeric value in sysfs: fall back to lscpu below
            pass

    # Try with lscpu, if available.
    try:
        cache_size, cache_instances = get_cache_info(cache_level)
    except (OSError, ValueError, KeyError):
        # If lscpu is not available or its output cannot be interpreted,
        # return the default size.
        return default_size
    if cache_instances < 1:
        # Guard the division below against a nonsensical instance count
        return default_size
    # cache_instances typically refers to the number of sockets, CCXs or cores,
    # depending on the CPU and cache level.
    # In general, dividing the cache size by the number of instances would bring
    # best performance for private caches (L1 and L2). For shared caches (L3),
    # this should be the case as well, but more experimentation is needed.
    return cache_size // cache_instances
12121220

12131221

0 commit comments

Comments
 (0)