Skip to content

Commit 3e917cc

Browse files
committed
linux: detect memory-side caches from sysfs
Those are exposed in /sys/devices/system/node/node*/memory_side_cache/index[1...]/ since Linux 5.2 when the machine has an HMAT ACPI table. We don't know the exact associativity, only whether the cache is direct-mapped or not. Signed-off-by: Brice Goglin <[email protected]>
1 parent af69b78 commit 3e917cc

File tree

2 files changed

+107
-19
lines changed

2 files changed

+107
-19
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ Version 2.1.0
2121
* API
2222
+ Add the new HWLOC_OBJ_MEMCACHE object type for memory-side caches.
2323
- They are filtered-out by default, except in command-line tools.
24+
- They are only available on very recent platforms running Linux 5.2+
25+
with up-to-date ACPI tables.
2426
- The KNL MCDRAM in cache mode is still exposed as a L3 unless
2527
HWLOC_KNL_MSCACHE_L3=0 in the environment.
2628
+ Add HWLOC_RESTRICT_FLAG_BYNODESET and _REMOVE_MEMLESS for restricting

hwloc/topology-linux.c

Lines changed: 105 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,16 @@ hwloc_read_path_as_uint(const char *path, unsigned *value, int fsroot_fd)
557557
return 0;
558558
}
559559

560+
/* Read the file at path (opened relative to fsroot_fd) and store its
 * contents, parsed as a base-10 unsigned 64-bit integer, into *value.
 *
 * Returns 0 on success, or -1 if the file could not be read
 * (*value is left untouched in that case).
 * The conversion itself is not validated: strtoull() yields 0 when
 * the contents do not start with a number.
 */
static __hwloc_inline int
hwloc_read_path_as_uint64(const char *path, uint64_t *value, int fsroot_fd)
{
  /* presumably sized for the 20 digits of UINT64_MAX plus a newline and the ending \0 */
  char buffer[22];
  int err;

  err = hwloc_read_path_by_length(path, buffer, sizeof(buffer), fsroot_fd);
  if (err < 0)
    return -1;

  *value = (uint64_t) strtoull(buffer, NULL, 10);
  return 0;
}
569+
560570
/* Read everything from fd and save it into a newly allocated buffer
561571
* returned in bufferp. Use sizep as a default buffer size, and return
562572
* the actually needed size in sizep.
@@ -3789,6 +3799,72 @@ read_node_initiators(struct hwloc_linux_backend_data_s *data,
37893799
return 0;
37903800
}
37913801

3802+
/* return -1 if the kernel doesn't support mscache,
3803+
* or update tree (containing only the node on input) with caches (if any)
3804+
*/
3805+
static int
3806+
read_node_mscaches(struct hwloc_topology *topology,
3807+
struct hwloc_linux_backend_data_s *data,
3808+
const char *path,
3809+
hwloc_obj_t *treep)
3810+
{
3811+
hwloc_obj_t tree = *treep, node = tree;
3812+
unsigned osnode = node->os_index;
3813+
char mscpath[SYSFS_NUMA_NODE_PATH_LEN];
3814+
DIR *mscdir;
3815+
struct dirent *dirent;
3816+
3817+
sprintf(mscpath, "%s/node%u/memory_side_cache", path, osnode);
3818+
mscdir = hwloc_opendir(mscpath, data->root_fd);
3819+
if (!mscdir)
3820+
return -1;
3821+
3822+
while ((dirent = readdir(mscdir)) != NULL) {
3823+
unsigned depth;
3824+
uint64_t size;
3825+
unsigned line_size;
3826+
unsigned associativity;
3827+
hwloc_obj_t cache;
3828+
3829+
if (strncmp(dirent->d_name, "index", 5))
3830+
continue;
3831+
3832+
depth = atoi(dirent->d_name+5);
3833+
3834+
sprintf(mscpath, "%s/node%u/memory_side_cache/index%u/size", path, osnode, depth);
3835+
if (hwloc_read_path_as_uint64(mscpath, &size, data->root_fd) < 0)
3836+
continue;
3837+
3838+
sprintf(mscpath, "%s/node%u/memory_side_cache/index%u/line_size", path, osnode, depth);
3839+
if (hwloc_read_path_as_uint(mscpath, &line_size, data->root_fd) < 0)
3840+
continue;
3841+
3842+
sprintf(mscpath, "%s/node%u/memory_side_cache/index%u/indexing", path, osnode, depth);
3843+
if (hwloc_read_path_as_uint(mscpath, &associativity, data->root_fd) < 0)
3844+
continue;
3845+
/* 0 for direct-mapped, 1 for indexed (don't know how many ways), 2 for custom/other */
3846+
3847+
cache = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
3848+
if (cache) {
3849+
cache->nodeset = hwloc_bitmap_dup(node->nodeset);
3850+
cache->cpuset = hwloc_bitmap_dup(node->cpuset);
3851+
cache->attr->cache.size = size;
3852+
cache->attr->cache.depth = depth;
3853+
cache->attr->cache.linesize = line_size;
3854+
cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
3855+
cache->attr->cache.associativity = !associativity ? 1 /* direct-mapped */ : 0 /* unknown */;
3856+
hwloc_debug_1arg_bitmap("mscache %s has nodeset %s\n",
3857+
dirent->d_name, cache->nodeset);
3858+
3859+
cache->memory_first_child = tree;
3860+
tree = cache;
3861+
}
3862+
}
3863+
closedir(mscdir);
3864+
*treep = tree;
3865+
return 0;
3866+
}
3867+
37923868
static unsigned *
37933869
list_sysfsnode(struct hwloc_topology *topology,
37943870
struct hwloc_linux_backend_data_s *data,
@@ -3954,6 +4030,7 @@ look_sysfsnode(struct hwloc_topology *topology,
39544030
unsigned i;
39554031
DIR *dir;
39564032
int allow_overlapping_node_cpusets = (getenv("HWLOC_DEBUG_ALLOW_OVERLAPPING_NODE_CPUSETS") != NULL);
4033+
int need_memcaches = hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_MEMCACHE);
39574034

39584035
/* NUMA nodes cannot be filtered out */
39594036
indexes = list_sysfsnode(topology, data, path, &nbnodes);
@@ -4120,7 +4197,6 @@ look_sysfsnode(struct hwloc_topology *topology,
41204197
hwloc_linux_knl_numa_quirk(topology, data, nodes, nbnodes, distances, &failednodes);
41214198
free(distances);
41224199
free(nodes);
4123-
free(trees);
41244200
goto out;
41254201
}
41264202
}
@@ -4134,10 +4210,14 @@ look_sysfsnode(struct hwloc_topology *topology,
41344210
for (i = 0; i < nbnodes; i++) {
41354211
hwloc_obj_t node = nodes[i];
41364212
if (node && !hwloc_bitmap_iszero(node->cpuset)) {
4213+
hwloc_obj_t tree;
41374214
/* update from HMAT initiators if any */
41384215
read_node_initiators(data, node, nbnodes, nodes, path);
41394216

4140-
trees[nr_trees++] = node;
4217+
tree = node;
4218+
if (need_memcaches)
4219+
read_node_mscaches(topology, data, path, &tree);
4220+
trees[nr_trees++] = tree;
41414221
}
41424222
}
41434223
/* Now look for empty-cpumap nodes.
@@ -4148,6 +4228,7 @@ look_sysfsnode(struct hwloc_topology *topology,
41484228
for (i = 0; i < nbnodes; i++) {
41494229
hwloc_obj_t node = nodes[i];
41504230
if (node && hwloc_bitmap_iszero(node->cpuset)) {
4231+
hwloc_obj_t tree;
41514232
/* update from HMAT initiators if any */
41524233
if (!read_node_initiators(data, node, nbnodes, nodes, path))
41534234
if (!hwloc_bitmap_iszero(node->cpuset))
@@ -4158,29 +4239,34 @@ look_sysfsnode(struct hwloc_topology *topology,
41584239
fixup_cpuless_node_locality_from_distances(i, nbnodes, nodes, distances);
41594240

41604241
fixed:
4161-
trees[nr_trees++] = node;
4242+
tree = node;
4243+
if (need_memcaches)
4244+
read_node_mscaches(topology, data, path, &tree);
4245+
trees[nr_trees++] = tree;
41624246
}
41634247
}
41644248

41654249
/* insert memory trees for real */
41664250
for (i = 0; i < nr_trees; i++) {
41674251
hwloc_obj_t tree = trees[i];
4168-
hwloc_obj_t cur_obj = tree;
4169-
hwloc_obj_type_t cur_type = cur_obj->type;
4170-
hwloc_obj_t res_obj;
4171-
4172-
assert(!cur_obj->next_sibling);
4173-
assert(!cur_obj->memory_first_child);
4174-
4175-
res_obj = hwloc__insert_object_by_cpuset(topology, NULL, cur_obj, hwloc_report_os_error);
4176-
if (res_obj != cur_obj && cur_type == HWLOC_OBJ_NUMANODE) {
4177-
/* This NUMA node got merged somehow, could be a buggy BIOS reporting wrong NUMA node cpuset.
4178-
* Update it in the array for the distance matrix. */
4179-
unsigned j;
4180-
for(j=0; j<nbnodes; j++)
4181-
if (nodes[j] == cur_obj)
4182-
nodes[j] = res_obj;
4183-
failednodes++;
4252+
while (tree) {
4253+
hwloc_obj_t cur_obj;
4254+
hwloc_obj_t res_obj;
4255+
hwloc_obj_type_t cur_type;
4256+
cur_obj = tree;
4257+
cur_type = cur_obj->type;
4258+
tree = cur_obj->memory_first_child;
4259+
assert(!cur_obj->next_sibling);
4260+
res_obj = hwloc__insert_object_by_cpuset(topology, NULL, cur_obj, hwloc_report_os_error);
4261+
if (res_obj != cur_obj && cur_type == HWLOC_OBJ_NUMANODE) {
4262+
/* This NUMA node got merged somehow, could be a buggy BIOS reporting wrong NUMA node cpuset.
4263+
* Update it in the array for the distance matrix. */
4264+
unsigned j;
4265+
for(j=0; j<nbnodes; j++)
4266+
if (nodes[j] == cur_obj)
4267+
nodes[j] = res_obj;
4268+
failednodes++;
4269+
}
41844270
}
41854271
}
41864272
free(trees);

0 commit comments

Comments
 (0)