Skip to content

Commit 9b53fe5

Browse files
committed
API+linux: add new THISSYSTEM_ALLOWED_RESOURCES flag
XML is useful for improving discovery time, but it doesn't work when the XML should be adapted to different cgroups for different jobs on the same machine. This new flag (or the corresponding env var), when combined with IS_THISSYSTEM, loads a normal XML or synthetic topology and applies the cpu/cgroup restrictions that the actual machine reports on Linux. Note that the XML must be created without any restriction (outside of any cgroup that disallows some PUs or NUMAs on Linux). Signed-off-by: Brice Goglin <[email protected]>
1 parent f5a6b46 commit 9b53fe5

File tree

7 files changed

+128
-20
lines changed

7 files changed

+128
-20
lines changed

doc/Makefile.am

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright © 2009-2016 Inria. All rights reserved.
1+
# Copyright © 2009-2017 Inria. All rights reserved.
22
# Copyright © 2009-2013 Université Bordeaux
33
# Copyright © 2009-2016 Cisco Systems, Inc. All rights reserved.
44
# See COPYING in top-level directory.
@@ -430,8 +430,9 @@ man3_setsource_DATA = \
430430
man3_configurationdir = $(man3dir)
431431
man3_configuration_DATA = \
432432
$(DOX_MAN_DIR)/man3/hwlocality_configuration.3 \
433-
$(DOX_MAN_DIR)/man3/HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM.3 \
434433
$(DOX_MAN_DIR)/man3/HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM.3 \
434+
$(DOX_MAN_DIR)/man3/HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM.3 \
435+
$(DOX_MAN_DIR)/man3/HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES.3 \
435436
$(DOX_MAN_DIR)/man3/hwloc_topology_flags_e.3 \
436437
$(DOX_MAN_DIR)/man3/hwloc_topology_get_flags.3 \
437438
$(DOX_MAN_DIR)/man3/hwloc_topology_set_flags.3 \

doc/hwloc.doxy

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,20 @@ following environment variables.
844844
This can be used for efficiency reasons to first detect the topology once,
845845
save it to an XML file, and quickly reload it later through the XML
846846
backend, but still having binding functions actually do bind.
847+
This also enables support for the variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES.
848+
</dd>
849+
850+
<dt>HWLOC_THISSYSTEM_ALLOWED_RESOURCES=1</dt>
851+
<dd>Get the set of allowed resources from the native operating system
852+
even if the topology was loaded from XML or synthetic description,
853+
as if ::HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES was set
854+
with hwloc_topology_set_flags().
855+
This variable requires the topology to match the current system
856+
(see the variable HWLOC_THISSYSTEM).
857+
This is useful when the topology is not loaded directly from the
858+
local machine (e.g. for performance reasons) and it comes with all
859+
resources, but the running process is restricted to only a part
860+
of the machine (for instance because of Linux Cgroup/Cpuset).
847861
</dd>
848862

849863
<dt>HWLOC_HIDE_ERRORS=0</dt>
@@ -1662,6 +1676,12 @@ This behavior may be reverted by asserting that loaded file really
16621676
matches the underlying system with the HWLOC_THISSYSTEM environment
16631677
variable or the ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM topology flag.
16641678

1679+
\note The topology flag ::HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES
1680+
may be used to load an XML topology that contains the entire machine
1681+
and restrict it to the part that is actually available to the current
1682+
process (e.g. when Linux Cgroup/Cpuset are used to restrict the set
1683+
of resources).
1684+
16651685
\note hwloc also offers the ability to export/import \ref hwlocality_diff.
16661686

16671687
\note XML topology files are not localized. They use a dot as a
@@ -2819,6 +2839,12 @@ interface, and the import/export may also be directed to memory buffer
28192839
(that may for instance be transmitted between applications through a package).
28202840
See also \ref xml.
28212841

2842+
\note The environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES
2843+
may be used to load an XML topology that contains the entire machine
2844+
and restrict it to the part that is actually available to the current
2845+
process (e.g. when Linux Cgroup/Cpuset are used to restrict the set
2846+
of resources). See \ref envvar.
2847+
28222848

28232849
\subsection faq_multitopo How many topologies may I use in my program?
28242850

hwloc/topology-linux.c

Lines changed: 64 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1956,6 +1956,49 @@ hwloc_linux_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unu
19561956
return ret;
19571957
}
19581958

1959+
static void hwloc_linux__get_allowed_resources(hwloc_topology_t topology, const char *root_path, int root_fd, char **cpuset_namep);

/* Binding hook that applies the restrictions reported by the local Linux
 * system (cgroup/cpuset) to an already-built topology.
 *
 * The filesystem root is taken from the HWLOC_FSROOT environment variable,
 * or "/" when it is unset. If a cpuset name is found, it is also attached
 * to the root object as a "LinuxCgroup" info attribute.
 *
 * Returns 0 when the restrictions were gathered, or -1 with errno set when
 * the filesystem root cannot be used (open() failure, or a non-"/" root
 * without openat() support).
 */
static int hwloc_linux_get_allowed_resources_hook(hwloc_topology_t topology)
{
  const char *fsroot_path;
  char *cpuset_name = NULL;
  int root_fd = -1;

  fsroot_path = getenv("HWLOC_FSROOT");
  if (!fsroot_path)
    fsroot_path = "/";

#ifdef HAVE_OPENAT
  root_fd = open(fsroot_path, O_RDONLY | O_DIRECTORY);
  if (root_fd < 0)
    /* errno was set by open() */
    return -1;
#else
  /* without openat() only the real root can be used, root_fd stays -1 */
  if (strcmp(fsroot_path, "/")) {
    errno = ENOSYS;
    return -1;
  }
#endif

  /* we could also error-out if the current topology doesn't actually match the system,
   * at least for PUs and NUMA nodes. But it would increase the overhead of loading XMLs.
   *
   * Just trust the user when they set THISSYSTEM=1. It enables hacky
   * tests such as restricting random XML or synthetic to the current
   * machine (uses the default cgroup).
   */

  hwloc_linux__get_allowed_resources(topology, fsroot_path, root_fd, &cpuset_name);
  if (cpuset_name) {
    hwloc_obj_add_info(topology->levels[0][0], "LinuxCgroup", cpuset_name);
    free(cpuset_name);
  }
  if (root_fd != -1)
    close(root_fd);

  /* success: do not report the error code used by the failure paths above */
  return 0;
}
2001+
19592002
void
19602003
hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *hooks,
19612004
struct hwloc_topology_support *support __hwloc_attribute_unused)
@@ -1987,6 +2030,7 @@ hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *hooks,
19872030
support->membind->bind_membind = 1;
19882031
support->membind->interleave_membind = 1;
19892032
support->membind->migrate_membind = 1;
2033+
hooks->get_allowed_resources = hwloc_linux_get_allowed_resources_hook;
19902034
}
19912035

19922036

@@ -2152,7 +2196,7 @@ hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid)
21522196
* are cgroup<name>/cpuset.{cpus,mems} or cpuset<name>/{cpus,mems} files.
21532197
*/
21542198
static void
2155-
hwloc_admin_disable_set_from_cpuset(struct hwloc_linux_backend_data_s *data,
2199+
hwloc_admin_disable_set_from_cpuset(int root_fd,
21562200
const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name,
21572201
const char *attr_name,
21582202
hwloc_bitmap_t admin_enabled_cpus_set)
@@ -2172,7 +2216,7 @@ hwloc_admin_disable_set_from_cpuset(struct hwloc_linux_backend_data_s *data,
21722216
hwloc_debug("Trying to read cpuset file <%s>\n", cpuset_filename);
21732217
}
21742218

2175-
fd = hwloc_open(cpuset_filename, data->root_fd);
2219+
fd = hwloc_open(cpuset_filename, root_fd);
21762220
if (fd < 0) {
21772221
/* found no cpuset description, ignore it */
21782222
hwloc_debug("Couldn't find cpuset <%s> description, ignoring\n", cpuset_name);
@@ -4152,13 +4196,29 @@ hwloc_linux_try_hardwired_cpuinfo(struct hwloc_backend *backend)
41524196
return -1;
41534197
}
41544198

4199+
/* Restrict the allowed cpuset and nodeset of the root object according to
 * the Linux cgroup/cpuset that topology->pid belongs to.
 *
 * root_path and root_fd designate the filesystem root used to locate the
 * cgroup/cpuset mount points and to read their files.
 *
 * On return, *cpuset_namep holds the cpuset name that was read (to be
 * released with free() by the caller), or NULL when no mount point or no
 * name was found.
 */
static void hwloc_linux__get_allowed_resources(hwloc_topology_t topology, const char *root_path, int root_fd, char **cpuset_namep)
{
  char *cgroup_dir, *cpuset_dir;
  char *name;

  hwloc_find_linux_cpuset_mntpnt(&cgroup_dir, &cpuset_dir, root_path);
  if (!cgroup_dir && !cpuset_dir) {
    /* neither a cgroup nor a cpuset filesystem is mounted, nothing to apply */
    *cpuset_namep = NULL;
    return;
  }

  name = hwloc_read_linux_cpuset_name(root_fd, topology->pid);
  if (name) {
    /* "cpus" restricts the allowed PUs, "mems" restricts the allowed NUMA nodes */
    hwloc_admin_disable_set_from_cpuset(root_fd, cgroup_dir, cpuset_dir, name,
                                        "cpus", topology->levels[0][0]->allowed_cpuset);
    hwloc_admin_disable_set_from_cpuset(root_fd, cgroup_dir, cpuset_dir, name,
                                        "mems", topology->levels[0][0]->allowed_nodeset);
  }

  free(cgroup_dir);
  free(cpuset_dir);
  *cpuset_namep = name;
}
4214+
41554215
static int
41564216
hwloc_look_linuxfs(struct hwloc_backend *backend)
41574217
{
41584218
struct hwloc_topology *topology = backend->topology;
41594219
struct hwloc_linux_backend_data_s *data = backend->private_data;
41604220
unsigned nbnodes;
4161-
char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL;
4221+
char *cpuset_name;
41624222
struct hwloc_linux_cpuinfo_proc * Lprocs = NULL;
41634223
struct hwloc_obj_info_s *global_infos = NULL;
41644224
unsigned global_infos_count = 0;
@@ -4226,16 +4286,7 @@ hwloc_look_linuxfs(struct hwloc_backend *backend)
42264286
/**********************
42274287
* Gather the list of admin-disabled cpus and mems
42284288
*/
4229-
hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, data->root_path);
4230-
if (cgroup_mntpnt || cpuset_mntpnt) {
4231-
cpuset_name = hwloc_read_linux_cpuset_name(data->root_fd, topology->pid);
4232-
if (cpuset_name) {
4233-
hwloc_admin_disable_set_from_cpuset(data, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "cpus", topology->levels[0][0]->allowed_cpuset);
4234-
hwloc_admin_disable_set_from_cpuset(data, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "mems", topology->levels[0][0]->allowed_nodeset);
4235-
}
4236-
free(cgroup_mntpnt);
4237-
free(cpuset_mntpnt);
4238-
}
4289+
hwloc_linux__get_allowed_resources(topology, data->root_path, data->root_fd, &cpuset_name);
42394290

42404291
/*********************
42414292
* Memory information

hwloc/topology.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright © 2009 CNRS
3-
* Copyright © 2009-2016 Inria. All rights reserved.
3+
* Copyright © 2009-2017 Inria. All rights reserved.
44
* Copyright © 2009-2012 Université Bordeaux
55
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
66
* See COPYING in top-level directory.
@@ -2618,6 +2618,12 @@ hwloc_discover(struct hwloc_topology *topology)
26182618
return -1;
26192619
}
26202620

2621+
if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) {
2622+
const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES");
2623+
if ((env && atoi(env))
2624+
|| (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES))
2625+
topology->binding_hooks.get_allowed_resources(topology);
2626+
}
26212627
hwloc_debug("%s", "\nPropagate disallowed cpus down and up\n");
26222628
hwloc_bitmap_and(topology->levels[0][0]->allowed_cpuset, topology->levels[0][0]->allowed_cpuset, topology->levels[0][0]->cpuset);
26232629
propagate_unused_cpuset(topology->levels[0][0], NULL);
@@ -2905,7 +2911,7 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags)
29052911
return -1;
29062912
}
29072913

2908-
if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) {
2914+
if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) {
29092915
errno = EINVAL;
29102916
return -1;
29112917
}

include/hwloc.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1738,7 +1738,28 @@ enum hwloc_topology_flags_e {
17381738
* backend, but still having binding functions actually do bind.
17391739
* \hideinitializer
17401740
*/
1741-
HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1)
1741+
HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1),
1742+
1743+
/** \brief Get the set of allowed resources from the local operating system even if the topology was loaded from XML or synthetic description.
1744+
*
1745+
* If the topology was loaded from XML or from a synthetic string,
1746+
* restrict it by applying the current process restrictions such as
1747+
* Linux Cgroup/Cpuset.
1748+
*
1749+
* This is useful when the topology is not loaded directly from
1750+
* the local machine (e.g. for performance reasons) and it comes
1751+
* with all resources, while the running process is restricted
1752+
* to only parts of the machine.
1753+
*
1754+
* This flag is ignored unless ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM is
1755+
* also set since the loaded topology must match the underlying machine
1756+
* where restrictions will be gathered from.
1757+
*
1758+
* Setting the environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES
1759+
* would result in the same behavior.
1760+
* \hideinitializer
1761+
*/
1762+
HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2)
17421763
};
17431764

17441765
/** \brief Set OR'ed flags to non-yet-loaded topology.

include/hwloc/rename.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
3-
* Copyright © 2010-2016 Inria. All rights reserved.
3+
* Copyright © 2010-2017 Inria. All rights reserved.
44
* See COPYING in top-level directory.
55
*/
66

@@ -119,6 +119,7 @@ extern "C" {
119119

120120
#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM)
121121
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
122+
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
122123

123124
#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
124125
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)

include/private/private.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright © 2009 CNRS
3-
* Copyright © 2009-2016 Inria. All rights reserved.
3+
* Copyright © 2009-2017 Inria. All rights reserved.
44
* Copyright © 2009-2012 Université Bordeaux
55
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
66
*
@@ -110,6 +110,8 @@ struct hwloc_topology {
110110
* see hwloc_alloc_or_fail which is convenient for that. */
111111
void *(*alloc_membind)(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
112112
int (*free_membind)(hwloc_topology_t topology, void *addr, size_t len);
113+
114+
int (*get_allowed_resources)(hwloc_topology_t topology);
113115
} binding_hooks;
114116

115117
struct hwloc_topology_support support;

0 commit comments

Comments
 (0)