Skip to content

Commit a34a229

Browse files
authored
Merge pull request #4538 from grondo/issue#4536
resource: add norestrict option to avoid restricting hwloc topology XML
2 parents 9b2d79e + 61cba39 commit a34a229

File tree

13 files changed

+69
-33
lines changed

13 files changed

+69
-33
lines changed

doc/man5/flux-config-resource.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ exclude
3232
the scheduler, but will still be used to determine satisfiability of job
3333
requests until the instance is restarted.
3434

35+
norestrict
36+
(optional) Do not restrict the loaded hwloc topology XML to the
37+
current cpu affinity mask of the Flux broker. This option should be used
38+
when the Flux system instance is constrained to a subset of cores,
39+
but jobs run within this instance should have access to all cores.
40+
3541

3642
EXAMPLE
3743
=======
@@ -41,6 +47,7 @@ EXAMPLE
4147
[resource]
4248
path = "/etc/flux/system/R"
4349
exclude = "test[3,108]"
50+
norestrict = true
4451

4552

4653
RESOURCES

doc/test/spell.en.pws

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,3 +634,4 @@ pbatch
634634
pdebug
635635
parentof
636636
bg
637+
norestrict

src/cmd/flux-R.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ static char *get_xml (optparse_t *p)
364364
log_err_exit ("failed to read XML");
365365
}
366366
else if (optparse_hasopt (p, "local")) {
367-
if (!(s = rhwloc_local_topology_xml ()))
367+
if (!(s = rhwloc_local_topology_xml (0)))
368368
log_err_exit ("failed to gather local topology XML");
369369
}
370370

src/common/librlist/rhwloc.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
#include <flux/idset.h>
1616

17-
#include "hwloc.h"
17+
#include "rhwloc.h"
1818

1919
/* Common hwloc_topology_init() and flags for Flux hwloc usage:
2020
*/
@@ -65,7 +65,8 @@ hwloc_topology_t rhwloc_xml_topology_load (const char *xml)
6565
return topo;
6666
}
6767

68-
hwloc_topology_t rhwloc_local_topology_load (void)
68+
69+
hwloc_topology_t rhwloc_local_topology_load (rhwloc_flags_t flags)
6970
{
7071
hwloc_topology_t topo = NULL;
7172
hwloc_bitmap_t rset = NULL;
@@ -87,6 +88,8 @@ hwloc_topology_t rhwloc_local_topology_load (void)
8788
#endif
8889
if (hwloc_topology_load (topo) < 0)
8990
goto err;
91+
if (flags & RHWLOC_NO_RESTRICT)
92+
return (topo);
9093
if (!(rset = hwloc_bitmap_alloc ())
9194
|| (hwloc_get_cpubind (topo, rset, HWLOC_CPUBIND_PROCESS) < 0))
9295
goto err;
@@ -100,12 +103,12 @@ hwloc_topology_t rhwloc_local_topology_load (void)
100103
return NULL;
101104
}
102105

103-
char *rhwloc_local_topology_xml (void)
106+
char *rhwloc_local_topology_xml (rhwloc_flags_t rflags)
104107
{
105108
char *buf;
106109
int buflen;
107110
char *copy;
108-
hwloc_topology_t topo = rhwloc_local_topology_load ();
111+
hwloc_topology_t topo = rhwloc_local_topology_load (rflags);
109112
if (topo == NULL)
110113
return (NULL);
111114
#if HWLOC_API_VERSION >= 0x20000

src/common/librlist/rhwloc.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,21 @@
1313

1414
#include <hwloc.h>
1515

16+
typedef enum {
17+
RHWLOC_NO_RESTRICT = 0x1
18+
} rhwloc_flags_t;
19+
1620
/* Load local topology with Flux standard flags and filtering
1721
*/
18-
hwloc_topology_t rhwloc_local_topology_load (void);
22+
hwloc_topology_t rhwloc_local_topology_load (rhwloc_flags_t flags);
1923

2024
/* As above, but return hwloc_topology_t from XML
2125
*/
2226
hwloc_topology_t rhwloc_xml_topology_load (const char *xml);
2327

2428
/* Load local topology and return XML as allocated string
2529
*/
26-
char *rhwloc_local_topology_xml (void);
30+
char *rhwloc_local_topology_xml (rhwloc_flags_t flags);
2731

2832
/* Return HostName from an hwloc topology object
2933
*/

src/common/librlist/rlist.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2269,7 +2269,7 @@ struct rlist *rlist_from_hwloc (int rank, const char *xml)
22692269
if (xml)
22702270
topo = rhwloc_xml_topology_load (xml);
22712271
else
2272-
topo = rhwloc_local_topology_load ();
2272+
topo = rhwloc_local_topology_load (0);
22732273
if (!topo)
22742274
goto fail;
22752275
if (!(ids = rhwloc_core_idset_string (topo))

src/common/librlist/test/rhwloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,7 @@ void test_xml ()
781781
struct rlist *rl2 = NULL;
782782
char *s1 = NULL;
783783
char *s2 = NULL;
784-
char *xml = rhwloc_local_topology_xml ();
784+
char *xml = rhwloc_local_topology_xml (0);
785785
if (!xml)
786786
BAIL_OUT ("rhwloc_local_topology_xml failed!");
787787
pass ("rhwloc_topology_xml");

src/modules/resource/resource.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,26 +43,32 @@
4343
*
4444
* noverify = true
4545
* Skip verification that configured resources match local hwloc
46+
*
47+
* norestrict = true
48+
* When generating hwloc topology XML, do not restrict to current cpumask
4649
*/
4750
static int parse_config (struct resource_ctx *ctx,
4851
const flux_conf_t *conf,
4952
const char **excludep,
5053
json_t **R,
5154
bool *noverifyp,
55+
bool *norestrictp,
5256
flux_error_t *errp)
5357
{
5458
flux_error_t error;
5559
const char *exclude = NULL;
5660
const char *path = NULL;
5761
int noverify = 0;
62+
int norestrict = 0;
5863
json_t *o = NULL;
5964

6065
if (flux_conf_unpack (conf,
6166
&error,
62-
"{s?:{s?:s s?:s s?:b !}}",
67+
"{s?:{s?:s s?:s s?:b s?b !}}",
6368
"resource",
6469
"path", &path,
6570
"exclude", &exclude,
71+
"norestrict", &norestrict,
6672
"noverify", &noverify) < 0) {
6773
errprintf (errp,
6874
"error parsing [resource] configuration: %s",
@@ -97,6 +103,8 @@ static int parse_config (struct resource_ctx *ctx,
97103
*excludep = exclude;
98104
if (noverifyp)
99105
*noverifyp = noverify ? true : false;
106+
if (norestrictp)
107+
*norestrictp = norestrict ? true : false;
100108
if (R)
101109
*R = o;
102110
return 0;
@@ -126,6 +134,7 @@ static void config_reload_cb (flux_t *h,
126134
&exclude,
127135
NULL,
128136
NULL,
137+
NULL,
129138
&error) < 0) {
130139
errstr = error.text;
131140
goto error;
@@ -397,6 +406,7 @@ int mod_main (flux_t *h, int argc, char **argv)
397406
json_t *eventlog = NULL;
398407
bool monitor_force_up = false;
399408
bool noverify = false;
409+
bool norestrict = false;
400410
json_t *R_from_config;
401411

402412
if (!(ctx = resource_ctx_create (h)))
@@ -410,6 +420,7 @@ int mod_main (flux_t *h, int argc, char **argv)
410420
&exclude_idset,
411421
&R_from_config,
412422
&noverify,
423+
&norestrict,
413424
&error) < 0) {
414425
flux_log (h, LOG_ERR, "%s", error.text);
415426
goto error;
@@ -426,7 +437,7 @@ int mod_main (flux_t *h, int argc, char **argv)
426437
}
427438
if (!(ctx->inventory = inventory_create (ctx, R_from_config)))
428439
goto error;
429-
if (!(ctx->topology = topo_create (ctx, noverify)))
440+
if (!(ctx->topology = topo_create (ctx, noverify, norestrict)))
430441
goto error;
431442
if (!(ctx->monitor = monitor_create (ctx, monitor_force_up)))
432443
goto error;

src/modules/resource/topo.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#endif
2323
#include <jansson.h>
2424
#include <flux/core.h>
25-
#include <hwloc.h>
2625

2726
#include "src/common/libidset/idset.h"
2827
#include "src/common/libutil/errno_safe.h"
@@ -275,15 +274,18 @@ void topo_destroy (struct topo *topo)
275274
}
276275
}
277276

278-
struct topo *topo_create (struct resource_ctx *ctx, bool no_verify)
277+
struct topo *topo_create (struct resource_ctx *ctx,
278+
bool no_verify,
279+
bool no_restrict)
279280
{
280281
struct topo *topo;
281282
json_t *R;
283+
int flags = no_restrict ? RHWLOC_NO_RESTRICT : 0;
282284

283285
if (!(topo = calloc (1, sizeof (*topo))))
284286
return NULL;
285287
topo->ctx = ctx;
286-
if (!(topo->xml = rhwloc_local_topology_xml ())) {
288+
if (!(topo->xml = rhwloc_local_topology_xml (flags))) {
287289
flux_log_error (ctx->h, "error loading hwloc topology");
288290
goto error;
289291
}

src/modules/resource/topo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
#ifndef _FLUX_RESOURCE_TOPO_H
1212
#define _FLUX_RESOURCE_TOPO_H
1313

14-
struct topo *topo_create (struct resource_ctx *ctx, bool no_verify);
14+
struct topo *topo_create (struct resource_ctx *ctx,
15+
bool no_verify,
16+
bool no_restrict);
1517
void topo_destroy (struct topo *topo);
1618

1719

0 commit comments

Comments
 (0)