Skip to content

Commit 0766987

Browse files
authored
Merge pull request #5863 from grondo/issue#5856
resource: improve `resource.status` response time with many drained ranks
2 parents 3a79643 + 0c3da97 commit 0766987

File tree

11 files changed

+408
-303
lines changed

11 files changed

+408
-303
lines changed

src/common/librlist/rlist.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,8 +859,10 @@ int rlist_append (struct rlist *rl, const struct rlist *rl2)
859859
struct rnode *n = zlistx_first (rl2->nodes);
860860
while (n) {
861861
struct rnode *copy = rnode_copy_avail (n);
862-
if (!copy || rlist_add_rnode (rl, copy) < 0)
862+
if (!copy || rlist_add_rnode (rl, copy) < 0) {
863+
rnode_destroy (copy);
863864
return -1;
865+
}
864866
n = zlistx_next (rl2->nodes);
865867
}
866868

src/modules/resource/Makefile.am

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,13 @@ libresource_la_SOURCES = \
3535
rutil.c \
3636
rutil.h \
3737
status.c \
38-
status.h
38+
status.h \
39+
drainset.h \
40+
drainset.c
3941

40-
TESTS = test_rutil.t
42+
TESTS = \
43+
test_rutil.t \
44+
test_drainset.t
4145

4246
test_ldadd = \
4347
$(builddir)/libresource.la \
@@ -66,3 +70,8 @@ test_rutil_t_SOURCES = test/rutil.c
6670
test_rutil_t_CPPFLAGS = $(test_cppflags)
6771
test_rutil_t_LDADD = $(test_ldadd)
6872
test_rutil_t_LDFLAGS = $(test_ldflags)
73+
74+
test_drainset_t_SOURCES = test/drainset.c
75+
test_drainset_t_CPPFLAGS = $(test_cppflags)
76+
test_drainset_t_LDADD = $(test_ldadd)
77+
test_drainset_t_LDFLAGS = $(test_ldflags)

src/modules/resource/drain.c

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "drain.h"
5353
#include "rutil.h"
5454
#include "inventory.h"
55+
#include "drainset.h"
5556

5657
struct draininfo {
5758
bool drained;
@@ -272,34 +273,23 @@ static void broker_torpid_cb (flux_future_t *f, void *arg)
272273

273274
json_t *drain_get_info (struct drain *drain)
274275
{
275-
json_t *o;
276-
unsigned int rank;
277-
278-
if (!(o = json_object ()))
279-
goto nomem;
280-
for (rank = 0; rank < drain->ctx->size; rank++) {
276+
json_t *o = NULL;
277+
struct drainset *ds = drainset_create ();
278+
if (!ds)
279+
goto error;
280+
for (unsigned int rank = 0; rank < drain->ctx->size; rank++) {
281281
if (drain->info[rank].drained) {
282-
char *reason = drain->info[rank].reason;
283-
json_t *val;
284-
if (!(val = json_pack ("{s:f s:s}",
285-
"timestamp",
286-
drain->info[rank].timestamp,
287-
"reason",
288-
reason ? reason : "")))
289-
goto nomem;
290-
if (rutil_idkey_insert_id (o, rank, val) < 0) {
291-
ERRNO_SAFE_WRAP (json_decref, val);
282+
if (drainset_drain_rank (ds,
283+
rank,
284+
drain->info[rank].timestamp,
285+
drain->info[rank].reason) < 0)
292286
goto error;
293-
}
294-
json_decref (val);
295287
}
296288
}
297-
return o;
298-
nomem:
299-
errno = ENOMEM;
289+
o = drainset_to_json (ds);
300290
error:
301-
ERRNO_SAFE_WRAP (json_decref, o);
302-
return NULL;
291+
drainset_destroy (ds);
292+
return o;
303293
}
304294

305295
struct idset *drain_get (struct drain *drain)

src/modules/resource/drainset.c

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/************************************************************\
2+
* Copyright 2024 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
/* drainset.c - a set of drained ranks with timestamp and reason
12+
*/
13+
14+
#if HAVE_CONFIG_H
15+
#include "config.h"
16+
#endif
17+
#include <time.h>
18+
19+
#include "src/common/libczmqcontainers/czmq_containers.h"
20+
#include "src/common/libutil/errno_safe.h"
21+
#include "src/common/libutil/errprintf.h"
22+
#include "ccan/str/str.h"
23+
24+
#include "drainset.h"
25+
26+
/* One group of drained ranks that share an identical timestamp and reason.
 * An entry is inserted into the drainset hash as both key and value; the
 * hash key is derived from reason + timestamp only.
 */
struct draininfo {
27+
struct idset *ranks; /* ranks belonging to this group */
28+
double timestamp; /* drain timestamp (part of the hash key) */
29+
char *reason; /* strdup()ed drain reason, may be NULL (part of the hash key) */
30+
};
31+
32+
/* A set of drained ranks, grouped by (timestamp, reason).
 */
struct drainset {
33+
zhashx_t *map; /* struct draininfo entries, hashed on reason + timestamp */
34+
};
35+
36+
static void draininfo_destroy (struct draininfo *d)
37+
{
38+
if (d) {
39+
int saved_errno = errno;
40+
idset_destroy (d->ranks);
41+
free (d->reason);
42+
free (d);
43+
errno = saved_errno;
44+
}
45+
}
46+
47+
/* Destructor callback (installed as the zhashx key destructor): destroy
 * the draininfo that *item points to, then clear the caller's pointer.
 * A NULL item is ignored.
 */
static void draininfo_free (void **item)
{
    if (item == NULL)
        return;
    draininfo_destroy (*item);
    *item = NULL;
}
54+
55+
static struct draininfo *draininfo_create_rank (unsigned int rank,
56+
const char *reason,
57+
double timestamp)
58+
{
59+
struct draininfo *d;
60+
if (!(d = calloc (1, sizeof (*d)))
61+
|| !(d->ranks = idset_create (0, IDSET_FLAG_AUTOGROW))
62+
|| idset_set (d->ranks, rank) < 0
63+
|| (reason && !(d->reason = strdup (reason))))
64+
goto error;
65+
d->timestamp = timestamp;
66+
return d;
67+
error:
68+
draininfo_destroy (d);
69+
return NULL;
70+
}
71+
72+
/* Use "modified Bernstein hash" as employed by zhashx internally, but input
73+
* is draininfo reason+timestamp instead of a simple NULL-terminated string.
74+
* Copied from: msg_hash_uuid_matchtag_hasher()
75+
*/
76+
static size_t draininfo_hasher (const void *key)
77+
{
78+
const struct draininfo *d = key;
79+
size_t key_hash = 0;
80+
const char *cp;
81+
82+
cp = d->reason ? d->reason : "";
83+
while (*cp)
84+
key_hash = 33 * key_hash ^ *cp++;
85+
cp = (const char *) &d->timestamp;
86+
for (int i = 0; i < sizeof (d->timestamp); i++)
87+
key_hash = 33 * key_hash ^ *cp++;
88+
return key_hash;
89+
}
90+
91+
static int drainmap_key_cmp (const void *key1, const void *key2)
92+
{
93+
const struct draininfo *d1 = key1;
94+
const struct draininfo *d2 = key2;
95+
if (d1->timestamp == d2->timestamp) {
96+
const char *s1 = d1->reason;
97+
const char *s2 = d2->reason;
98+
return strcmp (s1 ? s1 : "", s2 ? s2 : "");
99+
}
100+
return d1->timestamp < d2->timestamp ? -1 : 1;
101+
}
102+
103+
static zhashx_t *drainmap_create ()
104+
{
105+
zhashx_t *map;
106+
107+
if (!(map = zhashx_new ())) {
108+
errno = ENOMEM;
109+
return NULL;
110+
}
111+
zhashx_set_key_hasher (map, draininfo_hasher);
112+
zhashx_set_key_comparator (map, drainmap_key_cmp);
113+
zhashx_set_key_destructor (map, draininfo_free);
114+
zhashx_set_key_duplicator (map, NULL);
115+
return map;
116+
}
117+
118+
void drainset_destroy (struct drainset *ds)
119+
{
120+
if (ds) {
121+
int saved_errno = errno;
122+
zhashx_destroy (&ds->map);
123+
free (ds);
124+
errno = saved_errno;
125+
}
126+
}
127+
128+
struct drainset *drainset_create (void)
129+
{
130+
struct drainset *ds;
131+
132+
if (!(ds = malloc (sizeof (*ds)))
133+
|| !(ds->map = drainmap_create ()))
134+
goto error;
135+
return ds;
136+
error:
137+
drainset_destroy (ds);
138+
return NULL;
139+
}
140+
141+
static struct draininfo *drainset_find (struct drainset *ds,
142+
double timestamp,
143+
const char *reason)
144+
{
145+
struct draininfo tmp = {.timestamp = timestamp, .reason = (char *)reason};
146+
return zhashx_lookup (ds->map, &tmp);
147+
}
148+
149+
int drainset_drain_rank (struct drainset *ds,
150+
unsigned int rank,
151+
double timestamp,
152+
const char *reason)
153+
{
154+
int rc = -1;
155+
struct draininfo *match;
156+
struct draininfo *new = NULL;
157+
if (!ds) {
158+
errno = EINVAL;
159+
return -1;
160+
}
161+
if ((match = drainset_find (ds, timestamp, reason))) {
162+
if (idset_set (match->ranks, rank) < 0)
163+
return -1;
164+
return 0;
165+
}
166+
if (!(new = draininfo_create_rank (rank, reason, timestamp))
167+
|| zhashx_insert (ds->map, new, new) < 0) {
168+
draininfo_destroy (new);
169+
goto out;
170+
}
171+
rc = 0;
172+
out:
173+
return rc;
174+
}
175+
176+
json_t *drainset_to_json (struct drainset *ds)
177+
{
178+
json_t *o;
179+
struct draininfo *d;
180+
181+
if (!(o = json_object ()))
182+
goto nomem;
183+
d = zhashx_first (ds->map);
184+
while (d) {
185+
json_t *val;
186+
char *s;
187+
if (!(val = json_pack ("{s:f s:s}",
188+
"timestamp", d->timestamp,
189+
"reason", d->reason ? d->reason : ""))
190+
|| !(s = idset_encode (d->ranks, IDSET_FLAG_RANGE))) {
191+
json_decref (val);
192+
goto nomem;
193+
}
194+
if (json_object_set_new (o, s, val) < 0) {
195+
ERRNO_SAFE_WRAP (json_decref, val);
196+
ERRNO_SAFE_WRAP (free, s);
197+
goto error;
198+
}
199+
free (s);
200+
d = zhashx_next (ds->map);
201+
}
202+
return o;
203+
nomem:
204+
errno = EPROTO;
205+
error:
206+
ERRNO_SAFE_WRAP (json_decref, o);
207+
return NULL;
208+
}
209+
210+
/*
211+
* vi:tabstop=4 shiftwidth=4 expandtab
212+
*/

src/modules/resource/drainset.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/************************************************************\
2+
* Copyright 2024 Lawrence Livermore National Security, LLC
3+
* (c.f. AUTHORS, NOTICE.LLNS, COPYING)
4+
*
5+
* This file is part of the Flux resource manager framework.
6+
* For details, see https://github.com/flux-framework.
7+
*
8+
* SPDX-License-Identifier: LGPL-3.0
9+
\************************************************************/
10+
11+
#ifndef _FLUX_RESOURCE_DRAINSET_H
12+
#define _FLUX_RESOURCE_DRAINSET_H
13+
14+
#include <stdbool.h>
15+
#include <jansson.h>
16+
17+
#include <flux/core.h>
18+
#include <flux/idset.h>
19+
20+
struct drainset * drainset_create (void);
21+
void drainset_destroy (struct drainset *dset);
22+
23+
int drainset_drain_rank (struct drainset *dset,
24+
unsigned int rank,
25+
double timestamp,
26+
const char *reason);
27+
28+
json_t *drainset_to_json (struct drainset *dset);
29+
30+
#endif /* ! _FLUX_RESOURCE_DRAINSET_H */
31+
32+
/*
33+
* vi:tabstop=4 shiftwidth=4 expandtab
34+
*/

0 commit comments

Comments
 (0)