Skip to content

Commit dac645b

Browse files
anakryikoAlexei Starovoitov
authored andcommitted
libbpf: use stable map placeholder FDs
Move map creation to later during BPF object loading by pre-creating stable placeholder FDs (utilizing memfd_create()). Use dup2() syscall to then atomically make those placeholder FDs point to real kernel BPF map objects. This change allows to delay BPF map creation to after all the BPF program relocations. That, in turn, allows to delay BTF finalization and loading into kernel to after all the relocations as well. We'll take advantage of the latter in subsequent patches to allow libbpf to adjust BTF in a way that helps with BPF global function usage. Clean up a few places where we close map->fd, which now shouldn't happen, because map->fd should be a valid FD regardless of whether map was created or not. Surprisingly and nicely it simplifies a bunch of error handling code. If this change doesn't backfire, I'm tempted to pre-create such stable FDs for other entities (progs, maybe even BTF). We previously did some manipulations to make gen_loader work with fake map FDs, with stable map FDs this hack is not necessary for maps (we still have it for BTF, but I left it as is for now). Acked-by: Jiri Olsa <[email protected]> Signed-off-by: Andrii Nakryiko <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent f08c18e commit dac645b

File tree

2 files changed

+77
-38
lines changed

2 files changed

+77
-38
lines changed

tools/lib/bpf/libbpf.c

Lines changed: 63 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1503,6 +1503,16 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
15031503
return ERR_PTR(-ENOENT);
15041504
}
15051505

1506+
static int create_placeholder_fd(void)
1507+
{
1508+
int fd;
1509+
1510+
fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
1511+
if (fd < 0)
1512+
return -errno;
1513+
return fd;
1514+
}
1515+
15061516
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
15071517
{
15081518
struct bpf_map *map;
@@ -1515,7 +1525,21 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
15151525

15161526
map = &obj->maps[obj->nr_maps++];
15171527
map->obj = obj;
1518-
map->fd = -1;
1528+
/* Preallocate map FD without actually creating BPF map just yet.
1529+
* These map FD "placeholders" will be reused later without changing
1530+
* FD value when map is actually created in the kernel.
1531+
*
1532+
* This is useful to be able to perform BPF program relocations
1533+
* without having to create BPF maps before that step. This allows us
1534+
* to finalize and load BTF very late in BPF object's loading phase,
1535+
* right before BPF maps have to be created and BPF programs have to
1536+
* be loaded. By having these map FD placeholders we can perform all
1537+
* the sanitizations, relocations, and any other adjustments before we
1538+
* start creating actual BPF kernel objects (BTF, maps, progs).
1539+
*/
1540+
map->fd = create_placeholder_fd();
1541+
if (map->fd < 0)
1542+
return ERR_PTR(map->fd);
15191543
map->inner_map_fd = -1;
15201544
map->autocreate = true;
15211545

@@ -2607,7 +2631,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
26072631
map->inner_map = calloc(1, sizeof(*map->inner_map));
26082632
if (!map->inner_map)
26092633
return -ENOMEM;
2610-
map->inner_map->fd = -1;
2634+
map->inner_map->fd = create_placeholder_fd();
2635+
if (map->inner_map->fd < 0)
2636+
return map->inner_map->fd;
26112637
map->inner_map->sec_idx = sec_idx;
26122638
map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
26132639
if (!map->inner_map->name)
@@ -4549,14 +4575,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
45494575
goto err_free_new_name;
45504576
}
45514577

4552-
err = zclose(map->fd);
4553-
if (err) {
4554-
err = -errno;
4555-
goto err_close_new_fd;
4556-
}
4578+
err = reuse_fd(map->fd, new_fd);
4579+
if (err)
4580+
goto err_free_new_name;
4581+
45574582
free(map->name);
45584583

4559-
map->fd = new_fd;
45604584
map->name = new_name;
45614585
map->def.type = info.type;
45624586
map->def.key_size = info.key_size;
@@ -4570,8 +4594,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
45704594

45714595
return 0;
45724596

4573-
err_close_new_fd:
4574-
close(new_fd);
45754597
err_free_new_name:
45764598
free(new_name);
45774599
return libbpf_err(err);
@@ -5210,7 +5232,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
52105232
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
52115233
struct bpf_map_def *def = &map->def;
52125234
const char *map_name = NULL;
5213-
int err = 0;
5235+
int err = 0, map_fd;
52145236

52155237
if (kernel_supports(obj, FEAT_PROG_NAME))
52165238
map_name = map->name;
@@ -5269,17 +5291,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
52695291
bpf_gen__map_create(obj->gen_loader, def->type, map_name,
52705292
def->key_size, def->value_size, def->max_entries,
52715293
&create_attr, is_inner ? -1 : map - obj->maps);
5272-
/* Pretend to have valid FD to pass various fd >= 0 checks.
5273-
* This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
5294+
/* We keep pretenting we have valid FD to pass various fd >= 0
5295+
* checks by just keeping original placeholder FDs in place.
5296+
* See bpf_object__add_map() comment.
5297+
* This placeholder fd will not be used with any syscall and
5298+
* will be reset to -1 eventually.
52745299
*/
5275-
map->fd = 0;
5300+
map_fd = map->fd;
52765301
} else {
5277-
map->fd = bpf_map_create(def->type, map_name,
5278-
def->key_size, def->value_size,
5279-
def->max_entries, &create_attr);
5302+
map_fd = bpf_map_create(def->type, map_name,
5303+
def->key_size, def->value_size,
5304+
def->max_entries, &create_attr);
52805305
}
5281-
if (map->fd < 0 && (create_attr.btf_key_type_id ||
5282-
create_attr.btf_value_type_id)) {
5306+
if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
52835307
char *cp, errmsg[STRERR_BUFSIZE];
52845308

52855309
err = -errno;
@@ -5291,21 +5315,31 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
52915315
create_attr.btf_value_type_id = 0;
52925316
map->btf_key_type_id = 0;
52935317
map->btf_value_type_id = 0;
5294-
map->fd = bpf_map_create(def->type, map_name,
5295-
def->key_size, def->value_size,
5296-
def->max_entries, &create_attr);
5318+
map_fd = bpf_map_create(def->type, map_name,
5319+
def->key_size, def->value_size,
5320+
def->max_entries, &create_attr);
52975321
}
52985322

5299-
err = map->fd < 0 ? -errno : 0;
5300-
53015323
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
53025324
if (obj->gen_loader)
53035325
map->inner_map->fd = -1;
53045326
bpf_map__destroy(map->inner_map);
53055327
zfree(&map->inner_map);
53065328
}
53075329

5308-
return err;
5330+
if (map_fd < 0)
5331+
return map_fd;
5332+
5333+
/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5334+
if (map->fd == map_fd)
5335+
return 0;
5336+
5337+
/* Keep placeholder FD value but now point it to the BPF map object.
5338+
* This way everything that relied on this map's FD (e.g., relocated
5339+
* ldimm64 instructions) will stay valid and won't need adjustments.
5340+
* map->fd stays valid but now point to what map_fd points to.
5341+
*/
5342+
return reuse_fd(map->fd, map_fd);
53095343
}
53105344

53115345
static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
@@ -5389,10 +5423,8 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj)
53895423
continue;
53905424

53915425
err = init_prog_array_slots(obj, map);
5392-
if (err < 0) {
5393-
zclose(map->fd);
5426+
if (err < 0)
53945427
return err;
5395-
}
53965428
}
53975429
return 0;
53985430
}
@@ -5483,25 +5515,20 @@ bpf_object__create_maps(struct bpf_object *obj)
54835515

54845516
if (bpf_map__is_internal(map)) {
54855517
err = bpf_object__populate_internal_map(obj, map);
5486-
if (err < 0) {
5487-
zclose(map->fd);
5518+
if (err < 0)
54885519
goto err_out;
5489-
}
54905520
}
54915521

54925522
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
54935523
err = init_map_in_map_slots(obj, map);
5494-
if (err < 0) {
5495-
zclose(map->fd);
5524+
if (err < 0)
54965525
goto err_out;
5497-
}
54985526
}
54995527
}
55005528

55015529
if (map->pin_path && !map->pinned) {
55025530
err = bpf_map__pin(map, NULL);
55035531
if (err) {
5504-
zclose(map->fd);
55055532
if (!retried && err == -EEXIST) {
55065533
retried = true;
55075534
goto retry;
@@ -8075,8 +8102,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
80758102
err = err ? : bpf_object__sanitize_and_load_btf(obj);
80768103
err = err ? : bpf_object__sanitize_maps(obj);
80778104
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8078-
err = err ? : bpf_object__create_maps(obj);
80798105
err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8106+
err = err ? : bpf_object__create_maps(obj);
80808107
err = err ? : bpf_object__load_progs(obj, extra_log_level);
80818108
err = err ? : bpf_object_init_prog_arrays(obj);
80828109
err = err ? : bpf_object_prepare_struct_ops(obj);
@@ -8085,8 +8112,6 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
80858112
/* reset FDs */
80868113
if (obj->btf)
80878114
btf__set_fd(obj->btf, -1);
8088-
for (i = 0; i < obj->nr_maps; i++)
8089-
obj->maps[i].fd = -1;
80908115
if (!err)
80918116
err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
80928117
}

tools/lib/bpf/libbpf_internal.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,20 @@ static inline int ensure_good_fd(int fd)
555555
return fd;
556556
}
557557

558+
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
559+
* Regardless of success, *tmp_fd* is closed.
560+
* Whatever *fixed_fd* pointed to is closed silently.
561+
*/
562+
static inline int reuse_fd(int fixed_fd, int tmp_fd)
563+
{
564+
int err;
565+
566+
err = dup2(tmp_fd, fixed_fd);
567+
err = err < 0 ? -errno : 0;
568+
close(tmp_fd); /* clean up temporary FD */
569+
return err;
570+
}
571+
558572
/* The following two functions are exposed to bpftool */
559573
int bpf_core_add_cands(struct bpf_core_cand *local_cand,
560574
size_t local_essent_len,

0 commit comments

Comments
 (0)