Skip to content

Commit fadc55e

Browse files
committed
nsenter: implement a two-stage join for setns
If we are running with privileges and are asked to join an externally created user namespace as well as some other namespace that was *not* created underneath said user namespace, the approach we added in commit 2cd9c31 ("nsenter: guarantee correct user namespace ordering") doesn't work. While in theory you would want all externally created namespaces to be sane, it seems that some tools really do create unrelated namespaces and ask us to join them. Luckily we can just loosely copy what nsenter(1) appears to do -- we first try to join any namespaces we can (with host root privileges), then we join any user namespaces, and then we join any remaining namespaces (now with the user namespace's privileges). Note that we *do not* have to try to join namespaces after we create our own user namespace. Namespace permissions are based purely on the owning user namespace (not the rootuid) so we will not have access to any extra namespaces once we unshare(CLONE_NEWUSER) (in fact we will not be able to setns(2) to anything!). Fixes: 2cd9c31 ("nsenter: guarantee correct user namespace ordering") Signed-off-by: Aleksa Sarai <[email protected]>
1 parent a97d7cb commit fadc55e

File tree

1 file changed

+137
-29
lines changed

1 file changed

+137
-29
lines changed

libcontainer/nsenter/nsexec.c

Lines changed: 137 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,14 @@ void nl_free(struct nlconfig_t *config)
420420
free(config->data);
421421
}
422422

423+
/*
 * One namespace to join, filled in from a single 'ns:path' entry of the
 * namespace spec.
 *
 *   fd   - descriptor handed to setns(2); it is closed and reset to -1 once
 *          the namespace has been joined, so fd >= 0 means "not joined yet".
 *   type - namespace type name, translated to a flag value via nstype().
 *   path - path portion of the spec entry (used in log messages).
 */
struct namespace_t {
424+
int fd;
425+
char type[PATH_MAX];
426+
char path[PATH_MAX];
427+
};
428+
429+
/*
 * A set of namespaces, stored as the bitwise OR of the flag values returned
 * by nstype() (CLONE_NEWUSER is one such flag).
 */
typedef int nsset_t;
430+
423431
static struct nstype_t {
424432
int type;
425433
char *name;
@@ -451,35 +459,28 @@ static int nstype(char *name)
451459
bail("unknown namespace type %s", name);
452460
}
453461

454-
void join_namespaces(char *nslist)
462+
static nsset_t __open_namespaces(char *nsspec, struct namespace_t **ns_list, size_t *ns_len)
455463
{
456-
int num = 0, i;
457-
char *saveptr = NULL;
458-
char *namespace = strtok_r(nslist, ",", &saveptr);
459-
struct namespace_t {
460-
int fd;
461-
char type[PATH_MAX];
462-
char path[PATH_MAX];
463-
} *namespaces = NULL;
464+
int len = 0;
465+
nsset_t ns_to_join = 0;
466+
char *namespace, *saveptr = NULL;
467+
struct namespace_t *namespaces = NULL;
468+
469+
namespace = strtok_r(nsspec, ",", &saveptr);
464470

465-
if (!namespace || !strlen(namespace) || !strlen(nslist))
471+
if (!namespace || !strlen(namespace) || !strlen(nsspec))
466472
bail("ns paths are empty");
467473

468-
/*
469-
* We have to open the file descriptors first, since after
470-
* we join the mnt namespace we might no longer be able to
471-
* access the paths.
472-
*/
473474
do {
474475
int fd;
475476
char *path;
476477
struct namespace_t *ns;
477478

478479
/* Resize the namespace array. */
479-
namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
480+
namespaces = realloc(namespaces, ++len * sizeof(struct namespace_t));
480481
if (!namespaces)
481482
bail("failed to reallocate namespace array");
482-
ns = &namespaces[num - 1];
483+
ns = &namespaces[len - 1];
483484

484485
/* Split 'ns:path'. */
485486
path = strstr(namespace, ":");
@@ -495,22 +496,43 @@ void join_namespaces(char *nslist)
495496
strncpy(ns->type, namespace, PATH_MAX - 1);
496497
strncpy(ns->path, path, PATH_MAX - 1);
497498
ns->path[PATH_MAX - 1] = '\0';
499+
500+
ns_to_join |= nstype(ns->type);
498501
} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
499502

500-
/*
501-
* The ordering in which we join namespaces is important. We should
502-
* always join the user namespace *first*. This is all guaranteed
503-
* from the container_linux.go side of this, so we're just going to
504-
* follow the order given to us.
505-
*/
503+
*ns_list = namespaces;
504+
*ns_len = len;
505+
return ns_to_join;
506+
}
506507

507-
for (i = 0; i < num; i++) {
508-
struct namespace_t *ns = &namespaces[i];
509-
int type = nstype(ns->type);
508+
/*
509+
* Try to join all namespaces that are in the "allow" nsset, and return the
510+
* set we were able to successfully join. If a permission error is returned
511+
from setns(2), the namespace is skipped (non-permission errors are fatal).
512+
*/
513+
static nsset_t __join_namespaces(nsset_t allow, struct namespace_t *ns_list, size_t ns_len)
514+
{
515+
nsset_t joined = 0;
510516

511-
write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", type, ns->type, ns->path);
512-
if (setns(ns->fd, type) < 0)
517+
for (size_t i = 0; i < ns_len; i++) {
518+
struct namespace_t *ns = &ns_list[i];
519+
int type = nstype(ns->type);
520+
int err, saved_errno;
521+
522+
if (!(type & allow))
523+
continue;
524+
525+
err = setns(ns->fd, type);
526+
saved_errno = errno;
527+
write_log(DEBUG, "setns(%#x) into %s namespace (with path %s): %s",
528+
type, ns->type, ns->path, strerror(errno));
529+
if (err < 0) {
530+
/* Skip permission errors. */
531+
if (saved_errno == EPERM)
532+
continue;
513533
bail("failed to setns into %s namespace", ns->type);
534+
}
535+
joined |= type;
514536

515537
/*
516538
* If we change user namespaces, make sure we switch to root in the
@@ -524,9 +546,95 @@ void join_namespaces(char *nslist)
524546
}
525547

526548
close(ns->fd);
549+
ns->fd = -1;
527550
}
551+
return joined;
552+
}
553+
554+
/*
 * Append src to the heap-allocated string dst, growing dst as needed.
 *
 * dst may be NULL, in which case a freshly allocated copy of src is
 * returned. Otherwise dst must have come from malloc()/realloc()/strdup();
 * ownership passes to this function and the caller must only use the
 * returned pointer afterwards (and eventually free() it).
 *
 * Returns the combined string, or NULL if memory could not be allocated (in
 * which case dst has been freed so that the usual
 * `str = strappend(str, ...)` pattern does not leak).
 */
static char *strappend(char *dst, char *src)
{
	size_t dst_len = dst ? strlen(dst) : 0;
	size_t src_len = strlen(src);
	char *out;

	/* realloc(NULL, n) behaves like malloc(n), which covers the !dst case. */
	out = realloc(dst, dst_len + src_len + 1);
	if (!out) {
		/* On failure realloc() leaves dst untouched; release it ourselves. */
		free(dst);
		return NULL;
	}

	/* Copy src, including its terminating NUL, right after the old contents. */
	memcpy(out + dst_len, src, src_len + 1);
	return out;
}
564+
565+
/*
 * Build a human-readable, ", "-separated list of the names of every
 * namespace type present in nsset.
 *
 * The all_ns_types table is walked until the entry whose name is NULL. The
 * result is heap-allocated (an empty string when no type matched) and is
 * owned by the caller.
 */
static char *nsset_to_str(nsset_t nsset)
{
	char *result = NULL;
	struct nstype_t *entry = all_ns_types;

	while (entry->name != NULL) {
		if (entry->type & nsset) {
			/* Only emit a separator once something precedes it. */
			if (result != NULL)
				result = strappend(result, ", ");
			result = strappend(result, entry->name);
		}
		entry++;
	}

	if (result == NULL)
		result = strdup("");
	return result;
}
577+
578+
/*
 * Final verification and cleanup after all join attempts.
 *
 * to_join is the set of namespaces that were requested and joined is the set
 * reported as successfully entered. Any entry of ns_list that still holds an
 * open descriptor (fd >= 0) was never joined: it is logged, added to the
 * failure set and its descriptor is closed. If the failure set ends up
 * non-empty the problem is reported through bail(); otherwise ns_list is
 * freed.
 */
static void __close_namespaces(nsset_t to_join, nsset_t joined, struct namespace_t *ns_list, size_t ns_len)
{
	/* Requested-but-not-joined namespaces already count as failures. */
	nsset_t missing = to_join & ~joined;
	size_t idx = 0;

	/* A descriptor that is still open means its namespace was skipped. */
	while (idx < ns_len) {
		struct namespace_t *entry = &ns_list[idx++];
		int flag = nstype(entry->type);

		if (entry->fd >= 0) {
			missing |= flag;
			write_log(FATAL, "failed to setns(%#x) into %s namespace (with path %s): %s",
				  flag, entry->type, entry->path, strerror(EPERM));
			close(entry->fd);
			entry->fd = -1;
		}
	}

	/* Every planned namespace must have been joined by now. */
	if (missing != 0)
		bail("failed to join {%s} namespaces: %s", nsset_to_str(missing), strerror(EPERM));

	free(ns_list);
}
604+
605+
void join_namespaces(char *nsspec)
606+
{
607+
nsset_t to_join = 0, joined = 0;
608+
struct namespace_t *ns_list;
609+
size_t ns_len;
610+
611+
/*
612+
* We have to open the file descriptors first, since after we join the
613+
* mnt or user namespaces we might no longer be able to access the
614+
* paths.
615+
*/
616+
to_join = __open_namespaces(nsspec, &ns_list, &ns_len);
617+
618+
/*
619+
* We first try to join all non-userns namespaces to join any namespaces
620+
* that we might not be able to join once we switch credentials to the
621+
* container's userns. We then join the user namespace, and then try to
622+
* join any remaining namespaces (this last step is needed for rootless
623+
* containers -- we don't get setns(2) permissions until we join the userns
624+
* and get CAP_SYS_ADMIN).
625+
*
626+
* Splitting the joins this way is necessary for containers that are
627+
* configured to join some externally-created namespace but are also
628+
* configured to join an unrelated user namespace.
629+
*
630+
* This is similar to what nsenter(1) seems to do in practice.
631+
*/
632+
joined |= __join_namespaces(to_join & ~(joined | CLONE_NEWUSER), ns_list, ns_len);
633+
joined |= __join_namespaces(CLONE_NEWUSER, ns_list, ns_len);
634+
joined |= __join_namespaces(to_join & ~(joined | CLONE_NEWUSER), ns_list, ns_len);
528635

529-
free(namespaces);
636+
/* Verify that we joined all of the namespaces. */
637+
__close_namespaces(to_join, joined, ns_list, ns_len);
530638
}
531639

532640
static inline int sane_kill(pid_t pid, int signum)

0 commit comments

Comments
 (0)