@@ -420,6 +420,14 @@ void nl_free(struct nlconfig_t *config)
420420 free (config -> data );
421421}
422422
423+ struct namespace_t { /* One namespace to join, taken from a single 'ns:path' entry. */
424+ int fd ; /* Descriptor handed to setns(); reset to -1 once it has been closed. */
425+ char type [PATH_MAX ]; /* Namespace type name (the 'ns' part); translated via nstype(). */
426+ char path [PATH_MAX ]; /* Namespace path (the 'path' part); also used in log messages. */
427+ };
428+
429+ typedef int nsset_t ; /* Bitmask built by OR-ing nstype() values (compared against e.g. CLONE_NEWUSER). */
430+
423431static struct nstype_t {
424432 int type ;
425433 char * name ;
@@ -451,35 +459,28 @@ static int nstype(char *name)
451459 bail ("unknown namespace type %s" , name );
452460}
453461
454- void join_namespaces (char * nslist )
462+ static nsset_t __open_namespaces (char * nsspec , struct namespace_t * * ns_list , size_t * ns_len )
455463{
456- int num = 0 , i ;
457- char * saveptr = NULL ;
458- char * namespace = strtok_r (nslist , "," , & saveptr );
459- struct namespace_t {
460- int fd ;
461- char type [PATH_MAX ];
462- char path [PATH_MAX ];
463- } * namespaces = NULL ;
464+ int len = 0 ;
465+ nsset_t ns_to_join = 0 ;
466+ char * namespace , * saveptr = NULL ;
467+ struct namespace_t * namespaces = NULL ;
468+
469+ namespace = strtok_r (nsspec , "," , & saveptr );
464470
465- if (!namespace || !strlen (namespace ) || !strlen (nslist ))
471+ if (!namespace || !strlen (namespace ) || !strlen (nsspec ))
466472 bail ("ns paths are empty" );
467473
468- /*
469- * We have to open the file descriptors first, since after
470- * we join the mnt namespace we might no longer be able to
471- * access the paths.
472- */
473474 do {
474475 int fd ;
475476 char * path ;
476477 struct namespace_t * ns ;
477478
478479 /* Resize the namespace array. */
479- namespaces = realloc (namespaces , ++ num * sizeof (struct namespace_t ));
480+ namespaces = realloc (namespaces , ++ len * sizeof (struct namespace_t ));
480481 if (!namespaces )
481482 bail ("failed to reallocate namespace array" );
482- ns = & namespaces [num - 1 ];
483+ ns = & namespaces [len - 1 ];
483484
484485 /* Split 'ns:path'. */
485486 path = strstr (namespace , ":" );
@@ -495,22 +496,43 @@ void join_namespaces(char *nslist)
495496 strncpy (ns -> type , namespace , PATH_MAX - 1 );
496497 strncpy (ns -> path , path , PATH_MAX - 1 );
497498 ns -> path [PATH_MAX - 1 ] = '\0' ;
499+
500+ ns_to_join |= nstype (ns -> type );
498501 } while ((namespace = strtok_r (NULL , "," , & saveptr )) != NULL );
499502
500- /*
501- * The ordering in which we join namespaces is important. We should
502- * always join the user namespace *first*. This is all guaranteed
503- * from the container_linux.go side of this, so we're just going to
504- * follow the order given to us.
505- */
503+ * ns_list = namespaces ;
504+ * ns_len = len ;
505+ return ns_to_join ;
506+ }
506507
507- for (i = 0 ; i < num ; i ++ ) {
508- struct namespace_t * ns = & namespaces [i ];
509- int type = nstype (ns -> type );
508+ /*
509+ * Try to join all namespaces that are in the "allow" nsset, and return the
510+ * set we were able to successfully join. If a permission error is returned
511+ * from setns(2), the namespace is skipped (non-permission errors are fatal).
512+ */
513+ static nsset_t __join_namespaces (nsset_t allow , struct namespace_t * ns_list , size_t ns_len )
514+ {
515+ nsset_t joined = 0 ;
510516
511- write_log (DEBUG , "setns(%#x) into %s namespace (with path %s)" , type , ns -> type , ns -> path );
512- if (setns (ns -> fd , type ) < 0 )
517+ for (size_t i = 0 ; i < ns_len ; i ++ ) {
518+ struct namespace_t * ns = & ns_list [i ];
519+ int type = nstype (ns -> type );
520+ int err , saved_errno ;
521+
522+ if (!(type & allow ))
523+ continue ;
524+
525+ err = setns (ns -> fd , type );
526+ saved_errno = errno ;
527+ write_log (DEBUG , "setns(%#x) into %s namespace (with path %s): %s" ,
528+ type , ns -> type , ns -> path , strerror (errno ));
529+ if (err < 0 ) {
530+ /* Skip permission errors. */
531+ if (saved_errno == EPERM )
532+ continue ;
513533 bail ("failed to setns into %s namespace" , ns -> type );
534+ }
535+ joined |= type ;
514536
515537 /*
516538 * If we change user namespaces, make sure we switch to root in the
@@ -524,9 +546,95 @@ void join_namespaces(char *nslist)
524546 }
525547
526548 close (ns -> fd );
549+ ns -> fd = -1 ;
527550 }
551+ return joined ;
552+ }
553+
/*
 * Append src to the heap-allocated string dst, growing dst as needed.
 *
 * dst may be NULL, in which case a fresh copy of src is returned. A NULL src
 * appends nothing and returns dst unchanged. On success the returned pointer
 * replaces dst (which must no longer be used) and has to be released with
 * free(). If memory cannot be allocated, dst is freed and NULL is returned,
 * so the usual `s = strappend(s, ...)` pattern never leaks.
 */
static char *strappend(char *dst, char *src)
{
	size_t dst_len, src_len;
	char *out;

	if (!src)
		return dst;

	dst_len = dst ? strlen(dst) : 0;
	src_len = strlen(src);

	/* realloc(NULL, n) behaves like malloc(n), which covers the !dst case. */
	out = realloc(dst, dst_len + src_len + 1);
	if (!out) {
		/* realloc() leaves dst untouched on failure; don't leak it. */
		free(dst);
		return NULL;
	}

	/* Copy src together with its NUL terminator. */
	memcpy(out + dst_len, src, src_len + 1);
	return out;
}
564+
565+ static char * nsset_to_str (nsset_t nsset )
566+ {
567+ char * str = NULL ;
568+ for (struct nstype_t * ns = all_ns_types ; ns -> name != NULL ; ns ++ ) {
569+ if (ns -> type & nsset ) {
570+ if (str )
571+ str = strappend (str , ", " );
572+ str = strappend (str , ns -> name );
573+ }
574+ }
575+ return str ? : strdup ("" );
576+ }
577+
578+ static void __close_namespaces (nsset_t to_join , nsset_t joined , struct namespace_t * ns_list , size_t ns_len )
579+ {
580+ /* We expect to have joined every namespace. */
581+ nsset_t failed_to_join = to_join & ~joined ;
582+
583+ /* Double-check that we used up (and thus joined) all of the nsfds. */
584+ for (size_t i = 0 ; i < ns_len ; i ++ ) {
585+ struct namespace_t * ns = & ns_list [i ];
586+ int type = nstype (ns -> type );
587+
588+ if (ns -> fd < 0 )
589+ continue ;
590+
591+ failed_to_join |= type ;
592+ write_log (FATAL , "failed to setns(%#x) into %s namespace (with path %s): %s" ,
593+ type , ns -> type , ns -> path , strerror (EPERM ));
594+ close (ns -> fd );
595+ ns -> fd = -1 ;
596+ }
597+
598+ /* Make sure we joined the namespaces we planned to. */
599+ if (failed_to_join )
600+ bail ("failed to join {%s} namespaces: %s" , nsset_to_str (failed_to_join ), strerror (EPERM ));
601+
602+ free (ns_list );
603+ }
604+
605+ void join_namespaces (char * nsspec )
606+ {
607+ nsset_t to_join = 0 , joined = 0 ;
608+ struct namespace_t * ns_list ;
609+ size_t ns_len ;
610+
611+ /*
612+ * We have to open the file descriptors first, since after we join the
613+ * mnt or user namespaces we might no longer be able to access the
614+ * paths.
615+ */
616+ to_join = __open_namespaces (nsspec , & ns_list , & ns_len );
617+
618+ /*
619+ * We first try to join all non-userns namespaces to join any namespaces
620+ * that we might not be able to join once we switch credentials to the
621+ * container's userns. We then join the user namespace, and then try to
622+ * join any remaining namespaces (this last step is needed for rootless
623+ * containers -- we don't get setns(2) permissions until we join the userns
624+ * and get CAP_SYS_ADMIN).
625+ *
626+ * Splitting the joins this way is necessary for containers that are
627+ * configured to join some externally-created namespace but are also
628+ * configured to join an unrelated user namespace.
629+ *
630+ * This is similar to what nsenter(1) seems to do in practice.
631+ */
632+ joined |= __join_namespaces (to_join & ~(joined | CLONE_NEWUSER ), ns_list , ns_len );
633+ joined |= __join_namespaces (CLONE_NEWUSER , ns_list , ns_len );
634+ joined |= __join_namespaces (to_join & ~(joined | CLONE_NEWUSER ), ns_list , ns_len );
528635
529- free (namespaces );
636+ /* Verify that we joined all of the namespaces. */
637+ __close_namespaces (to_join , joined , ns_list , ns_len );
530638}
531639
532640static inline int sane_kill (pid_t pid , int signum )
0 commit comments