@@ -3080,16 +3080,23 @@ main (int argc,
30803080
30813081 if (mkdir ("oldroot" , 0755 ))
30823082 die_with_error ("Creating oldroot failed" );
3083- #if 0
3083+
3084+ #define ROOT_MODE_CHROOT 0
3085+ #define ROOT_MODE_PIVOT 1
3086+
3087+ int root_mode = ROOT_MODE_PIVOT ;
30843088 if (pivot_root (base_path , "oldroot" ))
3085- die_with_error ("pivot_root" );
3086- #else
3087- if (mount ("/" , "oldroot" , NULL , MS_SILENT | MS_MGC_VAL | MS_BIND | MS_REC , NULL ) < 0 )
3088- die_with_error ("setting up newroot bind" );
3089+ {
3090+ warn ("pivot_root() failed, falling back to chroot() with limitations" );
3091+ root_mode = ROOT_MODE_CHROOT ;
3092+
3093+ if (mount ("/" , "oldroot" , NULL , MS_SILENT | MS_MGC_VAL | MS_BIND | MS_REC , NULL ) < 0 )
3094+ die_with_error ("setting up newroot bind" );
3095+
3096+ if (chroot (base_path ))
3097+ die_with_error ("chroot" );
3098+ }
30893099
3090- if (chroot (base_path ))
3091- die_with_error ("chroot" );
3092- #endif
30933100 if (chdir ("/" ) != 0 )
30943101 die_with_error ("chdir / (base path)" );
30953102
@@ -3152,38 +3159,40 @@ main (int argc,
31523159
31533160 if (umount2 ("oldroot" , MNT_DETACH ))
31543161 die_with_error ("unmount old root" );
3155- #if 0
3162+
31563163 /* This is our second pivot. It's like we're a Silicon Valley startup flush
31573164 * with cash but short on ideas!
31583165 *
31593166 * We're aiming to make /newroot the real root, and get rid of /oldroot. To do
31603167 * that we need a temporary place to store it before we can unmount it.
31613168 */
3162- { cleanup_fd int oldrootfd = open ("/" , O_DIRECTORY | O_RDONLY );
3163- if (oldrootfd < 0 )
3164- die_with_error ("can't open /" );
3165- if (chdir ("/newroot" ) != 0 )
3166- die_with_error ("chdir /newroot" );
3167- /* While the documentation claims that put_old must be underneath
3168- * new_root, it is perfectly fine to use the same directory as the
3169- * kernel checks only if old_root is accessible from new_root.
3170- *
3171- * Both runc and LXC are using this "alternative" method for
3172- * setting up the root of the container:
3173- *
3174- * https://github.com/opencontainers/runc/blob/HEAD/libcontainer/rootfs_linux.go#L671
3175- * https://github.com/lxc/lxc/blob/HEAD/src/lxc/conf.c#L1121
3176- */
3177- if (pivot_root ("." , "." ) != 0 )
3178- die_with_error ("pivot_root(/newroot)" );
3179- if (fchdir (oldrootfd ) < 0 )
3180- die_with_error ("fchdir to oldroot" );
3181- if (umount2 ("." , MNT_DETACH ) < 0 )
3182- die_with_error ("umount old root" );
3183- if (chdir ("/" ) != 0 )
3184- die_with_error ("chdir /" );
3185- }
3186- #endif
3169+ if (root_mode == ROOT_MODE_PIVOT )
3170+ {
3171+ cleanup_fd int oldrootfd = open ("/" , O_DIRECTORY | O_RDONLY );
3172+ if (oldrootfd < 0 )
3173+ die_with_error ("can't open /" );
3174+ if (chdir ("/newroot" ) != 0 )
3175+ die_with_error ("chdir /newroot" );
3176+ /* While the documentation claims that put_old must be underneath
3177+ * new_root, it is perfectly fine to use the same directory as the
3178+ * kernel checks only if old_root is accessible from new_root.
3179+ *
3180+ * Both runc and LXC are using this "alternative" method for
3181+ * setting up the root of the container:
3182+ *
3183+ * https://github.com/opencontainers/runc/blob/HEAD/libcontainer/rootfs_linux.go#L671
3184+ * https://github.com/lxc/lxc/blob/HEAD/src/lxc/conf.c#L1121
3185+ */
3186+ if (pivot_root ("." , "." ) != 0 )
3187+ die_with_error ("pivot_root(/newroot)" );
3188+ if (fchdir (oldrootfd ) < 0 )
3189+ die_with_error ("fchdir to oldroot" );
3190+ if (umount2 ("." , MNT_DETACH ) < 0 )
3191+ die_with_error ("umount old root" );
3192+ if (chdir ("/" ) != 0 )
3193+ die_with_error ("chdir /" );
3194+ }
3195+
31873196 if (opt_userns2_fd > 0 && setns (opt_userns2_fd , CLONE_NEWUSER ) != 0 )
31883197 die_with_error ("Setting userns2 failed" );
31893198
@@ -3215,17 +3224,20 @@ main (int argc,
32153224 if (write_to_fd (sysctl_fd , "1" , 1 ) < 0 )
32163225 die_with_error ("sysctl user.max_user_namespaces = 1" );
32173226 }
3218- #if 0
3219- if (unshare (CLONE_NEWUSER ))
3220- die_with_error ("unshare user ns" );
3221- #endif
3227+
3228+ if (root_mode == ROOT_MODE_PIVOT )
3229+ {
3230+ if (unshare (CLONE_NEWUSER ))
3231+ die_with_error ("unshare user ns" );
3232+ }
3233+
32223234 /* If we unshared above (pivot mode), we're in a new user namespace and got back the bounding set, so clear it again */
32233235 drop_cap_bounding_set (FALSE);
3224- #if 0
3225- write_uid_gid_map ( opt_sandbox_uid , ns_uid ,
3226- opt_sandbox_gid , ns_gid ,
3227- -1 , FALSE, FALSE);
3228- #endif
3236+
3237+ if ( root_mode == ROOT_MODE_PIVOT )
3238+ write_uid_gid_map ( opt_sandbox_uid , ns_uid ,
3239+ opt_sandbox_gid , ns_gid ,
3240+ -1 , FALSE, FALSE);
32293241 }
32303242
32313243 if (opt_disable_userns || opt_assert_userns_disabled )
@@ -3236,15 +3248,18 @@ main (int argc,
32363248 if (res == 0 )
32373249 die ("creation of new user namespaces was not disabled as requested" );
32383250 }
3239- #if 1
3240- /* Now make /newroot the real root */
3241- if (chdir ("/newroot" ) != 0 )
3242- die_with_error ("chdir newroot" );
3243- if (chroot ("/newroot" ) != 0 )
3244- die_with_error ("chroot /newroot" );
3245- if (chdir ("/" ) != 0 )
3246- die_with_error ("chdir /" );
3247- #endif
3251+
3252+ if (root_mode == ROOT_MODE_CHROOT )
3253+ {
3254+ /* Now make /newroot the real root */
3255+ if (chdir ("/newroot" ) != 0 )
3256+ die_with_error ("chdir newroot" );
3257+ if (chroot ("/newroot" ) != 0 )
3258+ die_with_error ("chroot /newroot" );
3259+ if (chdir ("/" ) != 0 )
3260+ die_with_error ("chdir /" );
3261+ }
3262+
32483263 /* All privileged ops are done now, so drop caps we don't need */
32493264 drop_privs (!is_privileged , TRUE);
32503265
0 commit comments