Skip to content

Commit d0c454c

Browse files
committed
Add ostree-shutdown.service: hide /sysroot and make /etc read-only
We have a lot of bind mounts; these are usually set up in the initramfs. So far during shutdown we've let systemd just try to sort things out via auto-generated mount units i.e. `sysroot.mount` and `etc.mount` and so on. systemd has some special casing for `-.mount` (i.e. `/`) and `etc.mount` https://github.com/systemd/systemd/blob/e91bfad241799b449df73efc30d833b9c5937001/src/shared/fstab-util.c#L72 However it doesn't special case `/sysroot` - which is currently an ostree-specific invention (when used in the real root). We cannot actually unmount `/sysroot` while it's in use, and it is because `/etc` is a bind mount into it. And we can't tear down `/etc` because it's just expected that e.g. pid 1 and other things hold open references to it - until things finally transition into systemd-shutdown. What we can do though is explicitly detach it during the shutdown phase; this ensures that systemd won't try to clean it up then, suppressing errors about its inability to do so. While we're here, let's also remount `/etc` read-only; while systemd itself will try to do so during systemd-shutdown. Per comments if this service fails, it's a bug in something else to be fixed. Closes: #3513 Signed-off-by: Colin Walters <walters@verbum.org>
1 parent a5a52e0 commit d0c454c

File tree

4 files changed

+85
-0
lines changed

4 files changed

+85
-0
lines changed

Makefile-boot.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ endif
3838
if BUILDOPT_SYSTEMD
3939
systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \
4040
src/boot/ostree-remount.service \
41+
src/boot/ostree-shutdown.service \
4142
src/boot/ostree-boot-complete.service \
4243
src/boot/ostree-finalize-staged.service \
4344
src/boot/ostree-finalize-staged-hold.service \
@@ -69,6 +70,7 @@ EXTRA_DIST += src/boot/dracut/module-setup.sh \
6970
src/boot/ostree-boot-complete.service \
7071
src/boot/ostree-prepare-root.service \
7172
src/boot/ostree-remount.service \
73+
src/boot/ostree-shutdown.service \
7274
src/boot/ostree-finalize-staged.service \
7375
src/boot/ostree-finalize-staged-hold.service \
7476
src/boot/ostree-state-overlay@.service \

src/boot/ostree-shutdown.service

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# SPDX-License-Identifier: LGPL-2.0+
2+
3+
[Unit]
4+
Description=OSTree Shutdown
5+
Documentation=man:ostree(1)
6+
DefaultDependencies=no
7+
# Only enabled via generator, but for good measure
8+
ConditionKernelCommandLine=ostree
9+
# Run after core mounts
10+
RequiresMountsFor=/etc /sysroot
11+
# However, we want to only shut down after `/var` has been umounted.
12+
# Since this runs via ExecStop, this Before= is actually After= at shutdown
13+
Before=var.mount
14+
Conflicts=umount.target
15+
Before=umount.target
16+
17+
[Service]
18+
Type=oneshot
19+
RemainAfterExit=yes
20+
ExecStop=/usr/lib/ostree/ostree-remount --shutdown
21+
22+
# No [Install] section - we're only enabled via generator

src/libostree/ostree-impl-system-generator.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha
108108
"local-fs.target.requires/ostree-remount.service")
109109
< 0)
110110
return glnx_throw_errno_prefix (error, "symlinkat");
111+
if (symlinkat (SYSTEM_DATA_UNIT_PATH "/ostree-shutdown.service", normal_dir_dfd,
112+
"local-fs.target.wants/ostree-shutdown.service")
113+
< 0)
114+
return glnx_throw_errno_prefix (error, "symlinkat");
111115

112116
if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "multi-user.target.wants", 0755, cancellable, error))
113117
return FALSE;

src/switchroot/ostree-remount.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@
4242
#include "ostree-mount-util.h"
4343
#include "otcore.h"
4444

45+
static gboolean opt_shutdown;
46+
47+
static GOptionEntry options[] = { { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &opt_shutdown,
48+
"Perform shutdown unmounting", NULL },
49+
{ NULL } };
50+
4551
static void
4652
do_remount (const char *target, bool writable)
4753
{
@@ -133,10 +139,61 @@ relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean i
133139
#endif
134140
}
135141

142+
// ostree-prepare-root sets things up so that /sysroot points to the "physical" (real) root in the
143+
// initramfs, and then with composefs `/` is an overlay+EROFS that holds references to content in
144+
// that physical filesystem.
145+
//
146+
// In a typical mutable system where the OS is in a mutable `/` (or `/usr), systemd explicitly
147+
// skips unmounting both `/` and `/usr`. It will remount them read-only though - and that's
148+
// the semantic we want to match here.
149+
static void
150+
do_shutdown (void)
151+
{
152+
const char *sysroot = "/sysroot";
153+
if (mount (sysroot, sysroot, NULL, MS_REMOUNT | MS_SILENT | MS_RDONLY, NULL) < 0)
154+
{
155+
// Hopefully at this point nothing has any write references, but if they
156+
// do we still want to continue.
157+
perror ("Remounting /sysroot read-only");
158+
}
159+
// And fully detach it from the mountns because otherwise systemd thinks
160+
// it can be unmounted, but it can't - it's required by `/` (and in a
161+
// composefs setup `/etc`) and possibly `/var`. Again, we only really
162+
// care that it got mounted read-only and hence outstanding data flushed.
163+
// A better fix in the future would be to teach systemd to honor `-.mount`
164+
// having a `Requires=sysroot.mount` meaning we can't unmount the latter.
165+
if (umount2 (sysroot, MNT_DETACH) < 0)
166+
err (EXIT_FAILURE, "umount(/sysroot)");
167+
168+
// And finally: /etc
169+
// NOTE! This one is intentionally last in that we want to try to make
170+
// this read-only, but if it fails, systemd-shutdown will have another
171+
// attempt after a process killing spree. If anything happens to be
172+
// holding a writable fd at this point, conceptually it would have
173+
// created race conditions vs ostree-finalize-staged.service, and so
174+
// having this service fail will be a signal that those things need
175+
// to be fixed.
176+
do_remount ("/etc", false);
177+
// Don't add anything else after this.
178+
}
179+
136180
int
137181
main (int argc, char *argv[])
138182
{
139183
g_autoptr (GError) error = NULL;
184+
g_autoptr (GOptionContext) context = g_option_context_new ("");
185+
g_option_context_add_main_entries (context, options, NULL);
186+
if (!g_option_context_parse (context, &argc, &argv, &error))
187+
errx (EXIT_FAILURE, "Error parsing options: %s", error->message);
188+
189+
// Handle the shutdown option
190+
if (opt_shutdown)
191+
{
192+
do_shutdown ();
193+
return 0;
194+
}
195+
// Otherwise fall through to the default startup
196+
140197
g_autoptr (GVariant) ostree_run_metadata_v = NULL;
141198
{
142199
glnx_autofd int fd = open (OTCORE_RUN_BOOTED, O_RDONLY | O_CLOEXEC);

0 commit comments

Comments
 (0)