Skip to content

Commit 1406b99

Browse files
frankdavid and IDX GitHub Automation authored
feat: Add command to manually swap the GuestOS alternative (#9154)
We already have an automatic mechanism that rolls back the GuestOS to the other boot alternative if the GuestOS boot fails. This PR adds a mechanism that node operators can use to swap the boot alternative manually. The new commands are available through the limited host console:

```
guestos-alternative show
guestos-alternative swap
guestos-alternative swap A
```

---------

Co-authored-by: IDX GitHub Automation <infra+github-automation@dfinity.org>
1 parent 3341ebe commit 1406b99

File tree

14 files changed

+376
-24
lines changed

14 files changed

+376
-24
lines changed

Cargo.lock

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ic-os/components/hostos/misc/limited-console

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ show_menu() {
2222
echo " ip-addresses - Show all IP addresses"
2323
echo " ping-gateway - Test connectivity to IPv6 gateway"
2424
echo " routes - Show routing table"
25+
echo " guestos-alternative - Show or swap the GuestOS boot alternative (A/B)"
2526
echo " manual-recovery - Manual node recovery (ONLY FOR EMERGENCY USE)"
2627
echo " rbash-console - Drop into restricted bash console"
2728
echo " clear - Clear the console"
@@ -119,6 +120,21 @@ execute_command() {
119120
echo "IPv6 routes:"
120121
/bin/ip -6 route show
121122
;;
123+
"guestos-alternative")
124+
case "${2:-}" in
125+
"show")
126+
/opt/ic/bin/hostos_tool guestos-alternative show
127+
;;
128+
"swap")
129+
sudo /opt/ic/bin/hostos_tool guestos-alternative swap ${3:+"$3"}
130+
;;
131+
*)
132+
echo "Usage: guestos-alternative show|swap [A|B]"
133+
echo " show - Show the current GuestOS boot alternative"
134+
echo " swap [A|B]- Swap GuestOS boot alternative (defaults to opposite of current)"
135+
;;
136+
esac
137+
;;
122138
"manual-recovery")
123139
echo "=== Manual Node Recovery ==="
124140
echo "This will perform a manual node recovery. Do not attempt this unless you are certain it is appropriate."

ic-os/components/hostos/misc/sudoers

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,7 @@ root ALL=(ALL:ALL) NOPASSWD:ALL
3232
# Allow limited-console to run recovery via the secure launcher script
3333
limited-console ALL=(ALL:ALL) NOPASSWD: /opt/ic/bin/guestos-recovery-launcher.sh mode=* version=* recovery-hash-prefix=*
3434

35+
# Allow limited-console to swap the GuestOS boot alternative
36+
limited-console ALL=(ALL:ALL) NOPASSWD: /opt/ic/bin/hostos_tool guestos-alternative swap *
37+
3538
# See sudoers(5) for more information on "#include" directives:

rs/ic_os/boot/grub/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package(default_visibility = ["//rs:ic-os-pkg"])
55
DEPENDENCIES = [
66
# Keep sorted.
77
"//rs/sys",
8+
"@crate_index//:clap",
89
"@crate_index//:regex",
910
"@crate_index//:strum",
1011
"@crate_index//:thiserror",

rs/ic_os/boot/grub/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ version = "1.0.0"
44
edition.workspace = true
55

66
[dependencies]
7+
clap = { workspace = true }
78
ic-sys = { path = "../../../sys" }
89
regex = { workspace = true }
910
strum = { workspace = true }

rs/ic_os/boot/grub/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ use thiserror::Error;
99

1010
const GRUB_ENV_SIZE: usize = 1024;
1111

12-
#[derive(Debug, Eq, PartialEq, Clone, Copy, EnumString, Display)]
12+
#[derive(Debug, Eq, PartialEq, Clone, Copy, EnumString, Display, clap::ValueEnum)]
1313
pub enum BootAlternative {
1414
// Bash scripts depend on the string representations, be very careful if you want to change them
1515
#[strum(serialize = "A")]
16+
#[clap(name = "A")]
1617
A,
1718
#[strum(serialize = "B")]
19+
#[clap(name = "B")]
1820
B,
1921
}
2022

rs/ic_os/device/src/mount.rs

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use gpt::GptDisk;
66
use std::fs::File;
77
use std::path::{Path, PathBuf};
88
#[cfg(target_os = "linux")]
9-
use sys_mount::{FilesystemType, Mount, Unmount, UnmountFlags};
9+
use sys_mount::{FilesystemType, Mount, MountFlags, Unmount, UnmountFlags};
1010
use tempfile::TempDir;
1111
use uuid::Uuid;
1212

@@ -88,6 +88,7 @@ impl FileSystem {
8888
#[derive(Copy, Clone)]
8989
pub struct MountOptions {
9090
pub file_system: FileSystem,
91+
pub read_only: bool,
9192
}
9293

9394
/// Represents a mounted partition with access to its filesystem.
@@ -187,10 +188,13 @@ impl PartitionProvider for UdevPartitionProvider {
187188
);
188189

189190
let tempdir = TempDir::new()?;
191+
let mut builder =
192+
Mount::builder().fstype(FilesystemType::Manual(options.file_system.as_str()));
193+
if options.read_only {
194+
builder = builder.flags(MountFlags::RDONLY);
195+
}
190196
Ok(Box::new(TempDeviceMount {
191-
mount: Mount::builder()
192-
.fstype(FilesystemType::Manual(options.file_system.as_str()))
193-
.mount(device_path, &tempdir)?,
197+
mount: builder.mount(device_path, &tempdir)?,
194198
_loop_device: None,
195199
_tempdir: tempdir,
196200
}))
@@ -252,14 +256,17 @@ impl Mounter for LoopDeviceMounter {
252256

253257
// Sometimes the mount can fail with EIO when udev is not ready yet
254258
let mount = retry_if_io_error(nix::Error::EIO, || {
255-
Mount::builder()
256-
.fstype(FilesystemType::Manual(options.file_system.as_str()))
257-
.mount(
258-
loop_device
259-
.path()
260-
.ok_or_else(|| std::io::Error::other("Loop device has no path"))?,
261-
mount_point,
262-
)
259+
let mut builder =
260+
Mount::builder().fstype(FilesystemType::Manual(options.file_system.as_str()));
261+
if options.read_only {
262+
builder = builder.flags(MountFlags::RDONLY);
263+
}
264+
builder.mount(
265+
loop_device
266+
.path()
267+
.ok_or_else(|| std::io::Error::other("Loop device has no path"))?,
268+
mount_point,
269+
)
263270
})
264271
.context("Failed to create mount")?;
265272

@@ -451,6 +458,7 @@ mod tests {
451458
0,
452459
MountOptions {
453460
file_system: FileSystem::Ext4,
461+
read_only: true,
454462
},
455463
)
456464
.unwrap();

rs/ic_os/open_rootfs/src/recovery.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use sev_guest::attestation_package::generate_attestation_package;
1111
use sev_guest::firmware::SevGuestFirmware;
1212
use std::path::Path;
1313

14-
pub const CONFIG_PARTITION_LABEL: &str = "CONFIG";
14+
pub const CONFIG_DEVICE_LABEL: &str = "CONFIG";
1515
pub const RECOVERY_PROPOSAL_FILE_NAME: &str = "alternative_guestos_proposal.cbor";
1616

1717
/// Reads and verifies an alternative GuestOS proposal, returning the rootfs hash from it.
@@ -24,9 +24,10 @@ pub fn extract_and_verify_recovery_rootfs_hash(
2424
) -> Result<String> {
2525
let config_mount = partition_provider
2626
.mount_partition(
27-
PartitionSelector::ByLabel(CONFIG_PARTITION_LABEL.to_string()),
27+
PartitionSelector::ByLabel(CONFIG_DEVICE_LABEL.to_string()),
2828
MountOptions {
2929
file_system: FileSystem::Vfat,
30+
read_only: true,
3031
},
3132
)
3233
.context("Failed to mount CONFIG partition")?;
@@ -46,6 +47,7 @@ pub fn extract_and_verify_recovery_rootfs_hash(
4647
PartitionSelector::ByUuid(get_boot_partition_uuid(root_device, command_runner)?),
4748
MountOptions {
4849
file_system: FileSystem::Ext4,
50+
read_only: false, // Partition may need repair
4951
},
5052
)?;
5153

rs/ic_os/open_rootfs/src/tests.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::partitions::{A_BOOT_UUID, A_ROOT_UUID, B_BOOT_UUID, B_ROOT_UUID};
2-
use crate::recovery::{CONFIG_PARTITION_LABEL, RECOVERY_PROPOSAL_FILE_NAME};
2+
use crate::recovery::{CONFIG_DEVICE_LABEL, RECOVERY_PROPOSAL_FILE_NAME};
33
use anyhow::{Context, Result};
44
use candid::Encode;
55
use command_runner::MockCommandRunner;
@@ -72,7 +72,7 @@ impl TestFixture {
7272
Arc::new(TempDir::with_prefix("b_boot").unwrap()),
7373
);
7474
partitions.insert(
75-
PartitionSelector::ByLabel(CONFIG_PARTITION_LABEL.to_string()),
75+
PartitionSelector::ByLabel(CONFIG_DEVICE_LABEL.to_string()),
7676
config_media,
7777
);
7878

@@ -222,9 +222,7 @@ impl TestFixture {
222222
// Write NNS public key override to CONFIG media
223223
fs::write(
224224
self.partition_provider
225-
.get_partition(PartitionSelector::ByLabel(
226-
CONFIG_PARTITION_LABEL.to_string(),
227-
))
225+
.get_partition(PartitionSelector::ByLabel(CONFIG_DEVICE_LABEL.to_string()))
228226
.unwrap()
229227
.join("nns_public_key_override.pem"),
230228
&nns_public_key,
@@ -440,9 +438,7 @@ fn test_nns_root_key_mismatch() {
440438
fs::remove_file(
441439
fixture
442440
.partition_provider
443-
.get_partition(PartitionSelector::ByLabel(
444-
CONFIG_PARTITION_LABEL.to_string(),
445-
))
441+
.get_partition(PartitionSelector::ByLabel(CONFIG_DEVICE_LABEL.to_string()))
446442
.unwrap()
447443
.join("nns_public_key_override.pem"),
448444
)

rs/ic_os/os_tools/guest_vm_runner/src/guest_direct_boot.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ pub async fn prepare_direct_boot(
7676
PartitionSelector::ByUuid(GRUB_PARTITION_UUID),
7777
MountOptions {
7878
file_system: GRUB_PARTITION_FS,
79+
read_only: false, // Partition may need repair
7980
},
8081
)
8182
.context("Could not mount grub partition")?;
@@ -97,6 +98,8 @@ pub async fn prepare_direct_boot(
9798
boot_alternative = boot_alternative.get_opposite();
9899
}
99100

101+
println!("Will boot into {boot_alternative} from {guest_vm_type:?} GuestVM");
102+
100103
// The variable name inside 'boot_args' that contains the kernel command line parameters.
101104
// Note that this depends on the boot alternative since they contain the root partition and
102105
// other boot alternative-specific parameters.
@@ -110,6 +113,7 @@ pub async fn prepare_direct_boot(
110113
PartitionSelector::ByUuid(boot_partition_uuid),
111114
MountOptions {
112115
file_system: BOOT_PARTITION_FS,
116+
read_only: false, // Partition may need repair
113117
},
114118
)
115119
.with_context(|| format!("Could not mount boot partition {boot_alternative}"))?;

0 commit comments

Comments
 (0)