From 50e5cd62aa93f4f8ce82845f10f1c410f837c07b Mon Sep 17 00:00:00 2001 From: jbtrystram Date: Fri, 31 Oct 2025 14:06:24 +0100 Subject: [PATCH] kola/multipath: wait for systemd firstboot target Although `mpath-var-lib-containers.service` is set to only run on first boot, it sometimes runs twice when the system reboots too early. Sometimes, in a low-load CI environment, the reboot in this test happens before systemd's `first-boot-complete.target` is reached. This causes `ConditionFirstBoot` to still be true at the next boot, causing the mpath service to fail, because it already ran during the actual first boot. A previous attempt[1] at fixing this made the flake less frequent, but it happened again and I noticed that systemd didn't reach this target before the reboot: `Reached target first-boot-complete.target - First Boot Complete` is only shown after the second boot in the logs. Likely fixes https://github.com/coreos/rhel-coreos-config/issues/66 [1] https://github.com/coreos/coreos-assembler/commit/abd0c18a756ffeedc39e9cb1a5d70e977bdf40f3 --- mantle/kola/tests/misc/multipath.go | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/mantle/kola/tests/misc/multipath.go b/mantle/kola/tests/misc/multipath.go index f0e69007ba..dcc00f9c3d 100644 --- a/mantle/kola/tests/misc/multipath.go +++ b/mantle/kola/tests/misc/multipath.go @@ -15,7 +15,9 @@ package misc import ( + "fmt" "strings" + "time" coreosarch "github.com/coreos/stream-metadata-go/arch" @@ -23,6 +25,7 @@ import ( "github.com/coreos/coreos-assembler/mantle/kola/register" "github.com/coreos/coreos-assembler/mantle/platform" "github.com/coreos/coreos-assembler/mantle/platform/conf" + "github.com/coreos/coreos-assembler/mantle/util" ) var ( @@ -168,6 +171,8 @@ func verifyMultipath(c cluster.TestCluster, m platform.Machine, path string) { func runMultipathDay1(c cluster.TestCluster) { m := c.Machines()[0] verifyMultipathBoot(c, m) + // wait until first-boot-complete.target is reached + 
waitForCompleteFirstboot(c)
 	if err := m.Reboot(); err != nil {
 		c.Fatalf("Failed to reboot the machine: %v", err)
 	}
@@ -188,8 +193,33 @@ func runMultipathDay2(c cluster.TestCluster) {
 func runMultipathPartition(c cluster.TestCluster) {
 	m := c.Machines()[0]
 	verifyMultipath(c, m, "/var/lib/containers")
+	// wait until first-boot-complete.target is reached
+	waitForCompleteFirstboot(c)
 	if err := m.Reboot(); err != nil {
 		c.Fatalf("Failed to reboot the machine: %v", err)
 	}
 	verifyMultipath(c, m, "/var/lib/containers")
 }
+
+// waitForCompleteFirstboot runs a transient unit ordered after first-boot-complete.target
+// on the first machine, then calls c.Fatalf unless the target reports "active".
+func waitForCompleteFirstboot(c cluster.TestCluster) {
+	m := c.Machines()[0]
+	err := util.WaitUntilReady(2*time.Minute, 10*time.Second, func() (bool, error) {
+		if _, err := c.SSH(m, "sudo systemd-run --wait --quiet --property='After=first-boot-complete.target' echo 'firstboot complete'"); err != nil {
+			return false, err
+		}
+		// get the actual target state to double check
+		out, err := c.SSH(m, "systemctl is-active first-boot-complete.target")
+		if err != nil {
+			return false, err
+		}
+		if state := string(out); state != "active" {
+			return false, fmt.Errorf("first-boot-complete.target state: %s", state)
+		}
+		return true, nil
+	})
+	if err != nil {
+		c.Fatalf("Timed out while waiting for first-boot-complete.target to be ready: %v", err)
+	}
+}