From f836cc4ad81b8d1db2f7cdb66a4832c8cb24e6bc Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Fri, 14 Nov 2025 14:11:43 -0500 Subject: [PATCH 1/3] mantle/kola: swallow stderr from mkfs.ext4 We don't need the message about creating the filesystem to show up in our kola logs. --- mantle/kola/tests/ignition/qemufailure.go | 1 - 1 file changed, 1 deletion(-) diff --git a/mantle/kola/tests/ignition/qemufailure.go b/mantle/kola/tests/ignition/qemufailure.go index 4bc9441bc6..70922ac668 100644 --- a/mantle/kola/tests/ignition/qemufailure.go +++ b/mantle/kola/tests/ignition/qemufailure.go @@ -207,7 +207,6 @@ func dualBootfsFailure(c cluster.TestCluster) error { } cmd := exec.Command("mkfs.ext4", "-L", "boot", fakeBootFile.Name()) - cmd.Stderr = os.Stderr if err := cmd.Run(); err != nil { c.Fatal(err) } From b1efc61025f6f9ec18134a78abdeeab01e9ffedd Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Fri, 14 Nov 2025 14:13:09 -0500 Subject: [PATCH 2/3] mantle/kola: check console log for error even in timeout There is a race condition that occurs for /dev/virtio-ports/com.coreos.ignition.journal and it's hard to track down. In this case we are just detecting a failure anyway. If we can find the original search failure term after the timeout happens let's consider that success too. See https://github.com/coreos/fedora-coreos-tracker/issues/2019 for more context. 
--- mantle/kola/tests/ignition/qemufailure.go | 54 +++++++++++++++-------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/mantle/kola/tests/ignition/qemufailure.go b/mantle/kola/tests/ignition/qemufailure.go index 70922ac668..c5bb13b3d0 100644 --- a/mantle/kola/tests/ignition/qemufailure.go +++ b/mantle/kola/tests/ignition/qemufailure.go @@ -31,6 +31,11 @@ import ( "github.com/coreos/coreos-assembler/mantle/platform/conf" "github.com/coreos/coreos-assembler/mantle/util" "github.com/coreos/ignition/v2/config/v3_2/types" + "github.com/coreos/pkg/capnslog" +) + +var ( + plog = capnslog.NewPackageLogger("github.com/coreos/coreos-assembler/mantle", "kola/tests/ignition/qemufailure") ) func init() { @@ -105,23 +110,25 @@ func verifyError(builder *platform.QemuBuilder, searchPattern string) error { defer cancel() + checkConsole := func(path string, searchPattern string) error { + // Expected initramfs failure, checking the console file to ensure + // that it failed the expected way + found, err := fileContainsPattern(path, searchPattern) + if err != nil { + return errors.Wrapf(err, "looking for pattern '%s' in file '%s' failed", searchPattern, path) + } else if !found { + return fmt.Errorf("pattern '%s' in file '%s' not found", searchPattern, path) + } + return nil + } + errchan := make(chan error) go func() { resultingError := inst.WaitAll(ctx) if resultingError == nil { resultingError = fmt.Errorf("ignition unexpectedly succeeded") } else if resultingError == platform.ErrInitramfsEmergency { - // Expected initramfs failure, checking the console file to ensure - // that it failed the expected way - found, err := fileContainsPattern(builder.ConsoleFile, searchPattern) - if err != nil { - resultingError = errors.Wrapf(err, "looking for pattern '%s' in file '%s' failed", searchPattern, builder.ConsoleFile) - } else if !found { - resultingError = fmt.Errorf("pattern '%s' in file '%s' not found", searchPattern, builder.ConsoleFile) - } else { - // The expected 
case - resultingError = nil - } + resultingError = checkConsole(builder.ConsoleFile, searchPattern) } else { resultingError = errors.Wrapf(resultingError, "expected initramfs emergency.target error") } @@ -133,17 +140,28 @@ func verifyError(builder *platform.QemuBuilder, searchPattern string) error { if err := inst.Kill(); err != nil { return errors.Wrapf(err, "failed to kill the vm instance") } - // If somehow the journal dumping failed let's flag that. We - // just ignore errors here. This effort is only trying to help + // The journal dumping can fail if the `com.coreos.ignition.journal` + // device doesn't show up for some reason (race condition) [1]. + // Let's try to detect that case and check the console anyway for + // the original failure we were looking for. + // + // We just ignore errors here. This effort is only trying to help // be more informative about why things failed. This "string" // we are searching for comes from ignition-virtio-dump-journal - searchPattern = "Didn't find virtio port /dev/virtio-ports/com.coreos.ignition.journal" - found, _ := fileContainsPattern(builder.ConsoleFile, searchPattern) + // [1] https://github.com/coreos/fedora-coreos-tracker/issues/2019 + virtioPortSearchPattern := "Didn't find virtio port /dev/virtio-ports/com.coreos.ignition.journal" + found, _ := fileContainsPattern(builder.ConsoleFile, virtioPortSearchPattern) if found { - return errors.Wrapf(ctx.Err(), "Journal dumping during emergency.target failed") - } else { - return errors.Wrapf(ctx.Err(), "timed out waiting for initramfs error") + plog.Warning("Journal dumping during emergency.target failed. Continuing best effort.") + // Check the log even though there was a timeout + if err := checkConsole(builder.ConsoleFile, searchPattern); err == nil { + // Even though there was a timeout we found the string + // we wanted in the console log anyway. Let's just + // take the win. 
+ return nil + } } + return errors.Wrapf(ctx.Err(), "timed out waiting for initramfs error") case err := <-errchan: if err != nil { return err From 7ab70570bf6993b32cda2a7ed6fbc8f28729d562 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Fri, 14 Nov 2025 14:06:27 -0500 Subject: [PATCH 3/3] mantle/kola: decrease timeout on failure tests All of these tests run within 15 seconds today. Let's drop the timeout to one minute, which will enable us to detect failures quicker and also allow us to fall back to the new "best effort" console check in the timeout path quicker. --- mantle/kola/tests/ignition/qemufailure.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mantle/kola/tests/ignition/qemufailure.go b/mantle/kola/tests/ignition/qemufailure.go index c5bb13b3d0..ab3a66f924 100644 --- a/mantle/kola/tests/ignition/qemufailure.go +++ b/mantle/kola/tests/ignition/qemufailure.go @@ -106,7 +106,7 @@ func verifyError(builder *platform.QemuBuilder, searchPattern string) error { return err } defer inst.Destroy() - ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) defer cancel()