Add read_panic_message kipc (#2313)

hawkw · mkeeter · cbiffle · web-flow · commit e7f5f92e84e2 · 2026-01-05T23:36:38.000Z
Currently, there is no way to programmatically access the panic message of a task which has faulted due to a Rust panic fron within the Hubris userspace. This branch adds a new `read_panic_message` kipc that copies the contents of a panicked task's panic message buffer into the caller. If the requested task has not panicked, this kipc returns an error indicating this. This is intended by use by supervisor implementations or other tasks which wish to report panic messages from userspace. I've also added a test case that exercises this functionality. Fixes #2311 --------- Co-authored-by: Matt Keeter <matt@oxide.computer> Co-authored-by: Cliff L. Biffle <cliff@oxide.computer>
diff --git a/doc/kipc.adoc b/doc/kipc.adoc
@@ -332,7 +332,7 @@ This interface is primarily intended for testing interrupt handling, and can
 only be called by a task running as the supervisor. When this KIPC is called,
 an actual machine interrupt is triggered for the relevant hardware interrupt
 source. The kernel then responds to the interrupt using its normal
-interrupt-handling infrastructure, which dispatches a notification to the 
+interrupt-handling infrastructure, which dispatches a notification to the
 subscribed task.
 
 This KIPC is *not* intended for use as a general-purpose inter-task signalling
@@ -402,6 +402,55 @@ while let Some(fault) = kipc::find_faulted_task(next_task) {
 }
 ----
 
+=== `read_panic_message` (10)
+
+If the task with the requested index is in the faulted state due to a panic,
+reads the contents of its panic message into the response buffer.
+
+==== Request
+
+[source,rust]
+----
+struct ReadPanicMessageRequest {
+    task_index: u32,
+}
+----
+
+==== Preconditions
+
+`task_index` must be a valid task index for this system.
+
+==== Response
+
+[source,rust]
+----
+Result<&[u8], abi::ReadPanicMessageError>
+----
+
+==== Notes
+
+If the requested task is not currently in the faulted state due to a panic,
+this KIPC returns the `abi::ReadPanicMessageError::TaskNotPanicked` response
+code.
+
+If the requested task has panicked, but the buffer containing its panic
+message is invalid, this KIPC returns the
+`abi::ReadPanicMessageError::BadPanicMessage` response code. If the target task
+uses the panic handler provided by the `userlib` crate, this should not be
+possible. However, it may occur if a task calls the `sys_panic` syscall
+directly with invalid arguments, such as a slice pointing outside the task's memory.
+
+The panic message is truncated to the length of the response buffer. Since
+Hubris only stores the first 128 bytes of panic messages, a 128-byte response
+buffer will normally not result in further truncation.
+
+The panic message is normally UTF-8. However, because of this byte-based
+truncation, and because the panic message is coming from a failing task in the
+first place, be careful not to *assume* that the panic message is valid UTF-8.
+See the
+link:++https://doc.rust-lang.org/stable/std/primitive.slice.html#method.utf8_chunks++[`<[u8]>::utf8_chunks`]
+function for one possible method of handling the panic message safely.
+
 == Receiving from the kernel
 
 The kernel never sends messages to tasks. It's simply not equipped to do so.
diff --git a/sys/abi/src/lib.rs b/sys/abi/src/lib.rs
@@ -512,6 +512,7 @@ pub enum Kipcnum {
     ReadTaskDumpRegion = 7,
     SoftwareIrq = 8,
     FindFaultedTask = 9,
+    ReadPanicMessage = 10,
 }
 
 impl core::convert::TryFrom<u16> for Kipcnum {
@@ -528,6 +529,7 @@ impl core::convert::TryFrom<u16> for Kipcnum {
             7 => Ok(Self::ReadTaskDumpRegion),
             8 => Ok(Self::SoftwareIrq),
             9 => Ok(Self::FindFaultedTask),
+            10 => Ok(Self::ReadPanicMessage),
             _ => Err(()),
         }
     }
@@ -582,3 +584,35 @@ bitflags::bitflags! {
         const CLEAR_PENDING = 1 << 1;
     }
 }
+
+/// Errors returned by [`Kipcnum::ReadPanicMessage`].
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+#[repr(u32)]
+pub enum ReadPanicMessageError {
+    /// The task in question has not panicked.
+    TaskNotPanicked = 1,
+    /// The task has panicked, but its panic message buffer is invalid, so the
+    /// kernel has not let us have it.
+    ///
+    /// In practice, this is quite unlikely, and would require the task to have
+    /// panicked with a panic message slice of a length that exceeds the end of
+    /// the address space. Panicking via the Hubris userlib will never do this.
+    /// But, since the panicked task could be any arbitrary binary...anything is
+    /// possible.
+    BadPanicBuffer = 2,
+}
+
+/// We're using an explicit `TryFrom` impl for `ReadPanicMessageError` instead of
+/// `FromPrimitive` because the kernel doesn't currently depend on `num-traits`
+/// and this seems okay.
+impl core::convert::TryFrom<u32> for ReadPanicMessageError {
+    type Error = ();
+
+    fn try_from(x: u32) -> Result<Self, Self::Error> {
+        match x {
+            1 => Ok(Self::TaskNotPanicked),
+            2 => Ok(Self::BadPanicBuffer),
+            _ => Err(()),
+        }
+    }
+}
diff --git a/sys/kern/src/kipc.rs b/sys/kern/src/kipc.rs
@@ -11,7 +11,7 @@ use unwrap_lite::UnwrapLite;
 use crate::arch;
 use crate::err::UserError;
 use crate::task::{current_id, ArchState, NextTask, Task};
-use crate::umem::USlice;
+use crate::umem::{safe_copy, USlice};
 
 /// Message dispatcher.
 pub fn handle_kernel_message(
@@ -43,6 +43,9 @@ pub fn handle_kernel_message(
         Ok(Kipcnum::FindFaultedTask) => {
             find_faulted_task(tasks, caller, args.message?, args.response?)
         }
+        Ok(Kipcnum::ReadPanicMessage) => {
+            read_panic_message(tasks, caller, args.message?, args.response?)
+        }
 
         _ => {
             // Task has sent an unknown message to the kernel. That's bad.
@@ -507,3 +510,75 @@ fn find_faulted_task(
         .set_send_response_and_length(0, response_len);
     Ok(NextTask::Same)
 }
+
+fn read_panic_message(
+    tasks: &mut [Task],
+    caller: usize,
+    message: USlice<u8>,
+    response: USlice<u8>,
+) -> Result<NextTask, UserError> {
+    let index: u32 = deserialize_message(&tasks[caller], message)?;
+    let index = index as usize;
+    let Some(task) = tasks.get(index) else {
+        return Err(UserError::Unrecoverable(FaultInfo::SyscallUsage(
+            UsageError::TaskOutOfRange,
+        )));
+    };
+
+    // Make sure the task is actually panicked.
+    let TaskState::Faulted {
+        fault: FaultInfo::Panic,
+        ..
+    } = task.state()
+    else {
+        return Err(UserError::Recoverable(
+            abi::ReadPanicMessageError::TaskNotPanicked as u32,
+            NextTask::Same,
+        ));
+    };
+
+    let Ok(message) = task.save().as_panic_args().message else {
+        // There's really only one reason that `as_panic_args().message` would
+        // be an error. Because it's just a `USlice<u8>`, it can't be
+        // misaligned, so the only possible invalid slice here is one whose
+        // length exceeds the size of the address space, so that `base + len`
+        // would overflow.
+        //
+        // But, we shouldn't fault the *caller* over that; they didn't do it!
+        return Err(UserError::Recoverable(
+            abi::ReadPanicMessageError::BadPanicBuffer as u32,
+            NextTask::Same,
+        ));
+    };
+
+    // Note that if the panic was recorded by `userlib`'s panic handler, it will
+    // never exceed 128 bytes in length, and if the caller requested this kipc
+    // using the `userlib::ipc::read_panic_message()` wrapper, then the caller's
+    // buffer will always be exactly 128 bytes long. However, we can't rely on
+    // that here, as either task *could* be an arbitrary binary that wasn't
+    // compiled with the Hubris userlib, so we need to be safe regardless.
+    match safe_copy(tasks, index, message, caller, response) {
+        Ok(len) => {
+            // Ladies and gentlemen...we got him!
+            tasks[caller]
+                .save_mut()
+                .set_send_response_and_length(0, len);
+
+            Ok(NextTask::Same)
+        }
+        Err(crate::err::InteractFault {
+            dst: Some(fault), ..
+        }) => {
+            // If the caller's buffer was invalid, they take a fault.
+            Err(UserError::Unrecoverable(fault))
+        }
+        Err(_) => {
+            // Source region was bad, but it's not the caller's fault; give them
+            // a recoverable error.
+            Err(UserError::Recoverable(
+                abi::ReadPanicMessageError::BadPanicBuffer as u32,
+                NextTask::Same,
+            ))
+        }
+    }
+}
diff --git a/sys/userlib/src/kipc.rs b/sys/userlib/src/kipc.rs
@@ -15,11 +15,12 @@
 //! wastes flash space in the supervisor.
 
 use core::num::NonZeroUsize;
+use core::str::Utf8Chunks;
 
-use abi::{Kipcnum, TaskId};
+use abi::{Kipcnum, ReadPanicMessageError, TaskId};
 use zerocopy::IntoBytes;
 
-use crate::{sys_send, UnwrapLite};
+use crate::{sys_send, UnwrapLite, PANIC_MESSAGE_MAX_LEN};
 
 pub fn read_task_status(task: usize) -> abi::TaskState {
     // Coerce `task` to a known size (Rust doesn't assume that usize == u32)
@@ -162,3 +163,59 @@ pub fn software_irq(task: usize, mask: u32) {
         &[],
     );
 }
+
+/// Reads a task's panic message into the provided `buf`, if the task is
+/// panicked.
+///
+/// Note that Hubris normally only preserves the first
+/// [`PANIC_MESSAGE_MAX_LEN`] bytes of a task's panic message, and panic
+/// messages greater than that length are truncated. Thus, this function
+/// accepts a buffer of that length.
+///
+/// # Returns
+///
+/// - [`Ok`]`([`Utf8Chunks`])` if the task is panicked.
+///
+///   The returned [`Utf8Chunks`] is an iterator returning a
+///   [`core::str::Utf8Chunk`] for each contiguous chunk of valid or invalid
+///   UTF-8 bytes in the panicked task's panic message buffer. This is due to
+///   the truncation of panic messages to [`PANIC_MESSAGE_MAX_LEN`] bytes,
+///   which may occur inside of a code point. If the panic message is truncated
+///   within a code point, there will be an invalid byte sequence at the end of
+///   the buffer, and the `Utf8Chunks` iterator allows the caller to select
+///   only the valid Unicode portion of the message. Provided that the panicked
+///   task panicked using the Hubris userlib's panic handler, the iterator will
+///   contain a single valid UTF-8 chunk, which may be followed by up to one
+///   invalid chunk. However, the task may potentially have called the panic
+///   syscall through other means, and therefore, there may be multiple valid
+///   chunks interspersed with invalid bytes.
+///
+///   The total byte length of all chunks in the iterator will be at most
+///   [`PANIC_MESSAGE_MAX_LEN`] bytes. Note that the iterator may contain only
+///   a single zero-length chunk, if the task has panicked but was compiled
+///   without panic messages enabled.
+/// - [`Err`]`(`[`ReadPanicMessageError::TaskNotPanicked`]`)` if the task is
+///   not currently faulted due to a panic.
+/// - [`Err`]`(`[`ReadPanicMessageError::BadPanicMessage`]`)` if the task has
+///   panicked but the panic message buffer is invalid to read from.
+pub fn read_panic_message(
+    task: usize,
+    buf: &mut [u8; PANIC_MESSAGE_MAX_LEN],
+) -> Result<Utf8Chunks<'_>, ReadPanicMessageError> {
+    let task = task as u32;
+    let (rc, len) = sys_send(
+        TaskId::KERNEL,
+        Kipcnum::ReadPanicMessage as u16,
+        task.as_bytes(),
+        &mut buf[..],
+        &[],
+    );
+
+    if rc == 0 {
+        Ok(buf[..len].utf8_chunks())
+    } else {
+        // If the kernel sent us an unknown response code....i dunno, guess
+        // i'll die?
+        Err(ReadPanicMessageError::try_from(rc).unwrap_lite())
+    }
+}
diff --git a/sys/userlib/src/lib.rs b/sys/userlib/src/lib.rs
@@ -1402,9 +1402,19 @@ compile_error!(
      this check in userlib.)"
 );
 
+/// Maximum length (in bytes) of the panic message string captured when the
+/// `panic-messages` feature is enabled. Panics which format messages longer
+/// than this many bytes are truncated to this length.
+///
+/// There's a tradeoff here between "getting a useful message" and "wasting a
+/// lot of RAM." Somewhat arbitrarily, we choose to collect this many bytes
+/// of panic message (and permanently reserve the same number of bytes of
+/// RAM):
+pub const PANIC_MESSAGE_MAX_LEN: usize = 128;
+
 /// Panic handler for user tasks with the `panic-messages` feature enabled. This
 /// handler will try its best to generate a panic message, up to a maximum
-/// buffer size (configured below).
+/// of [`PANIC_MESSAGE_MAX_LEN`] bytes.
 ///
 /// Including this panic handler permanently reserves a buffer in the RAM of a
 /// task, to ensure that memory is available for the panic message, even if the
@@ -1423,13 +1433,7 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
     //
     // There is unfortunately no way to have the compiler _check_ that the code
     // does not panic, so we have to work very carefully.
-
-    // There's a tradeoff here between "getting a useful message" and "wasting a
-    // lot of RAM." Somewhat arbitrarily, we choose to collect this many bytes
-    // of panic message (and permanently reserve the same number of bytes of
-    // RAM):
-    const BUFSIZE: usize = 128;
-
+    //
     // Panic messages get constructed using `core::fmt::Write`. If we implement
     // that trait, we can provide our own type that will back the
     // `core::fmt::Formatter` handed into any formatting routines (like those on
@@ -1444,7 +1448,7 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
         /// Content will be written here. While the content itself will be
         /// UTF-8, it may end in an incomplete UTF-8 character to simplify our
         /// truncation logic.
-        buf: &'static mut [u8; BUFSIZE],
+        buf: &'static mut [u8; PANIC_MESSAGE_MAX_LEN],
         /// Number of bytes of `buf` that are valid.
         ///
         /// Invariant: always in the range `0..buf.len()`.
@@ -1520,7 +1524,8 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
 
     // We declare a single static panic buffer per task, to ensure the memory is
     // available.
-    static mut PANIC_BUFFER: [u8; BUFSIZE] = [0; BUFSIZE];
+    static mut PANIC_BUFFER: [u8; PANIC_MESSAGE_MAX_LEN] =
+        [0; PANIC_MESSAGE_MAX_LEN];
 
     // Okay. Now we start the actual panicking process.
     //
diff --git a/test/test-suite/src/main.rs b/test/test-suite/src/main.rs
@@ -134,6 +134,7 @@ test_cases! {
     test_irq_status,
     #[cfg(feature = "fru-id-eeprom")]
     at24csw080::test_at24csw080,
+    test_read_panic_message,
 }
 
 /// Tests that we can send a message to our assistant, and that the assistant
@@ -1553,6 +1554,40 @@ fn test_irq_status() {
     assert_eq!(status, expected_status);
 }
 
+/// Tests that when a task panics, its panic message can be read via the `read_panic_message` kipc.
+fn test_read_panic_message() {
+    set_autorestart(false);
+
+    let mut buf = [0u8; userlib::PANIC_MESSAGE_MAX_LEN];
+
+    match kipc::read_panic_message(ASSIST.get_task_index().into(), &mut buf) {
+        Err(userlib::ReadPanicMessageError::TaskNotPanicked) => {}
+        x => panic!("expected `Err(TaskNotPanicked)`, got: {x:?}"),
+    }
+
+    // Ask the assistant to panic.
+    let assist = assist_task_id();
+    let mut response = 0u32;
+    let (_, _) = userlib::sys_send(
+        assist,
+        AssistOp::Panic as u16,
+        &0u32.to_le_bytes(),
+        response.as_mut_bytes(),
+        &[],
+    );
+
+    let mut msg_chunks =
+        kipc::read_panic_message(ASSIST.get_task_index().into(), &mut buf)
+            .unwrap();
+    // it should look kinda like a panic message (but since the line number may
+    // change, don't make assertions about the entire contents of the string...
+    let msg = msg_chunks
+        .next()
+        .expect("Utf8Chunks always has at least one chunk")
+        .valid();
+    assert!(msg.starts_with("panicked at"));
+}
+
 /// Asks the test runner (running as supervisor) to please trigger a software
 /// interrupt for `notifications::TEST_IRQ`, thank you.
 #[track_caller]