Skip to content

Commit ac2d0c1

Browse files
committed
crashdump: create core dump file when a guest crashes
- The core dump file is an ELF file with special segments that describe the guest's memory at the time of the crash, the values of the CPU registers, and other special notes that tell the debugger how to set up a debugging session starting from the core dump. Signed-off-by: Doru Blânzeanu <[email protected]>
1 parent c454b0b commit ac2d0c1

File tree

8 files changed

+460
-33
lines changed

8 files changed

+460
-33
lines changed

Cargo.lock

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/hyperlight_host/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ tempfile = { version = "3.20", optional = true }
4242
anyhow = "1.0"
4343
metrics = "0.24.2"
4444
serde_json = "1.0"
45+
elfcore = { git = "https://github.com/dblnz/elfcore.git", branch = "split-linux-impl-from-elfcore" }
4546

4647
[target.'cfg(windows)'.dependencies]
4748
windows = { version = "0.61", features = [

src/hyperlight_host/src/hypervisor/crashdump.rs

Lines changed: 239 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,47 +14,260 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
use std::io::Write;
17+
use std::cmp::min;
1818

19+
use elfcore::{
20+
ArchComponentState, ArchState, CoreDumpBuilder, CoreError, Elf64_Auxv, ProcessInfoSource,
21+
ReadProcessMemory, ThreadView, VaProtection, VaRegion,
22+
};
1923
use tempfile::NamedTempFile;
2024

2125
use super::Hypervisor;
26+
use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags};
2227
use crate::{new_error, Result};
2328

24-
/// Dump registers + memory regions + raw memory to a tempfile
25-
#[cfg(crashdump)]
26-
pub(crate) fn crashdump_to_tempfile(hv: &dyn Hypervisor) -> Result<()> {
27-
let mut temp_file = NamedTempFile::with_prefix("mem")?;
28-
let hv_details = format!("{:#x?}", hv);
29+
const NT_X86_XSTATE: u32 = 0x202;
30+
const AT_ENTRY: u64 = 9;
31+
const AT_NULL: u64 = 0;
32+
33+
/// Structure to hold the crash dump context
34+
/// This structure contains the information needed to create a core dump
35+
#[derive(Debug)]
36+
pub(crate) struct CrashDumpContext<'a> {
37+
regions: &'a [MemoryRegion],
38+
regs: [u64; 27],
39+
xsave: Vec<u8>,
40+
entry: u64,
41+
}
42+
43+
impl<'a> CrashDumpContext<'a> {
44+
pub(crate) fn new(
45+
regions: &'a [MemoryRegion],
46+
regs: [u64; 27],
47+
xsave: Vec<u8>,
48+
entry: u64,
49+
) -> Self {
50+
Self {
51+
regions,
52+
regs,
53+
xsave,
54+
entry,
55+
}
56+
}
57+
}
58+
59+
/// Structure that contains the process information for the core dump
60+
/// This serves as a source of information for `elfcore`'s [`CoreDumpBuilder`]
61+
struct GuestView {
62+
regions: Vec<VaRegion>,
63+
threads: Vec<ThreadView>,
64+
aux_vector: Vec<elfcore::Elf64_Auxv>,
65+
}
2966

30-
// write hypervisor details such as registers, info about mapped memory regions, etc.
31-
temp_file.write_all(hv_details.as_bytes())?;
32-
temp_file.write_all(b"================ MEMORY DUMP =================\n")?;
67+
impl GuestView {
68+
fn new(ctx: &CrashDumpContext) -> Self {
69+
// Map the regions to the format `CoreDumpBuilder` expects
70+
let regions = ctx
71+
.regions
72+
.iter()
73+
.filter(|r| !r.host_region.is_empty())
74+
.map(|r| VaRegion {
75+
begin: r.guest_region.start as u64,
76+
end: r.guest_region.end as u64,
77+
offset: r.host_region.start as u64,
78+
protection: VaProtection {
79+
is_private: false,
80+
read: r.flags.contains(MemoryRegionFlags::READ),
81+
write: r.flags.contains(MemoryRegionFlags::WRITE),
82+
execute: r.flags.contains(MemoryRegionFlags::EXECUTE),
83+
},
84+
mapped_file_name: None,
85+
})
86+
.collect();
3387

34-
// write the raw memory dump for each memory region
35-
for region in hv.get_memory_regions() {
36-
if region.host_region.start == 0 || region.host_region.is_empty() {
37-
continue;
88+
// The xsave state can be empty, so only add the XSAVE component when it holds data
89+
let mut components = vec![];
90+
if !ctx.xsave.is_empty() {
91+
components.push(ArchComponentState {
92+
name: "XSAVE",
93+
note_type: NT_X86_XSTATE,
94+
note_name: b"LINUX",
95+
data: ctx.xsave.clone(),
96+
});
3897
}
39-
// SAFETY: we got this memory region from the hypervisor so should never be invalid
40-
let region_slice = unsafe {
41-
std::slice::from_raw_parts(
42-
region.host_region.start as *const u8,
43-
region.host_region.len(),
44-
)
98+
99+
// Create the thread view
100+
// The thread view contains the information about the thread
101+
// NOTE: Some of these fields are not used in the current implementation
102+
let thread = ThreadView {
103+
flags: 0, // Kernel flags for the process
104+
tid: 1,
105+
uid: 0, // User ID
106+
gid: 0, // Group ID
107+
comm: "\0".to_string(),
108+
ppid: 0, // Parent PID
109+
pgrp: 0, // Process group ID
110+
nice: 0, // Nice value
111+
state: 0, // Process state
112+
utime: 0, // User time
113+
stime: 0, // System time
114+
cutime: 0, // Children User time
115+
cstime: 0, // Children System time
116+
cursig: 0, // Current signal
117+
session: 0, // Session ID of the process
118+
sighold: 0, // Blocked signal
119+
sigpend: 0, // Pending signal
120+
cmd_line: "\0".to_string(),
121+
122+
arch_state: Box::new(ArchState {
123+
gpr_state: ctx.regs.to_vec(),
124+
components,
125+
}),
45126
};
46-
temp_file.write_all(region_slice)?;
127+
128+
// Create the auxv vector
129+
// The first entry is AT_ENTRY, which is the entry point of the program
130+
// The entry point is the address where the program starts executing
131+
// This lets the debugger know that the entry point has been shifted by an offset
132+
// so the symbols can be loaded correctly.
133+
// The second entry is AT_NULL, which marks the end of the vector
134+
let auxv = vec![
135+
Elf64_Auxv {
136+
a_type: AT_ENTRY,
137+
a_val: ctx.entry,
138+
},
139+
Elf64_Auxv {
140+
a_type: AT_NULL,
141+
a_val: 0,
142+
},
143+
];
144+
145+
Self {
146+
regions,
147+
threads: vec![thread],
148+
aux_vector: auxv,
149+
}
150+
}
151+
}
152+
153+
impl ProcessInfoSource for GuestView {
154+
fn pid(&self) -> i32 {
155+
1
156+
}
157+
fn threads(&self) -> &[elfcore::ThreadView] {
158+
&self.threads
47159
}
48-
temp_file.flush()?;
160+
fn page_size(&self) -> usize {
161+
0x1000
162+
}
163+
fn aux_vector(&self) -> Option<&[elfcore::Elf64_Auxv]> {
164+
Some(&self.aux_vector)
165+
}
166+
fn va_regions(&self) -> &[elfcore::VaRegion] {
167+
&self.regions
168+
}
169+
fn mapped_files(&self) -> Option<&[elfcore::MappedFile]> {
170+
None
171+
}
172+
}
49173

50-
// persist the tempfile to disk
51-
let persist_path = temp_file.path().with_extension("dmp");
174+
/// Structure that reads the guest memory
175+
/// This structure serves as a custom memory reader for `elfcore`'s
176+
/// [`CoreDumpBuilder`]
177+
struct GuestMemReader {
178+
regions: Vec<MemoryRegion>,
179+
}
180+
181+
impl GuestMemReader {
182+
fn new(ctx: &CrashDumpContext) -> Self {
183+
Self {
184+
regions: ctx.regions.to_vec(),
185+
}
186+
}
187+
}
188+
189+
impl ReadProcessMemory for GuestMemReader {
190+
fn read_process_memory(
191+
&mut self,
192+
base: usize,
193+
buf: &mut [u8],
194+
) -> std::result::Result<usize, CoreError> {
195+
for r in self.regions.iter() {
196+
// Check if the base address is within the guest region
197+
if base >= r.guest_region.start && base < r.guest_region.end {
198+
let offset = base - r.guest_region.start;
199+
let region_slice = unsafe {
200+
std::slice::from_raw_parts(
201+
r.host_region.start as *const u8,
202+
r.host_region.len(),
203+
)
204+
};
205+
206+
// Calculate how much we can copy
207+
let copy_size = min(buf.len(), region_slice.len() - offset);
208+
if copy_size == 0 {
209+
return std::result::Result::Ok(0);
210+
}
211+
212+
// Only copy the amount that fits in both buffers
213+
buf[..copy_size].copy_from_slice(&region_slice[offset..offset + copy_size]);
214+
215+
// Return the number of bytes copied
216+
return std::result::Result::Ok(copy_size);
217+
}
218+
}
219+
220+
// If we reach here, we didn't find a matching region
221+
std::result::Result::Ok(0)
222+
}
223+
}
224+
225+
/// Create core dump file from the hypervisor information
226+
///
227+
/// This function generates an ELF core dump file capturing the hypervisor's state,
228+
/// which can be used for debugging when crashes occur. The file is created in the
229+
/// system's temporary directory with extension '.elf' and the path is printed to stdout and logs.
230+
///
231+
/// # Arguments
232+
/// * `hv`: Reference to the hypervisor implementation
233+
///
234+
/// # Returns
235+
/// * `Result<()>`: Success or error
236+
pub(crate) fn crashdump_to_tempfile(hv: &dyn Hypervisor) -> Result<()> {
237+
log::info!("Creating core dump file...");
238+
239+
// Create a temporary file with a recognizable prefix
240+
let temp_file = NamedTempFile::with_prefix("hl_core_")
241+
.map_err(|e| new_error!("Failed to create temporary file: {:?}", e))?;
242+
243+
// Get crash context from hypervisor
244+
let ctx = hv
245+
.crashdump_context()
246+
.map_err(|e| new_error!("Failed to get crashdump context: {:?}", e))?;
247+
248+
// Set up data sources for the core dump
249+
let guest_view = GuestView::new(&ctx);
250+
let memory_reader = GuestMemReader::new(&ctx);
251+
252+
// Create and write core dump
253+
let core_builder = CoreDumpBuilder::from_source(
254+
Box::new(guest_view) as Box<dyn ProcessInfoSource>,
255+
Box::new(memory_reader) as Box<dyn ReadProcessMemory>,
256+
);
257+
258+
core_builder
259+
.write(&temp_file)
260+
.map_err(|e| new_error!("Failed to write core dump: {:?}", e))?;
261+
262+
let persist_path = temp_file.path().with_extension("elf");
52263
temp_file
53264
.persist(&persist_path)
54-
.map_err(|e| new_error!("Failed to persist crashdump file: {:?}", e))?;
265+
.map_err(|e| new_error!("Failed to persist core dump file: {:?}", e))?;
266+
267+
let path_string = persist_path.to_string_lossy().to_string();
55268

56-
println!("Memory dumped to file: {:?}", persist_path);
57-
log::error!("Memory dumped to file: {:?}", persist_path);
269+
println!("Core dump created successfully: {}", path_string);
270+
log::error!("Core dump file: {}", path_string);
58271

59272
Ok(())
60273
}

0 commit comments

Comments
 (0)