diff --git a/src/symbolize/gimli/parse_running_mmaps_unix.rs b/src/symbolize/gimli/parse_running_mmaps_unix.rs index 5803d5dc..b834ca7d 100644 --- a/src/symbolize/gimli/parse_running_mmaps_unix.rs +++ b/src/symbolize/gimli/parse_running_mmaps_unix.rs @@ -13,20 +13,8 @@ use core::str::FromStr; pub(super) struct MapsEntry { /// start (inclusive) and limit (exclusive) of address range. address: (usize, usize), - /// The perms field are the permissions for the entry - /// - /// r = read - /// w = write - /// x = execute - /// s = shared - /// p = private (copy on write) - perms: [char; 4], /// Offset into the file (or "whatever"). offset: u64, - /// device (major, minor) - dev: (usize, usize), - /// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitalized data aka BSS). - inode: usize, /// Usually the file backing the mapping. /// /// Note: The man page for proc includes a note about "coordination" by @@ -56,15 +44,25 @@ pub(super) struct MapsEntry { } pub(super) fn parse_maps() -> Result<Vec<MapsEntry>, &'static str> { - let mut v = Vec::new(); let mut proc_self_maps = File::open("/proc/self/maps").map_err(|_| "Couldn't open /proc/self/maps")?; let mut buf = String::new(); let _bytes_read = proc_self_maps .read_to_string(&mut buf) .map_err(|_| "Couldn't read /proc/self/maps")?; - for line in buf.lines() { + + let mut v = Vec::new(); + let mut buf = buf.as_str(); + while let Some(match_idx) = buf.bytes().position(|b| b == b'\n') { + // Unsafe is unfortunately necessary to get the bounds check removed (for code size). + + // SAFETY: match_idx is the position of the newline, so it must be valid. + let line = unsafe { buf.get_unchecked(..match_idx) }; + v.push(line.parse()?); + + // SAFETY: match_idx is the position of the newline, so the byte after it must be valid. + buf = unsafe { buf.get_unchecked((match_idx + 1)..) 
}; } Ok(v) @@ -92,70 +90,88 @@ impl FromStr for MapsEntry { // e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]" // e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" // e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0" - // - // Note that paths may contain spaces, so we can't use `str::split` for parsing (until - // Split::remainder is stabilized #77998). fn from_str(s: &str) -> Result<Self, Self::Err> { - let (range_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); + // While there are nicer standard library APIs available for this, we aim for minimal code size. + + let mut state = s; + + fn parse_start<'a>(state: &mut &'a str) -> &'a str { + // Unsafe is unfortunately necessary to get the bounds check removed (for code size). + + let start_idx = state.bytes().position(|b| b != b' '); + if let Some(start_idx) = start_idx { + // SAFETY: It comes from position, so it's in bounds. + // It must be on a UTF-8 boundary as it's the first byte that isn't ' '. + *state = unsafe { state.get_unchecked(start_idx..) }; + } + let match_idx = state.bytes().position(|b| b == b' '); + match match_idx { + None => { + let result = *state; + *state = ""; + result + } + Some(match_idx) => { + // SAFETY: match_idx comes from .bytes().position() of an ASCII character, + // so it's both in bounds and a UTF-8 boundary + let result = unsafe { state.get_unchecked(..match_idx) }; + // SAFETY: Since match_idx is the ' ', there must be at least the end after it. + *state = unsafe { state.get_unchecked((match_idx + 1)..) 
}; + result + } + } + } + + fn error(msg: &str) -> &str { + if cfg!(debug_assertions) { + msg + } else { + "invalid map entry" + } + } + + let range_str = parse_start(&mut state); if range_str.is_empty() { - return Err("Couldn't find address"); + return Err(error("Couldn't find address")); } - let (perms_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); + let perms_str = parse_start(&mut state); if perms_str.is_empty() { - return Err("Couldn't find permissions"); + return Err(error("Couldn't find permissions")); } - let (offset_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); + let offset_str = parse_start(&mut state); if offset_str.is_empty() { - return Err("Couldn't find offset"); + return Err(error("Couldn't find offset")); } - let (dev_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); + let dev_str = parse_start(&mut state); if dev_str.is_empty() { - return Err("Couldn't find dev"); + return Err(error("Couldn't find dev")); } - let (inode_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); + let inode_str = parse_start(&mut state); if inode_str.is_empty() { - return Err("Couldn't find inode"); + return Err(error("Couldn't find inode")); } // Pathname may be omitted in which case it will be empty - let pathname_str = s.trim_start(); + let pathname_str = state.trim_ascii_start(); - let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); - let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); + let hex = |s| usize::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number")); + let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number")); let address = if let Some((start, limit)) = range_str.split_once('-') { (hex(start)?, hex(limit)?) 
} else { - return Err("Couldn't parse address range"); - }; - let perms: [char; 4] = { - let mut chars = perms_str.chars(); - let mut c = || chars.next().ok_or("insufficient perms"); - let perms = [c()?, c()?, c()?, c()?]; - if chars.next().is_some() { - return Err("too many perms"); - } - perms + return Err(error("Couldn't parse address range")); }; let offset = hex64(offset_str)?; - let dev = if let Some((major, minor)) = dev_str.split_once(':') { - (hex(major)?, hex(minor)?) - } else { - return Err("Couldn't parse dev"); - }; - let inode = hex(inode_str)?; let pathname = pathname_str.into(); Ok(MapsEntry { address, - perms, offset, - dev, - inode, pathname, }) } @@ -172,10 +188,7 @@ fn check_maps_entry_parsing_64bit() { .unwrap(), MapsEntry { address: (0xffffffffff600000, 0xffffffffff601000), - perms: ['-', '-', 'x', 'p'], offset: 0x00000000, - dev: (0x00, 0x00), - inode: 0x0, pathname: "[vsyscall]".into(), } ); @@ -187,10 +200,7 @@ fn check_maps_entry_parsing_64bit() { .unwrap(), MapsEntry { address: (0x7f5985f46000, 0x7f5985f48000), - perms: ['r', 'w', '-', 'p'], offset: 0x00039000, - dev: (0x103, 0x06), - inode: 0x76021795, pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(), } ); @@ -200,10 +210,7 @@ fn check_maps_entry_parsing_64bit() { .unwrap(), MapsEntry { address: (0x35b1a21000, 0x35b1a22000), - perms: ['r', 'w', '-', 'p'], offset: 0x00000000, - dev: (0x00, 0x00), - inode: 0x0, pathname: Default::default(), } ); @@ -224,10 +231,7 @@ fn check_maps_entry_parsing_32bit() { .unwrap(), MapsEntry { address: (0x08056000, 0x08077000), - perms: ['r', 'w', '-', 'p'], offset: 0x00000000, - dev: (0x00, 0x00), - inode: 0x0, pathname: "[heap]".into(), } ); @@ -239,10 +243,7 @@ fn check_maps_entry_parsing_32bit() { .unwrap(), MapsEntry { address: (0xb7c79000, 0xb7e02000), - perms: ['r', '-', '-', 'p'], offset: 0x00000000, - dev: (0x08, 0x01), - inode: 0x60662705, pathname: "/usr/lib/locale/locale-archive".into(), } ); @@ -252,10 +253,7 @@ fn 
check_maps_entry_parsing_32bit() { .unwrap(), MapsEntry { address: (0xb7e02000, 0xb7e03000), - perms: ['r', 'w', '-', 'p'], offset: 0x00000000, - dev: (0x00, 0x00), - inode: 0x0, pathname: Default::default(), } ); @@ -266,10 +264,7 @@ fn check_maps_entry_parsing_32bit() { .unwrap(), MapsEntry { address: (0xb7c79000, 0xb7e02000), - perms: ['r', '-', '-', 'p'], offset: 0x00000000, - dev: (0x08, 0x01), - inode: 0x60662705, pathname: "/executable/path/with some spaces".into(), } ); @@ -280,10 +275,7 @@ fn check_maps_entry_parsing_32bit() { .unwrap(), MapsEntry { address: (0xb7c79000, 0xb7e02000), - perms: ['r', '-', '-', 'p'], offset: 0x00000000, - dev: (0x08, 0x01), - inode: 0x60662705, pathname: "/executable/path/with multiple-continuous spaces ".into(), } ); @@ -294,10 +286,7 @@ fn check_maps_entry_parsing_32bit() { .unwrap(), MapsEntry { address: (0xb7c79000, 0xb7e02000), - perms: ['r', '-', '-', 'p'], offset: 0x00000000, - dev: (0x08, 0x01), - inode: 0x60662705, pathname: "/executable/path/starts-with-spaces".into(), } );