Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 65 additions & 76 deletions src/symbolize/gimli/parse_running_mmaps_unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,8 @@ use core::str::FromStr;
pub(super) struct MapsEntry {
/// start (inclusive) and limit (exclusive) of address range.
address: (usize, usize),
/// The perms field holds the permissions for the entry
///
/// r = read
/// w = write
/// x = execute
/// s = shared
/// p = private (copy on write)
perms: [char; 4],
/// Offset into the file (or "whatever").
offset: u64,
/// device (major, minor)
dev: (usize, usize),
/// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitialized data aka BSS).
inode: usize,
/// Usually the file backing the mapping.
///
/// Note: The man page for proc includes a note about "coordination" by
Expand Down Expand Up @@ -56,15 +44,25 @@ pub(super) struct MapsEntry {
}

pub(super) fn parse_maps() -> Result<Vec<MapsEntry>, &'static str> {
let mut v = Vec::new();
let mut proc_self_maps =
File::open("/proc/self/maps").map_err(|_| "Couldn't open /proc/self/maps")?;
let mut buf = String::new();
let _bytes_read = proc_self_maps
.read_to_string(&mut buf)
.map_err(|_| "Couldn't read /proc/self/maps")?;
for line in buf.lines() {

let mut v = Vec::new();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kinda insane that this matters that much

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's only a tiny improvement, so that much is a bit of an overstatement :). actually i don't have numbers on this one, but I did see the call to drop_in_place before but not after.

let mut buf = buf.as_str();
while let Some(match_idx) = buf.bytes().position(|b| b == b'\n') {
// Unsafe is unfortunately necessary to get the bounds check removed (for code size).

// SAFETY: match_idx is the position of the newline, so it must be valid.
let line = unsafe { buf.get_unchecked(..match_idx) };

v.push(line.parse()?);

// SAFETY: match_idx is the position of the newline, so the byte after it must be valid.
buf = unsafe { buf.get_unchecked((match_idx + 1)..) };
}

Ok(v)
Expand Down Expand Up @@ -92,70 +90,88 @@ impl FromStr for MapsEntry {
// e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]"
// e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2"
// e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0"
//
// Note that paths may contain spaces, so we can't use `str::split` for parsing (until
// Split::remainder is stabilized #77998).
fn from_str(s: &str) -> Result<Self, Self::Err> {
let (range_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
// While there are nicer standard library APIs available for this, we aim for minimal code size.

let mut state = s;

/// Pops the next space-delimited field off the front of `state`.
///
/// Leading `' '` bytes are skipped first. The returned slice runs up to (but does not
/// include) the next `' '`, and `state` is advanced to just past that space. When no
/// further space exists, everything that is left is returned and `state` becomes `""`.
///
/// Only the ASCII space byte is treated as a separator; other whitespace is kept as
/// part of the field.
fn parse_start<'a>(state: &mut &'a str) -> &'a str {
    // `get_unchecked` is used rather than range indexing so that no bounds-check
    // panic path is emitted; this code is tuned for minimal code size.
    let input: &'a str = *state;

    let trimmed = match input.bytes().position(|byte| byte != b' ') {
        // SAFETY: `first` was produced by `position` over `input`'s own bytes, so it is
        // in bounds. Every byte before it is an ASCII space, so it is a UTF-8 boundary.
        Some(first) => unsafe { input.get_unchecked(first..) },
        // Empty or made up solely of spaces: nothing to skip.
        None => input,
    };

    if let Some(space) = trimmed.bytes().position(|byte| byte == b' ') {
        // SAFETY: `space` is the index of an ASCII byte found inside `trimmed`, so it is
        // both in bounds and on a UTF-8 boundary.
        let field = unsafe { trimmed.get_unchecked(..space) };
        // SAFETY: `space` indexes a one-byte character, so `space + 1` is at most
        // `trimmed.len()` and is also a UTF-8 boundary.
        *state = unsafe { trimmed.get_unchecked((space + 1)..) };
        field
    } else {
        // No separator left: the remainder is the final field.
        *state = "";
        trimmed
    }
}
Comment on lines +94 to +123
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hkBst This is an example of the kind of microoptimization that is both very hard to do in the compiler and also can reap significant benefits since here we care mostly about code size, even in the dead code, as this code size is essentially multiplied by all Rust binaries.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@workingjubilee Thanks for thinking of me, although I'm not quite sure why you did.


/// Chooses the error text to report for a malformed maps entry.
///
/// Builds with `debug_assertions` enabled get the specific `msg`. All other builds get
/// a single generic string regardless of `msg`.
fn error(msg: &str) -> &str {
    if !cfg!(debug_assertions) {
        return "invalid map entry";
    }
    msg
}

let range_str = parse_start(&mut state);
if range_str.is_empty() {
return Err("Couldn't find address");
return Err(error("Couldn't find address"));
}

let (perms_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
let perms_str = parse_start(&mut state);
if perms_str.is_empty() {
return Err("Couldn't find permissions");
return Err(error("Couldn't find permissions"));
}

let (offset_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
let offset_str = parse_start(&mut state);
if offset_str.is_empty() {
return Err("Couldn't find offset");
return Err(error("Couldn't find offset"));
}

let (dev_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
let dev_str = parse_start(&mut state);
if dev_str.is_empty() {
return Err("Couldn't find dev");
return Err(error("Couldn't find dev"));
}

let (inode_str, s) = s.trim_start().split_once(' ').unwrap_or((s, ""));
let inode_str = parse_start(&mut state);
if inode_str.is_empty() {
return Err("Couldn't find inode");
return Err(error("Couldn't find inode"));
}

// Pathname may be omitted in which case it will be empty
let pathname_str = s.trim_start();
let pathname_str = state.trim_ascii_start();

let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number");
let hex = |s| usize::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number"));
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number"));

let address = if let Some((start, limit)) = range_str.split_once('-') {
(hex(start)?, hex(limit)?)
} else {
return Err("Couldn't parse address range");
};
let perms: [char; 4] = {
let mut chars = perms_str.chars();
let mut c = || chars.next().ok_or("insufficient perms");
let perms = [c()?, c()?, c()?, c()?];
if chars.next().is_some() {
return Err("too many perms");
}
perms
return Err(error("Couldn't parse address range"));
};
let offset = hex64(offset_str)?;
let dev = if let Some((major, minor)) = dev_str.split_once(':') {
(hex(major)?, hex(minor)?)
} else {
return Err("Couldn't parse dev");
};
let inode = hex(inode_str)?;
let pathname = pathname_str.into();

Ok(MapsEntry {
address,
perms,
offset,
dev,
inode,
pathname,
})
}
Expand All @@ -172,10 +188,7 @@ fn check_maps_entry_parsing_64bit() {
.unwrap(),
MapsEntry {
address: (0xffffffffff600000, 0xffffffffff601000),
perms: ['-', '-', 'x', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: "[vsyscall]".into(),
}
);
Expand All @@ -187,10 +200,7 @@ fn check_maps_entry_parsing_64bit() {
.unwrap(),
MapsEntry {
address: (0x7f5985f46000, 0x7f5985f48000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00039000,
dev: (0x103, 0x06),
inode: 0x76021795,
pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(),
}
);
Expand All @@ -200,10 +210,7 @@ fn check_maps_entry_parsing_64bit() {
.unwrap(),
MapsEntry {
address: (0x35b1a21000, 0x35b1a22000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: Default::default(),
}
);
Expand All @@ -224,10 +231,7 @@ fn check_maps_entry_parsing_32bit() {
.unwrap(),
MapsEntry {
address: (0x08056000, 0x08077000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: "[heap]".into(),
}
);
Expand All @@ -239,10 +243,7 @@ fn check_maps_entry_parsing_32bit() {
.unwrap(),
MapsEntry {
address: (0xb7c79000, 0xb7e02000),
perms: ['r', '-', '-', 'p'],
offset: 0x00000000,
dev: (0x08, 0x01),
inode: 0x60662705,
pathname: "/usr/lib/locale/locale-archive".into(),
}
);
Expand All @@ -252,10 +253,7 @@ fn check_maps_entry_parsing_32bit() {
.unwrap(),
MapsEntry {
address: (0xb7e02000, 0xb7e03000),
perms: ['r', 'w', '-', 'p'],
offset: 0x00000000,
dev: (0x00, 0x00),
inode: 0x0,
pathname: Default::default(),
}
);
Expand All @@ -266,10 +264,7 @@ fn check_maps_entry_parsing_32bit() {
.unwrap(),
MapsEntry {
address: (0xb7c79000, 0xb7e02000),
perms: ['r', '-', '-', 'p'],
offset: 0x00000000,
dev: (0x08, 0x01),
inode: 0x60662705,
pathname: "/executable/path/with some spaces".into(),
}
);
Expand All @@ -280,10 +275,7 @@ fn check_maps_entry_parsing_32bit() {
.unwrap(),
MapsEntry {
address: (0xb7c79000, 0xb7e02000),
perms: ['r', '-', '-', 'p'],
offset: 0x00000000,
dev: (0x08, 0x01),
inode: 0x60662705,
pathname: "/executable/path/with multiple-continuous spaces ".into(),
}
);
Expand All @@ -294,10 +286,7 @@ fn check_maps_entry_parsing_32bit() {
.unwrap(),
MapsEntry {
address: (0xb7c79000, 0xb7e02000),
perms: ['r', '-', '-', 'p'],
offset: 0x00000000,
dev: (0x08, 0x01),
inode: 0x60662705,
pathname: "/executable/path/starts-with-spaces".into(),
}
);
Expand Down
Loading