-
Notifications
You must be signed in to change notification settings - Fork 272
Optimize proc maps parsing code size #729
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,20 +13,8 @@ use core::str::FromStr; | |
pub(super) struct MapsEntry { | ||
/// start (inclusive) and limit (exclusive) of address range. | ||
address: (usize, usize), | ||
/// The perms field holds the permissions for the entry | ||
/// | ||
/// r = read | ||
/// w = write | ||
/// x = execute | ||
/// s = shared | ||
/// p = private (copy on write) | ||
perms: [char; 4], | ||
/// Offset into the file (or "whatever"). | ||
offset: u64, | ||
/// device (major, minor) | ||
dev: (usize, usize), | ||
/// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitialized data aka BSS). | ||
inode: usize, | ||
/// Usually the file backing the mapping. | ||
/// | ||
/// Note: The man page for proc includes a note about "coordination" by | ||
|
@@ -56,15 +44,25 @@ pub(super) struct MapsEntry { | |
} | ||
|
||
pub(super) fn parse_maps() -> Result<Vec<MapsEntry>, &'static str> { | ||
let mut v = Vec::new(); | ||
let mut proc_self_maps = | ||
File::open("/proc/self/maps").map_err(|_| "Couldn't open /proc/self/maps")?; | ||
let mut buf = String::new(); | ||
let _bytes_read = proc_self_maps | ||
.read_to_string(&mut buf) | ||
.map_err(|_| "Couldn't read /proc/self/maps")?; | ||
for line in buf.lines() { | ||
|
||
let mut v = Vec::new(); | ||
let mut buf = buf.as_str(); | ||
while let Some(match_idx) = buf.bytes().position(|b| b == b'\n') { | ||
// Unsafe is unfortunately necessary to get the bounds check removed (for code size). | ||
|
||
// SAFETY: match_idx is the position of a one-byte '\n' in buf, so it is in bounds and on a UTF-8 boundary. | ||
let line = unsafe { buf.get_unchecked(..match_idx) }; | ||
|
||
v.push(line.parse()?); | ||
|
||
// SAFETY: match_idx is the position of a one-byte '\n' in buf, so match_idx + 1 is at most buf.len() and on a UTF-8 boundary. | ||
buf = unsafe { buf.get_unchecked((match_idx + 1)..) }; | ||
} | ||
|
||
Ok(v) | ||
|
@@ -92,70 +90,88 @@ impl FromStr for MapsEntry { | |
// e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]" | ||
// e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" | ||
// e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0" | ||
// | ||
// Note that paths may contain spaces, so we can't use `str::split` for parsing (until | ||
// Split::remainder is stabilized #77998). | ||
fn from_str(s: &str) -> Result<Self, Self::Err> { | ||
let (range_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); | ||
// While there are nicer standard library APIs available for this, we aim for minimal code size. | ||
|
||
let mut state = s; | ||
|
||
fn parse_start<'a>(state: &mut &'a str) -> &'a str { | ||
// Unsafe is unfortunately necessary to get the bounds check removed (for code size). | ||
|
||
let start_idx = state.bytes().position(|b| b != b' '); | ||
if let Some(start_idx) = start_idx { | ||
// SAFETY: It comes from position, so it's in bounds. | ||
// It must be on a UTF-8 boundary as it's the first byte that isn't ' '. | ||
*state = unsafe { state.get_unchecked(start_idx..) }; | ||
} | ||
let match_idx = state.bytes().position(|b| b == b' '); | ||
match match_idx { | ||
None => { | ||
let result = *state; | ||
*state = ""; | ||
result | ||
} | ||
Some(match_idx) => { | ||
// SAFETY: match_idx comes from .bytes().position() of an ASCII character, | ||
// so it's both in bounds and a UTF-8 boundary | ||
let result = unsafe { state.get_unchecked(..match_idx) }; | ||
// SAFETY: match_idx is the position of a one-byte ' ', so match_idx + 1 is at most state.len() and on a UTF-8 boundary. | ||
*state = unsafe { state.get_unchecked((match_idx + 1)..) }; | ||
result | ||
} | ||
} | ||
} | ||
Comment on lines
+94
to
+123
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @hkBst This is an example of the kind of microoptimization that is both very hard to do in the compiler and also can reap significant benefits since here we care mostly about code size, even in the dead code, as this code size is essentially multiplied by all Rust binaries. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @workingjubilee Thanks for thinking of me, although I'm not quite sure why you did. |
||
|
||
fn error(msg: &str) -> &str { | ||
if cfg!(debug_assertions) { | ||
msg | ||
} else { | ||
"invalid map entry" | ||
} | ||
} | ||
|
||
let range_str = parse_start(&mut state); | ||
if range_str.is_empty() { | ||
return Err("Couldn't find address"); | ||
return Err(error("Couldn't find address")); | ||
} | ||
|
||
let (perms_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); | ||
let perms_str = parse_start(&mut state); | ||
if perms_str.is_empty() { | ||
return Err("Couldn't find permissions"); | ||
return Err(error("Couldn't find permissions")); | ||
} | ||
|
||
let (offset_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); | ||
let offset_str = parse_start(&mut state); | ||
if offset_str.is_empty() { | ||
return Err("Couldn't find offset"); | ||
return Err(error("Couldn't find offset")); | ||
} | ||
|
||
let (dev_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); | ||
let dev_str = parse_start(&mut state); | ||
if dev_str.is_empty() { | ||
return Err("Couldn't find dev"); | ||
return Err(error("Couldn't find dev")); | ||
} | ||
|
||
let (inode_str, s) = s.trim_start().split_once(' ').unwrap_or((s, "")); | ||
let inode_str = parse_start(&mut state); | ||
if inode_str.is_empty() { | ||
return Err("Couldn't find inode"); | ||
return Err(error("Couldn't find inode")); | ||
} | ||
|
||
// Pathname may be omitted in which case it will be empty | ||
let pathname_str = s.trim_start(); | ||
let pathname_str = state.trim_ascii_start(); | ||
|
||
let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); | ||
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); | ||
let hex = |s| usize::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number")); | ||
let hex64 = |s| u64::from_str_radix(s, 16).map_err(|_| error("Couldn't parse hex number")); | ||
|
||
let address = if let Some((start, limit)) = range_str.split_once('-') { | ||
(hex(start)?, hex(limit)?) | ||
} else { | ||
return Err("Couldn't parse address range"); | ||
}; | ||
let perms: [char; 4] = { | ||
let mut chars = perms_str.chars(); | ||
let mut c = || chars.next().ok_or("insufficient perms"); | ||
let perms = [c()?, c()?, c()?, c()?]; | ||
if chars.next().is_some() { | ||
return Err("too many perms"); | ||
} | ||
perms | ||
return Err(error("Couldn't parse address range")); | ||
}; | ||
let offset = hex64(offset_str)?; | ||
let dev = if let Some((major, minor)) = dev_str.split_once(':') { | ||
(hex(major)?, hex(minor)?) | ||
} else { | ||
return Err("Couldn't parse dev"); | ||
}; | ||
let inode = hex(inode_str)?; | ||
let pathname = pathname_str.into(); | ||
|
||
Ok(MapsEntry { | ||
address, | ||
perms, | ||
offset, | ||
dev, | ||
inode, | ||
pathname, | ||
}) | ||
} | ||
|
@@ -172,10 +188,7 @@ fn check_maps_entry_parsing_64bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0xffffffffff600000, 0xffffffffff601000), | ||
perms: ['-', '-', 'x', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x00, 0x00), | ||
inode: 0x0, | ||
pathname: "[vsyscall]".into(), | ||
} | ||
); | ||
|
@@ -187,10 +200,7 @@ fn check_maps_entry_parsing_64bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0x7f5985f46000, 0x7f5985f48000), | ||
perms: ['r', 'w', '-', 'p'], | ||
offset: 0x00039000, | ||
dev: (0x103, 0x06), | ||
inode: 0x76021795, | ||
pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(), | ||
} | ||
); | ||
|
@@ -200,10 +210,7 @@ fn check_maps_entry_parsing_64bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0x35b1a21000, 0x35b1a22000), | ||
perms: ['r', 'w', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x00, 0x00), | ||
inode: 0x0, | ||
pathname: Default::default(), | ||
} | ||
); | ||
|
@@ -224,10 +231,7 @@ fn check_maps_entry_parsing_32bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0x08056000, 0x08077000), | ||
perms: ['r', 'w', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x00, 0x00), | ||
inode: 0x0, | ||
pathname: "[heap]".into(), | ||
} | ||
); | ||
|
@@ -239,10 +243,7 @@ fn check_maps_entry_parsing_32bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0xb7c79000, 0xb7e02000), | ||
perms: ['r', '-', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x08, 0x01), | ||
inode: 0x60662705, | ||
pathname: "/usr/lib/locale/locale-archive".into(), | ||
} | ||
); | ||
|
@@ -252,10 +253,7 @@ fn check_maps_entry_parsing_32bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0xb7e02000, 0xb7e03000), | ||
perms: ['r', 'w', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x00, 0x00), | ||
inode: 0x0, | ||
pathname: Default::default(), | ||
} | ||
); | ||
|
@@ -266,10 +264,7 @@ fn check_maps_entry_parsing_32bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0xb7c79000, 0xb7e02000), | ||
perms: ['r', '-', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x08, 0x01), | ||
inode: 0x60662705, | ||
pathname: "/executable/path/with some spaces".into(), | ||
} | ||
); | ||
|
@@ -280,10 +275,7 @@ fn check_maps_entry_parsing_32bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0xb7c79000, 0xb7e02000), | ||
perms: ['r', '-', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x08, 0x01), | ||
inode: 0x60662705, | ||
pathname: "/executable/path/with multiple-continuous spaces ".into(), | ||
} | ||
); | ||
|
@@ -294,10 +286,7 @@ fn check_maps_entry_parsing_32bit() { | |
.unwrap(), | ||
MapsEntry { | ||
address: (0xb7c79000, 0xb7e02000), | ||
perms: ['r', '-', '-', 'p'], | ||
offset: 0x00000000, | ||
dev: (0x08, 0x01), | ||
inode: 0x60662705, | ||
pathname: "/executable/path/starts-with-spaces".into(), | ||
} | ||
); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
kinda insane that this matters that much
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's only a tiny improvement, so "that much" is a bit of an overstatement :). Actually, I don't have numbers on this one, but I did see the call to `drop_in_place` before the change but not after.