Skip to content

Commit a84ae25

Browse files
authored
Split out samply-debugid crate (#685)
Fixes #684
2 parents 80859bb + 127c0f8 commit a84ae25

File tree

15 files changed

+322
-160
lines changed

15 files changed

+322
-160
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ members = [
55
"fxprof-processed-profile",
66
"gecko_profile",
77
"samply-api",
8+
"samply-debugid",
89
"samply-quota-manager",
910
"samply-symbols",
1011
"samply",

samply-debugid/Cargo.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[package]
2+
name = "samply-debugid"
3+
version = "0.1.0"
4+
authors = ["Markus Stange <[email protected]>"]
5+
license = "MIT OR Apache-2.0"
6+
edition = "2021"
7+
description = "Samply compatible debugids."
8+
repository = "https://github.com/mstange/samply/"
9+
readme = "README.md"
10+
11+
[dependencies.debugid]
12+
default-features = false
13+
version = "0.8.0"
14+
15+
[dependencies.object]
16+
default-features = false
17+
features = ["read_core"]
18+
version = "0.37"
19+
20+
[dependencies.uuid]
21+
default-features = false
22+
version = "1"

samply-debugid/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# samply-debugid
2+
3+
This crate allows generating [`debugid`s](https://crates.io/crates/debugid)
4+
that are compatible with `samply`. Useful for writing your own profiles to be
5+
symbolicated and displayed with `samply load`.

samply-debugid/src/codeid.rs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
use std::str::FromStr;
2+
3+
use object::Object;
4+
use uuid::Uuid;
5+
6+
/// An enum carrying an identifier for a binary. This stores the same information
7+
/// as a [`debugid::CodeId`], but without projecting it down to a string.
8+
///
9+
/// All variants need to be treated rather differently; see their respective documentation.
10+
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
11+
pub enum CodeId {
12+
/// The code ID for a Windows PE file. When combined with the binary name,
13+
/// the code ID lets you obtain binaries from symbol servers. It is not useful
14+
/// on its own; it has to be paired with the binary name.
15+
///
16+
/// On Windows, a binary's code ID is distinct from its debug ID (= pdb GUID + age).
17+
/// If you have a binary file, you can get both the code ID and the debug ID
18+
/// from it. If you only have a PDB file, you usually *cannot* get the code ID of
19+
/// the corresponding binary from it.
20+
PeCodeId(PeCodeId),
21+
22+
/// The code ID for a macOS / iOS binary (mach-O). This is just the mach-O UUID.
23+
/// The mach-O UUID is shared between both the binary file and the debug file (dSYM),
24+
/// and it can be used on its own to find dSYMs using Spotlight.
25+
///
26+
/// The debug ID and the code ID contain the same information; the debug ID
27+
/// is literally just the UUID plus a zero at the end.
28+
MachoUuid(Uuid),
29+
30+
/// The code ID for a Linux ELF file. This is the "ELF build ID" (also called "GNU build ID").
31+
/// The build ID is usually 20 bytes, commonly written out as 40 hex chars.
32+
///
33+
/// It can be used to find debug files on the local file system or to download
34+
/// binaries or debug files from a `debuginfod` symbol server. It does not have to be
35+
/// paired with the binary name.
36+
///
37+
/// An ELF binary's code ID is more useful than its debug ID: The debug ID is truncated
38+
/// to 16 bytes (32 hex characters), whereas the code ID is the full ELF build ID.
39+
ElfBuildId(ElfBuildId),
40+
}
41+
42+
impl FromStr for CodeId {
43+
type Err = ();
44+
45+
fn from_str(s: &str) -> Result<Self, Self::Err> {
46+
if s.len() <= 17 {
47+
// 8 bytes timestamp + 1 to 8 bytes of image size
48+
Ok(CodeId::PeCodeId(PeCodeId::from_str(s)?))
49+
} else if s.len() == 32 && is_uppercase_hex(s) {
50+
// mach-O UUID
51+
Ok(CodeId::MachoUuid(Uuid::from_str(s).map_err(|_| ())?))
52+
} else {
53+
// ELF build ID. These are usually 40 hex characters (= 20 bytes).
54+
Ok(CodeId::ElfBuildId(ElfBuildId::from_str(s)?))
55+
}
56+
}
57+
}
58+
59+
/// Returns true if every character of `s` is one of `0-9` or `A-F`.
///
/// An empty string trivially satisfies this and yields `true`.
fn is_uppercase_hex(s: &str) -> bool {
    // Any byte of a multi-byte UTF-8 sequence is >= 0x80 and therefore never
    // matches, so checking bytes is equivalent to checking chars here.
    s.bytes().all(|b| matches!(b, b'0'..=b'9' | b'A'..=b'F'))
}
63+
64+
impl std::fmt::Display for CodeId {
65+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66+
match self {
67+
CodeId::PeCodeId(pe) => std::fmt::Display::fmt(pe, f),
68+
CodeId::MachoUuid(uuid) => f.write_fmt(format_args!("{:X}", uuid.simple())),
69+
CodeId::ElfBuildId(elf) => std::fmt::Display::fmt(elf, f),
70+
}
71+
}
72+
}
73+
74+
/// The code ID for a Windows PE file.
///
/// When combined with the binary name, the `PeCodeId` lets you obtain binaries from
/// symbol servers. It is not useful on its own; it has to be paired with the binary name.
///
/// A Windows binary's `PeCodeId` is distinct from its debug ID (= pdb GUID + age).
/// If you have a binary file, you can get both the `PeCodeId` and the debug ID
/// from it. If you only have a PDB file, you usually *cannot* get the `PeCodeId` of
/// the corresponding binary from it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PeCodeId {
    /// Timestamp component; formatted as exactly 8 uppercase hex digits.
    pub timestamp: u32,
    /// Image size component; formatted as lowercase hex without padding.
    pub image_size: u32,
}

impl FromStr for PeCodeId {
    type Err = ();

    /// Parses a string made of 8 hex digits of timestamp followed by 1 to 8
    /// hex digits of image size.
    ///
    /// Returns `Err(())` if the length is outside `9..=16` or if any character
    /// is not an ASCII hex digit. This never panics, even for non-ASCII input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if !(9..=16).contains(&s.len()) {
            return Err(());
        }
        // Validate up front: this guarantees that byte offset 8 is a char
        // boundary (so splitting cannot panic), and it rejects the leading '+'
        // sign that `u32::from_str_radix` would otherwise accept.
        if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(());
        }
        let (timestamp_hex, image_size_hex) = s.split_at(8);
        let timestamp = u32::from_str_radix(timestamp_hex, 16).map_err(|_| ())?;
        let image_size = u32::from_str_radix(image_size_hex, 16).map_err(|_| ())?;
        Ok(Self {
            timestamp,
            image_size,
        })
    }
}

impl std::fmt::Display for PeCodeId {
    /// Writes the timestamp as 8 uppercase hex digits, immediately followed by
    /// the image size in lowercase hex.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:08X}{:x}", self.timestamp, self.image_size)
    }
}
110+
111+
/// The build ID for an ELF file (also called "GNU build ID").
///
/// The build ID can be used to find debug files on the local file system or to download
/// binaries or debug files from a `debuginfod` symbol server. It does not have to be
/// paired with the binary name.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ElfBuildId(pub Vec<u8>);

impl ElfBuildId {
    /// Create a new `ElfBuildId` from a slice of bytes (commonly a sha1 hash
    /// generated by the linker, i.e. 20 bytes).
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self(bytes.to_owned())
    }
}

impl FromStr for ElfBuildId {
    type Err = ();

    /// Parses a hex string into the build ID bytes, two hex digits per byte.
    ///
    /// Both uppercase and lowercase digits are accepted. If the input has an
    /// odd number of bytes, the trailing unpaired byte is ignored. Returns
    /// `Err(())` if any paired character is not an ASCII hex digit. This never
    /// panics, even for non-ASCII input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        /// Decodes a single ASCII hex digit into its value (0..=15).
        fn nibble(b: u8) -> Result<u8, ()> {
            // `to_digit(16)` yields a value below 16, so the cast is lossless.
            char::from(b).to_digit(16).map(|d| d as u8).ok_or(())
        }

        // Work on bytes rather than `&str` slices: slicing a `str` at a
        // non-char-boundary panics, and `u8::from_str_radix` would accept a
        // leading '+' sign such as "+f".
        s.as_bytes()
            .chunks_exact(2)
            .map(|pair| -> Result<u8, ()> { Ok((nibble(pair[0])? << 4) | nibble(pair[1])?) })
            .collect::<Result<Vec<u8>, ()>>()
            .map(Self)
    }
}

impl std::fmt::Display for ElfBuildId {
    /// Writes every byte as two lowercase hex digits, with no separators.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.iter().try_for_each(|byte| write!(f, "{byte:02x}"))
    }
}
150+
151+
/// Tries to obtain a CodeId for an object.
152+
///
153+
/// This currently only handles mach-O and ELF.
154+
pub fn code_id_for_object<'data>(obj: &impl Object<'data>) -> Option<CodeId> {
155+
// ELF
156+
if let Ok(Some(build_id)) = obj.build_id() {
157+
return Some(CodeId::ElfBuildId(ElfBuildId::from_bytes(build_id)));
158+
}
159+
160+
// mach-O
161+
if let Ok(Some(uuid)) = obj.mach_uuid() {
162+
return Some(CodeId::MachoUuid(Uuid::from_bytes(uuid)));
163+
}
164+
165+
None
166+
}

samply-debugid/src/debugid.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
use debugid::DebugId;
2+
use object::{Object, ObjectSection};
3+
use uuid::Uuid;
4+
5+
/// Extension trait with additional constructors for building a debug ID
/// from raw identifier bytes or from the start of the text section.
///
/// Implemented for [`DebugId`] in this module.
pub trait DebugIdExt {
6+
/// Creates a DebugId from some identifier. The identifier could be
7+
/// an ELF build ID, or a hash derived from the text section.
8+
/// The `little_endian` argument specifies whether the object file
9+
/// is targeting a little endian architecture.
///
/// The `DebugId` implementation uses exactly 16 bytes: longer identifiers
/// are truncated and shorter ones are padded with zeros.
10+
fn from_identifier(identifier: &[u8], little_endian: bool) -> Self;
11+
12+
/// Creates a DebugId from a hash of the first 4096 bytes of the .text section.
13+
/// The `little_endian` argument specifies whether the object file
14+
/// is targeting a little endian architecture.
///
/// The `DebugId` implementation ignores any bytes past the first 4096.
15+
fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self;
16+
}
17+
18+
impl DebugIdExt for DebugId {
19+
fn from_identifier(identifier: &[u8], little_endian: bool) -> Self {
20+
// Make sure that we have exactly 16 bytes available, either truncate or fill
21+
// the remainder with zeros.
22+
// ELF build IDs are usually 20 bytes, so if the identifier is an ELF build ID
23+
// then we're performing a lossy truncation.
24+
let mut d = [0u8; 16];
25+
let shared_len = identifier.len().min(d.len());
26+
d[0..shared_len].copy_from_slice(&identifier[0..shared_len]);
27+
28+
// Pretend that the build ID was stored as a UUID with u32 u16 u16 fields inside
29+
// the file. Parse those fields in the endianness of the file. Then use
30+
// Uuid::from_fields to serialize them as big endian.
31+
// For ELF build IDs this is a bit silly, because ELF build IDs aren't actually
32+
// field-based UUIDs, but this is what the tools in the breakpad and
33+
// sentry/symbolic universe do, so we do the same for compatibility with those
34+
// tools.
35+
let (d1, d2, d3) = if little_endian {
36+
(
37+
u32::from_le_bytes([d[0], d[1], d[2], d[3]]),
38+
u16::from_le_bytes([d[4], d[5]]),
39+
u16::from_le_bytes([d[6], d[7]]),
40+
)
41+
} else {
42+
(
43+
u32::from_be_bytes([d[0], d[1], d[2], d[3]]),
44+
u16::from_be_bytes([d[4], d[5]]),
45+
u16::from_be_bytes([d[6], d[7]]),
46+
)
47+
};
48+
let uuid = Uuid::from_fields(d1, d2, d3, d[8..16].try_into().unwrap());
49+
DebugId::from_uuid(uuid)
50+
}
51+
52+
// This algorithm XORs 16-byte chunks directly into a 16-byte buffer.
53+
fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self {
54+
const UUID_SIZE: usize = 16;
55+
const PAGE_SIZE: usize = 4096;
56+
let mut hash = [0; UUID_SIZE];
57+
for (i, byte) in text_first_page.iter().cloned().take(PAGE_SIZE).enumerate() {
58+
hash[i % UUID_SIZE] ^= byte;
59+
}
60+
DebugId::from_identifier(&hash, little_endian)
61+
}
62+
}
63+
64+
/// Tries to obtain a DebugId for an object. This uses the build ID, if available,
65+
/// and falls back to hashing the first page of the text section otherwise.
66+
/// Returns None on failure.
67+
pub fn debug_id_for_object<'data>(obj: &impl Object<'data>) -> Option<DebugId> {
68+
// Windows
69+
if let Ok(Some(pdb_info)) = obj.pdb_info() {
70+
return Some(DebugId::from_guid_age(&pdb_info.guid(), pdb_info.age()).unwrap());
71+
}
72+
73+
// ELF
74+
if let Ok(Some(build_id)) = obj.build_id() {
75+
return Some(DebugId::from_identifier(build_id, obj.is_little_endian()));
76+
}
77+
78+
// mach-O
79+
if let Ok(Some(uuid)) = obj.mach_uuid() {
80+
return Some(DebugId::from_uuid(Uuid::from_bytes(uuid)));
81+
}
82+
83+
// We were not able to locate a build ID, so fall back to creating a synthetic
84+
// identifier from a hash of the first page of the ".text" (program code) section.
85+
if let Some(section) = obj.section_by_name(".text") {
86+
let data_len = section.size().min(4096);
87+
if let Ok(Some(first_page_data)) = section.data_range(section.address(), data_len) {
88+
return Some(DebugId::from_text_first_page(
89+
first_page_data,
90+
obj.is_little_endian(),
91+
));
92+
}
93+
}
94+
95+
None
96+
}

samply-debugid/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
mod codeid;
2+
mod debugid;
3+
4+
pub use codeid::{code_id_for_object, CodeId, ElfBuildId, PeCodeId};
5+
pub use debugid::{debug_id_for_object, DebugIdExt};

samply-symbols/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ srcsrv = "0.2.2"
4949
lzma-rs = "0.3"
5050
macho-unwind-info = "0.5.0"
5151
debugid = "0.8.0"
52+
samply-debugid = { version = "0.1.0", path = "../samply-debugid" }
5253
flate2 = "1"
5354
yoke = "0.8"
5455
yoke-derive = "0.8"

samply-symbols/src/binary_image.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ use linux_perf_data::jitdump::JitDumpHeader;
33
use linux_perf_data::linux_perf_event_reader::RawData;
44
use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, PeFile, PeFile32, PeFile64};
55
use object::{FileKind, Object, ReadRef};
6+
use samply_debugid::{CodeId, ElfBuildId, PeCodeId};
67

78
use crate::debugid_util::{code_id_for_object, debug_id_for_object};
89
use crate::error::Error;
910
use crate::jitdump::{debug_id_and_code_id_for_jitdump, JitDumpIndex};
1011
use crate::macho::{DyldCacheFileData, MachOData, MachOFatArchiveMemberData};
1112
use crate::shared::{
12-
relative_address_base, CodeId, ElfBuildId, FileAndPathHelperError, FileContents,
13-
FileContentsWrapper, LibraryInfo, PeCodeId, RangeReadRef,
13+
relative_address_base, FileAndPathHelperError, FileContents, FileContentsWrapper, LibraryInfo,
14+
RangeReadRef,
1415
};
1516

1617
#[derive(thiserror::Error, Debug)]

samply-symbols/src/breakpad/index.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@ use nom::error::{Error, ErrorKind, ParseError};
1313
use nom::sequence::{terminated, tuple};
1414
use nom::{Err, IResult};
1515
use object::ReadRef;
16+
use samply_debugid::CodeId;
1617
use zerocopy::{IntoBytes, LittleEndian, Ref, U16, U32, U64};
1718
use zerocopy_derive::*;
1819

1920
use crate::source_file_path::SourceFilePathIndex;
20-
use crate::CodeId;
2121

2222
#[derive(Debug, Clone, PartialEq, Eq)]
2323
pub struct BreakpadIndex<'a> {
@@ -1233,7 +1233,7 @@ mod test {
12331233
use std::str::FromStr;
12341234

12351235
use super::*;
1236-
use crate::{ElfBuildId, PeCodeId};
1236+
use samply_debugid::{ElfBuildId, PeCodeId};
12371237

12381238
#[test]
12391239
fn test1() {

0 commit comments

Comments
 (0)