Skip to content

Commit 8d6271f

Browse files
committed
fix: keep static globals discoverable
- handle DW_TAG_variable entries nested under subprograms when DW_AT_external marks them as statics
- add demangled aliases for globals and deduplicate suffix matches
- expand the Rust fixture/test to ensure DWARF can resolve GLOBAL_ENUM
1 parent 990d7a3 commit 8d6271f

File tree

6 files changed

+363
-56
lines changed

6 files changed

+363
-56
lines changed

ghostscope-dwarf/src/core/demangle.rs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,32 @@ use gimli::DwLang;
55
/// Demangle a symbol string using language hint when available.
66
/// Returns None if demangling fails or is not applicable.
77
pub fn demangle_by_lang(lang: Option<DwLang>, s: &str) -> Option<String> {
8+
// 1) Trust DW_AT_language when available
89
match lang {
9-
Some(gimli::DW_LANG_Rust) => demangle_rust(s),
10+
Some(gimli::DW_LANG_Rust) => {
11+
if let Some(d) = demangle_rust(s) {
12+
return Some(d);
13+
}
14+
}
1015
Some(gimli::DW_LANG_C_plus_plus)
1116
| Some(gimli::DW_LANG_C_plus_plus_11)
1217
| Some(gimli::DW_LANG_C_plus_plus_14)
1318
| Some(gimli::DW_LANG_C_plus_plus_17)
14-
| Some(gimli::DW_LANG_C_plus_plus_20) => demangle_cpp(s),
15-
_ => {
16-
// Try common patterns heuristically
17-
if is_rust_mangled(s) {
18-
demangle_rust(s)
19-
} else if is_itanium_cpp_mangled(s) {
20-
demangle_cpp(s)
21-
} else {
22-
None
19+
| Some(gimli::DW_LANG_C_plus_plus_20) => {
20+
if let Some(d) = demangle_cpp(s) {
21+
return Some(d);
2322
}
2423
}
24+
_ => {}
25+
}
26+
27+
// 2) Fall back to heuristics if language hint missing or demangle failed
28+
if is_rust_mangled(s) || looks_like_legacy_rust(s) {
29+
demangle_rust(s)
30+
} else if is_itanium_cpp_mangled(s) {
31+
demangle_cpp(s)
32+
} else {
33+
None
2534
}
2635
}
2736

@@ -53,7 +62,11 @@ pub fn demangled_leaf(full: &str) -> String {
5362

5463
/// Heuristic: Rust v0 mangling starts with "_R"; legacy Rust symbols ("_ZN...17h...E") are also accepted.
5564
pub fn is_rust_mangled(s: &str) -> bool {
56-
s.starts_with("_R")
65+
s.starts_with("_R") || looks_like_legacy_rust(s)
66+
}
67+
68+
fn looks_like_legacy_rust(s: &str) -> bool {
69+
s.starts_with("_ZN") && s.contains("17h") && s.ends_with('E')
5770
}
5871

5972
/// Heuristic: Itanium C++ mangling starts with "_Z".

ghostscope-dwarf/src/data/lightweight_index.rs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//! - Support for parallel construction with index shards
77
//! - Fast binary search for symbol lookup
88
9-
use crate::core::IndexEntry;
9+
use crate::core::{demangle_by_lang, demangled_leaf, IndexEntry};
1010
use gimli::{DebugInfoOffset, EndianArcSlice, LittleEndian};
1111
use std::collections::{BTreeMap, HashMap};
1212
use tracing::debug;
@@ -89,11 +89,11 @@ impl LightweightIndex {
8989
}
9090

9191
// Add all type entries (struct/class/union/enum)
92-
for (name, ty_entries) in types {
92+
for (name, ty_entries) in &types {
9393
let start_idx = entries.len();
94-
entries.extend(ty_entries);
94+
entries.extend(ty_entries.clone());
9595
let indices: Vec<usize> = (start_idx..entries.len()).collect();
96-
type_map.insert(name, indices);
96+
type_map.insert(name.clone(), indices);
9797
}
9898

9999
// IMPORTANT: Do NOT sort entries! This would invalidate the indices
@@ -118,6 +118,25 @@ impl LightweightIndex {
118118
total_functions, total_variables, entries.len(), address_map.len()
119119
);
120120

121+
// Ensure demangled aliases exist for variables even if DW_AT_name was missing.
122+
for (idx, entry) in entries.iter().enumerate() {
123+
if entry.tag == gimli::constants::DW_TAG_variable {
124+
if let Some(demangled) = demangle_by_lang(entry.language, entry.name.as_ref()) {
125+
let leaf = demangled_leaf(&demangled);
126+
if leaf != entry.name.as_ref() {
127+
tracing::trace!(
128+
"LightweightIndex: alias '{}' -> '{}' (idx {}, lang={:?})",
129+
entry.name,
130+
leaf,
131+
idx,
132+
entry.language
133+
);
134+
variable_map.entry(leaf).or_default().push(idx);
135+
}
136+
}
137+
}
138+
}
139+
121140
Self {
122141
entries,
123142
function_map,

ghostscope-dwarf/src/module/data.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2155,10 +2155,17 @@ impl ModuleData {
21552155
};
21562156

21572157
let mut out = Vec::new();
2158+
// Track DIEs we've already emitted (unit_offset, die_offset)
2159+
let mut seen_offsets: HashSet<(u64, u64)> = HashSet::new();
2160+
21582161
// Try demangled full (preserve the demangled name that matched)
21592162
if let Some(indices) = self.demangled_variable_map.get(name) {
21602163
for &idx in indices {
21612164
if let Some(entry) = self.lightweight_index.entry(idx) {
2165+
let key = (entry.unit_offset.0 as u64, entry.die_offset.0 as u64);
2166+
if !seen_offsets.insert(key) {
2167+
continue;
2168+
}
21622169
let link_address = entry.address_ranges.first().and_then(|(lo, hi)| {
21632170
if lo == hi {
21642171
Some(*lo)
@@ -2185,6 +2192,10 @@ impl ModuleData {
21852192
if let Some(indices) = self.demangled_variable_leaf_map.get(name) {
21862193
for &idx in indices {
21872194
if let Some(entry) = self.lightweight_index.entry(idx) {
2195+
let key = (entry.unit_offset.0 as u64, entry.die_offset.0 as u64);
2196+
if !seen_offsets.insert(key) {
2197+
continue;
2198+
}
21882199
let link_address = entry.address_ranges.first().and_then(|(lo, hi)| {
21892200
if lo == hi {
21902201
Some(*lo)
@@ -2213,6 +2224,10 @@ impl ModuleData {
22132224
for key in self.lightweight_index.get_variable_names() {
22142225
if key.rsplit("::").next().map(|s| s == name).unwrap_or(false) {
22152226
for e in self.lightweight_index.find_variables_by_name(key) {
2227+
let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
2228+
if !seen_offsets.insert(key) {
2229+
continue;
2230+
}
22162231
let link_address =
22172232
e.address_ranges
22182233
.first()
@@ -2242,6 +2257,10 @@ impl ModuleData {
22422257
if e.tag != gimli::constants::DW_TAG_variable {
22432258
continue;
22442259
}
2260+
let key_offsets = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
2261+
if !seen_offsets.insert(key_offsets) {
2262+
continue;
2263+
}
22452264
let last = e.name.rsplit("::").next().unwrap_or(e.name.as_ref());
22462265
if last == name || e.name == name.into() {
22472266
let link_address =
@@ -2378,13 +2397,18 @@ impl ModuleData {
23782397
pub(crate) fn find_global_variables_by_name(&self, name: &str) -> Vec<GlobalVariableInfo> {
23792398
let mut out = Vec::new();
23802399
let entries = self.lightweight_index.find_variables_by_name(name);
2400+
let mut seen_offsets: HashSet<(u64, u64)> = HashSet::new();
23812401

23822402
// Parse object file once for section classification
23832403
let obj = match object::File::parse(&self._binary_mapped_file.data[..]) {
23842404
Ok(f) => f,
23852405
Err(_) => {
23862406
// Cannot classify sections, but still return entries with link_address
23872407
for e in entries {
2408+
let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
2409+
if !seen_offsets.insert(key) {
2410+
continue;
2411+
}
23882412
let link_address =
23892413
e.address_ranges
23902414
.first()
@@ -2402,6 +2426,10 @@ impl ModuleData {
24022426
};
24032427

24042428
for e in entries {
2429+
let key = (e.unit_offset.0 as u64, e.die_offset.0 as u64);
2430+
if !seen_offsets.insert(key) {
2431+
continue;
2432+
}
24052433
let link_address =
24062434
e.address_ranges
24072435
.first()

ghostscope-dwarf/src/parser/fast_parser.rs

Lines changed: 89 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
//! Unified DWARF parser - true single-pass parsing
22
33
use crate::{
4-
core::{IndexEntry, Result},
4+
core::{
5+
demangle::{demangle_by_lang, demangled_leaf},
6+
IndexEntry, Result,
7+
},
58
data::{
69
directory_from_index, resolve_file_path, LightweightFileIndex, LightweightIndex,
710
LineMappingTable, ScopedFileIndexManager,
@@ -246,65 +249,118 @@ impl<'a> DwarfParser<'a> {
246249
}
247250
}
248251
gimli::constants::DW_TAG_variable => {
252+
tracing::trace!(
253+
"Evaluating global variable DIE {:?} in CU {:?}",
254+
entry.offset(),
255+
unit_offset
256+
);
257+
let is_static_symbol = self.is_static_symbol(entry).unwrap_or(false);
249258
let in_function_scope = tag_stack.iter().any(|t| {
250259
*t == gimli::constants::DW_TAG_subprogram
251260
|| *t == gimli::constants::DW_TAG_inlined_subroutine
252261
});
253-
if in_function_scope {
262+
if in_function_scope && !is_static_symbol {
263+
tracing::trace!(
264+
"Skipping variable at {:?} (in function scope, stack={:?})",
265+
entry.offset(),
266+
tag_stack
267+
);
254268
// Skip local variables
255269
tag_stack.push(entry.tag());
256270
continue;
271+
} else if in_function_scope {
272+
// Rust (and some C compilers) sometimes nest file-scoped statics under the
273+
// function that first references them, even though DW_AT_location uses
274+
// DW_OP_addr. When DW_AT_external is false we treat them as true globals.
275+
tracing::trace!(
276+
"Treating static variable at {:?} as global despite function scope (stack={:?})",
277+
entry.offset(),
278+
tag_stack
279+
);
257280
}
258281
if Self::is_declaration(entry).unwrap_or(false) {
282+
tracing::trace!(
283+
"Skipping variable at {:?} (declaration-only DIE)",
284+
entry.offset()
285+
);
259286
tag_stack.push(entry.tag());
260287
continue;
261288
}
289+
let mut collected_names: Vec<(String, bool)> = Vec::new();
290+
let mut push_unique_name = |candidate: String, is_linkage_alias: bool| {
291+
if candidate.is_empty() {
292+
return;
293+
}
294+
if collected_names
295+
.iter()
296+
.any(|(existing, _)| existing == &candidate)
297+
{
298+
return;
299+
}
300+
collected_names.push((candidate, is_linkage_alias));
301+
};
302+
303+
let mut have_primary_name = false;
262304
if let Some(name) = self.extract_name(self.dwarf, unit, entry)? {
263-
let flags = crate::core::IndexFlags {
264-
is_static: self.is_static_symbol(entry).unwrap_or(false),
265-
..Default::default()
266-
};
267-
// Restore variable address for globals/statics via DW_AT_location
268-
let var_addr = self.extract_variable_address(entry, unit)?;
269-
let var_ranges = var_addr.map(|a| vec![(a, a)]).unwrap_or_default();
305+
push_unique_name(name, false);
306+
have_primary_name = true;
307+
}
308+
309+
if let Some((linkage_name, _)) =
310+
self.extract_linkage_name(self.dwarf, unit, entry)?
311+
{
312+
if let Some(demangled) =
313+
demangle_by_lang(cu_language, linkage_name.as_str())
314+
{
315+
let leaf = demangled_leaf(&demangled);
316+
push_unique_name(leaf, false);
317+
have_primary_name = true;
318+
}
319+
push_unique_name(linkage_name.clone(), true);
320+
}
321+
322+
if !have_primary_name {
323+
tracing::trace!(
324+
"DWARF variable at {:?} missing usable name (CU lang={:?}); skipping alias registration",
325+
entry.offset(),
326+
cu_language
327+
);
328+
tag_stack.push(entry.tag());
329+
continue;
330+
}
331+
332+
let flags = crate::core::IndexFlags {
333+
is_static: is_static_symbol,
334+
..Default::default()
335+
};
336+
let var_addr = self.extract_variable_address(entry, unit)?;
337+
let var_ranges = var_addr.map(|a| vec![(a, a)]).unwrap_or_default();
338+
339+
for (name, is_linkage_alias) in collected_names {
340+
let mut entry_flags = flags;
341+
entry_flags.is_linkage = is_linkage_alias;
270342
let index_entry = IndexEntry {
271343
name: std::sync::Arc::from(name.as_str()),
272344
die_offset: entry.offset(),
273345
unit_offset,
274346
tag: entry.tag(),
275-
flags,
347+
flags: entry_flags,
276348
language: cu_language,
277349
address_ranges: var_ranges.clone(),
278350
entry_pc: None,
279351
};
352+
tracing::trace!(
353+
"Registering variable alias '{}' (linkage={}, lang={:?}, die={:?})",
354+
name,
355+
entry_flags.is_linkage,
356+
cu_language,
357+
entry.offset()
358+
);
280359
shard
281360
.variables
282361
.entry(name.clone())
283362
.or_default()
284363
.push(index_entry);
285-
if let Some((linkage_name, _)) =
286-
self.extract_linkage_name(self.dwarf, unit, entry)?
287-
{
288-
if linkage_name != name {
289-
let mut alias_flags = flags;
290-
alias_flags.is_linkage = true;
291-
let index_entry_linkage = IndexEntry {
292-
name: std::sync::Arc::from(linkage_name.as_str()),
293-
die_offset: entry.offset(),
294-
unit_offset,
295-
tag: entry.tag(),
296-
flags: alias_flags,
297-
language: cu_language,
298-
address_ranges: var_ranges,
299-
entry_pc: None,
300-
};
301-
shard
302-
.variables
303-
.entry(linkage_name)
304-
.or_default()
305-
.push(index_entry_linkage);
306-
}
307-
}
308364
}
309365
}
310366
gimli::constants::DW_TAG_structure_type

0 commit comments

Comments
 (0)