-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[lldb] Change synthetic symbol names to have file address #137512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lldb] Change synthetic symbol names to have file address #137512
Conversation
|
@llvm/pr-subscribers-lldb Author: Ely Ronnen (eronnen) ChangesAdds a setting that makes lldb generate synthetic symbol names according to the file address of the function instead of the index, this could make it easier when debugging crashes and stack traces to understand which function the unnamed symbols corrresponds to Full diff: https://github.com/llvm/llvm-project/pull/137512.diff 6 Files Affected:
diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h
index 909ee08f9ba62..23f64a153d47d 100644
--- a/lldb/include/lldb/Core/ModuleList.h
+++ b/lldb/include/lldb/Core/ModuleList.h
@@ -67,6 +67,19 @@ static constexpr OptionEnumValueElement g_auto_download_enum_values[] = {
},
};
+static constexpr OptionEnumValueElement g_synthetic_symbols_name_style_values[] = {
+ {
+ lldb::eSyntheticSymbolsNameStyleIndex,
+ "index",
+ "Function index style",
+ },
+ {
+ lldb::eSyntheticSymbolsNameStyleFileAddress,
+ "file-address",
+ "Function file address in module style",
+ },
+};
+
class ModuleListProperties : public Properties {
mutable llvm::sys::RWMutex m_symlink_paths_mutex;
PathMappingList m_symlink_paths;
@@ -91,6 +104,7 @@ class ModuleListProperties : public Properties {
bool GetLoadSymbolOnDemand();
lldb::SymbolDownload GetSymbolAutoDownload() const;
+ lldb::SyntheticSymbolsNameStyle GetSyntheticSymbolsNameStyle() const;
PathMappingList GetSymlinkMappings() const;
};
diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h
index e05c845a69f3e..5c37b33e7442e 100644
--- a/lldb/include/lldb/Symbol/Symbol.h
+++ b/lldb/include/lldb/Symbol/Symbol.h
@@ -339,6 +339,7 @@ class Symbol : public SymbolContextScope {
m_is_weak : 1,
m_type : 6; // Values from the lldb::SymbolType enum.
mutable Mangled m_mangled; // uniqued symbol name/mangled name pair
+
AddressRange m_addr_range; // Contains the value, or the section offset
// address when the value is an address in a
// section, and the size (if any)
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
index 6d10cc8bcffcb..26e83cefbe571 100644
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -1391,6 +1391,12 @@ enum StopDisassemblyType {
eStopDisassemblyTypeAlways
};
+/// Format to use for unknown symbols.
+enum SyntheticSymbolsNameStyle {
+ eSyntheticSymbolsNameStyleIndex = 0,
+ eSyntheticSymbolsNameStyleFileAddress = 1,
+};
+
} // namespace lldb
#endif // LLDB_LLDB_ENUMERATIONS_H
diff --git a/lldb/source/Core/CoreProperties.td b/lldb/source/Core/CoreProperties.td
index a1a4e994c3b9c..7eaecb729c36d 100644
--- a/lldb/source/Core/CoreProperties.td
+++ b/lldb/source/Core/CoreProperties.td
@@ -46,6 +46,11 @@ let Definition = "modulelist" in {
Global,
DefaultFalse,
Desc<"Enable on demand symbol loading in LLDB. LLDB will load debug info on demand for each module based on various conditions (e.g. matched breakpoint, resolved stack frame addresses and matched global variables/function symbols in symbol table) to improve performance. Please refer to docs/use/ondemand.rst for details.">;
+ def SyntheticSymbolsNameStyle: Property<"synthetic-symbols-name-style", "Enum">,
+ Global,
+ DefaultEnumValue<"eSyntheticSymbolsNameStyleIndex">,
+ EnumValues<"OptionEnumValues(g_synthetic_symbols_name_style_values)">,
+ Desc<"Determines the way synthetic symbol names are generated for unknown symbols">;
}
let Definition = "debugger" in {
diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp
index 6052cc151744d..a507d1c2efaf5 100644
--- a/lldb/source/Core/ModuleList.cpp
+++ b/lldb/source/Core/ModuleList.cpp
@@ -115,6 +115,13 @@ SymbolDownload ModuleListProperties::GetSymbolAutoDownload() const {
g_modulelist_properties[idx].default_uint_value));
}
+lldb::SyntheticSymbolsNameStyle ModuleListProperties::GetSyntheticSymbolsNameStyle() const {
+ const uint32_t idx = ePropertySyntheticSymbolsNameStyle;
+ return GetPropertyAtIndexAs<lldb::SyntheticSymbolsNameStyle>(
+ idx, static_cast<lldb::SyntheticSymbolsNameStyle>(
+ g_modulelist_properties[idx].default_uint_value));
+}
+
FileSpec ModuleListProperties::GetClangModulesCachePath() const {
const uint32_t idx = ePropertyClangModulesCachePath;
return GetPropertyAtIndexAs<FileSpec>(idx, {});
diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp
index 4828de4fdfa37..0e17662ee14ba 100644
--- a/lldb/source/Symbol/Symbol.cpp
+++ b/lldb/source/Symbol/Symbol.cpp
@@ -639,7 +639,15 @@ void Symbol::SynthesizeNameIfNeeded() const {
// breakpoints on them.
llvm::SmallString<256> name;
llvm::raw_svector_ostream os(name);
- os << GetSyntheticSymbolPrefix() << GetID();
+ os << GetSyntheticSymbolPrefix();
+ switch (ModuleList::GetGlobalModuleListProperties().GetSyntheticSymbolsNameStyle()) {
+ case eSyntheticSymbolsNameStyleIndex:
+ os << GetID();
+ break;
+ case eSyntheticSymbolsNameStyleFileAddress:
+ os << "_" << llvm::format_hex_no_prefix(m_addr_range.GetBaseAddress().GetFileAddress(), 0);
+ break;
+ }
m_mangled.SetDemangledName(ConstString(os.str()));
}
}
|
62d7d66 to
10d4c24
Compare
lldb/source/Symbol/Symbol.cpp
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure whether it's better to cache this setting for the session or not
10d4c24 to
fd74de7
Compare
|
I have zero context for the problem this aims to solve, but the way you describe it, it seems like this shouldn't be a setting, it should just be how we do it.
If it makes it easier then why not always do this? Is it that one style is good for debugging the internals of lldb and one style is good for users who are dealing mostly with problems from their own code? Could you add an example of one (or both) of those situations, in the PR description? This will help us understand the motivation. |
|
I'm not sure whether this synthetic name style is preferred by everyone, so that's why I made it a setting and not default. The problem it's trying to solve is that stack traces from crashes may contain unnamed symbols from binaries without debug info, and if these unnamed symbols had their file address in their synthetic name it would be easy to check their disassembly with static disassemblers like |
|
I see. Thanks for explaining, I'll let the experts weigh in on that topic then :) |
|
FWIW, my first though was also: "why not just do it all the time". I'm somewhat unclear about the use case though. In the normal lldb backtrace you should see both the PC value and the (synthetic) function name. What's there to be gained by putting it in the name itself. Support for IDEs which don't display the PC value? And how much does it help to see a random hex value as a function name? |
|
The use case is to make it easier to work with static disassemblers: |
|
Okay, so IIUC, you have a tool (the "static disassembler"), which you're cross-referencing the lldb output with, and this makes it easier because the tool does not use the same made up names for stripped symbols. Makes sense, I guess. I don't think there's any grand scheme behind the current symbol naming/numbering scheme, so I think it'd make sense to just use the file address unconditionally. @jasonmolenda, what do you think about all this? |
|
Today we add a made up name, I think this change is fine, but I agree I wouldn't bother with a setting. Both approaches achieve the most important goal - having the same synthetic names when lldb is re-run on the same binary - and there's one use case where having it based on a file address helps that workflow. I'd change lldb to use the file addresses. |
|
Cool, changed the patch to have it by default |
jasonmolenda
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, thanks for finishing this. Are you able to merge the PR?
|
Currently not, waiting for #137382 |
|
I can merge it, but I notice the PR description that will become the commit msg describes the setting style, you should prob update that. |
ed6a678 to
032d431
Compare
|
Changed the commit message and the title I don't mind if you merge now, I have no idea how long my merge access request is going to take :) |
|
@eronnen I believe this is breaking the macOS bots, on the test TestFoundationDisassembly.py |
|
I'm going to revert this for now, as I see your commit access is still pending (hopefully soon!) |
|
@felipepiovezan Thanks for noticing, I'll try to fix it soon |
|
Aperrantly I missed that searching unnamed symbols is dependant on the symbol index at https://github.com/llvm/llvm-project/blob/main/lldb/source/Symbol/Symtab.cpp#L668, so changing it will require a deeper change |
Changes the default synthetic symbol names to contain their file address
…vm#137512)" This reverts commit b69957f.
Changes the default synthetic symbol names to contain their file address
…vm#137512)" This reverts commit b69957f.
* Changes the default synthetic symbol names to contain their file address This is a new PR after the first PR (#137512) was reverted because it didn't update the way unnamed symbols were searched in the symbol table, which relied on the index being in the name. This time also added extra test to make sure the symbol is found as expected
Changes the default synthetic symbol names to contain their file address