Skip to content

Commit 5f91976

Browse files
committed
[lldb][Expression] Encode Module and DIE UIDs into function AsmLabels (llvm#148877)
LLDB currently attaches `AsmLabel`s to `FunctionDecl`s such that the `IRExecutionUnit` can determine which mangled name to call (we can't rely on Clang deriving the correct mangled name to call because the debug-info AST doesn't contain all the info that would be encoded in the DWARF linkage names). However, we don't attach `AsmLabel`s for structors because they have multiple variants and thus it's not clear which mangled name to use. In the [RFC on fixing expression evaluation of abi-tagged structors](https://discourse.llvm.org/t/rfc-lldb-handling-abi-tagged-constructors-destructors-in-expression-evaluator/82816) we discussed encoding the structor variant into the `AsmLabel`s. Specifically, in [this thread](https://discourse.llvm.org/t/rfc-lldb-handling-abi-tagged-constructors-destructors-in-expression-evaluator/82816/7) we discussed that the contents of the `AsmLabel` are completely under LLDB's control and we could make use of it to uniquely identify a function by encoding the exact module and DIE that the function is associated with (a mangled name alone is not always enough, since two identical mangled symbols may live in different modules). So if we already have a custom `AsmLabel` format, we can encode the structor variant in a follow-up (the current idea is to append the structor variant as a suffix to our custom `AsmLabel` when Clang emits the mangled name into the JITted IR). Then we would just have to teach the `IRExecutionUnit` to pick the correct structor variant DIE during symbol resolution. The draft of this is available [here](llvm#149827). This patch sets up the infrastructure for the custom `AsmLabel` format by encoding the module id, DIE id and mangled name in it. **Implementation** The flow is as follows: 1. Create the label in `DWARFASTParserClang`. The format is: `$__lldb_func:module_id:die_id:mangled_name`
2. When resolving external symbols in `IRExecutionUnit`, we parse this label and then do a lookup by DIE ID (or mangled name into the module if the encoded DIE is a declaration). Depends on llvm#151355 (cherry picked from commit f890591)
1 parent 261a9f2 commit 5f91976

File tree

23 files changed

+638
-7
lines changed

23 files changed

+638
-7
lines changed

lldb/include/lldb/Core/Module.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,8 @@ struct ModuleFunctionSearchOptions {
8888
///
8989
/// The module will parse more detailed information as more queries are made.
9090
class Module : public std::enable_shared_from_this<Module>,
91-
public SymbolContextScope {
91+
public SymbolContextScope,
92+
public UserID {
9293
public:
9394
class LookupInfo;
9495
// Static functions that can track the lifetime of module objects. This is

lldb/include/lldb/Core/ModuleList.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,14 @@ class ModuleList {
375375
// UUID values is very efficient and accurate.
376376
lldb::ModuleSP FindModule(const UUID &uuid) const;
377377

378+
/// Find a module by LLDB-specific unique identifier.
379+
///
380+
/// \param[in] uid The UID of the module assigned to it on construction.
381+
///
382+
/// \returns ModuleSP of module with \c uid. Returns nullptr if no such
383+
/// module could be found.
384+
lldb::ModuleSP FindModule(lldb::user_id_t uid) const;
385+
378386
/// Finds the first module whose file specification matches \a module_spec.
379387
lldb::ModuleSP FindFirstModule(const ModuleSpec &module_spec) const;
380388

lldb/include/lldb/Expression/Expression.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <string>
1414
#include <vector>
1515

16+
#include "llvm/Support/FormatProviders.h"
1617

1718
#include "lldb/Expression/ExpressionTypeSystemHelper.h"
1819
#include "lldb/lldb-forward.h"
@@ -96,6 +97,62 @@ class Expression {
9697
///invalid.
9798
};
9899

100+
/// Holds parsed information about a function call label that
101+
/// LLDB attaches as an AsmLabel to function AST nodes it parses
102+
/// from debug-info.
103+
///
104+
/// The format being:
105+
///
106+
/// <prefix>:<module uid>:<symbol uid>:<name>
107+
///
108+
/// The label string needs to stay valid for the entire lifetime
109+
/// of this object.
110+
struct FunctionCallLabel {
111+
/// Unique identifier of the lldb_private::Module
112+
/// which contains the symbol identified by \c symbol_id.
113+
lldb::user_id_t module_id;
114+
115+
/// Unique identifier of the function symbol on which to
116+
/// perform the function call. For example, for DWARF this would
117+
/// be the DIE UID.
118+
lldb::user_id_t symbol_id;
119+
120+
/// Name to use when searching for the function symbol in
121+
/// \c module_id. For most function calls this will be a
122+
/// mangled name. In cases where a mangled name can't be used,
123+
/// this will be the function name.
124+
///
125+
/// NOTE: kept as last element so we don't have to worry about
126+
/// ':' in the mangled name when parsing the label.
127+
llvm::StringRef lookup_name;
128+
129+
/// Decodes the specified function \c label into a \c FunctionCallLabel.
130+
static llvm::Expected<FunctionCallLabel> fromString(llvm::StringRef label);
131+
132+
/// Encode this FunctionCallLabel into its string representation.
133+
///
134+
/// The representation roundtrips through \c fromString:
135+
/// \code{.cpp}
136+
/// llvm::StringRef encoded = "$__lldb_func:0x0:0x0:_Z3foov";
137+
/// FunctionCallLabel label = *fromString(encoded);
138+
///
139+
/// assert (label.toString() == encoded);
140+
/// assert (*fromString(label.toString()) == label);
141+
/// \endcode
142+
std::string toString() const;
143+
};
144+
145+
/// LLDB attaches this prefix to mangled names of functions that get called
146+
/// from JITted expressions.
147+
inline constexpr llvm::StringRef FunctionCallLabelPrefix = "$__lldb_func";
148+
99149
} // namespace lldb_private
100150

151+
namespace llvm {
152+
template <> struct format_provider<lldb_private::FunctionCallLabel> {
153+
static void format(const lldb_private::FunctionCallLabel &label,
154+
raw_ostream &OS, StringRef Style);
155+
};
156+
} // namespace llvm
157+
101158
#endif // LLDB_EXPRESSION_EXPRESSION_H

lldb/include/lldb/Symbol/SymbolFile.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,18 @@ class SymbolFile : public PluginInterface {
349349
GetMangledNamesForFunction(const std::string &scope_qualified_name,
350350
std::vector<ConstString> &mangled_names);
351351

352+
/// Resolves the function corresponding to the specified LLDB function
353+
/// call \c label.
354+
///
355+
/// \param[in] label The FunctionCallLabel to be resolved.
356+
///
357+
/// \returns An llvm::Error if the specified \c label couldn't be resolved.
358+
/// Returns the resolved function (as a SymbolContext) otherwise.
359+
virtual llvm::Expected<SymbolContext>
360+
ResolveFunctionCallLabel(const FunctionCallLabel &label) {
361+
return llvm::createStringError("Not implemented");
362+
}
363+
352364
virtual void GetTypes(lldb_private::SymbolContextScope *sc_scope,
353365
lldb::TypeClass type_mask,
354366
lldb_private::TypeList &type_list) = 0;

lldb/source/Core/Module.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,10 @@ Module *Module::GetAllocatedModuleAtIndex(size_t idx) {
135135
return nullptr;
136136
}
137137

138+
static std::atomic<lldb::user_id_t> g_unique_id = 1;
139+
138140
Module::Module(const ModuleSpec &module_spec)
139-
: m_unwind_table(*this), m_file_has_changed(false),
141+
: UserID(g_unique_id++), m_unwind_table(*this), m_file_has_changed(false),
140142
m_first_file_changed_log(false) {
141143
// Scope for locker below...
142144
{
@@ -241,7 +243,8 @@ Module::Module(const ModuleSpec &module_spec)
241243
Module::Module(const FileSpec &file_spec, const ArchSpec &arch,
242244
ConstString object_name, lldb::offset_t object_offset,
243245
const llvm::sys::TimePoint<> &object_mod_time)
244-
: m_mod_time(FileSystem::Instance().GetModificationTime(file_spec)),
246+
: UserID(g_unique_id++),
247+
m_mod_time(FileSystem::Instance().GetModificationTime(file_spec)),
245248
m_arch(arch), m_file(file_spec), m_object_name(object_name),
246249
m_object_offset(object_offset), m_object_mod_time(object_mod_time),
247250
m_unwind_table(*this), m_file_has_changed(false),
@@ -262,7 +265,7 @@ Module::Module(const FileSpec &file_spec, const ArchSpec &arch,
262265
}
263266

264267
Module::Module()
265-
: m_unwind_table(*this), m_file_has_changed(false),
268+
: UserID(g_unique_id++), m_unwind_table(*this), m_file_has_changed(false),
266269
m_first_file_changed_log(false) {
267270
std::lock_guard<std::recursive_mutex> guard(
268271
GetAllocationModuleCollectionMutex());

lldb/source/Core/ModuleList.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,20 @@ ModuleSP ModuleList::FindModule(const UUID &uuid) const {
832832
return module_sp;
833833
}
834834

835+
ModuleSP ModuleList::FindModule(lldb::user_id_t uid) const {
836+
ModuleSP module_sp;
837+
ForEach([&](const ModuleSP &m) {
838+
if (m->GetID() == uid) {
839+
module_sp = m;
840+
return IterationAction::Stop;
841+
}
842+
843+
return IterationAction::Continue;
844+
});
845+
846+
return module_sp;
847+
}
848+
835849
void ModuleList::FindTypes(Module *search_first, const TypeQuery &query,
836850
TypeResults &results) const {
837851
std::lock_guard<std::recursive_mutex> guard(m_modules_mutex);

lldb/source/Expression/Expression.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
#include "lldb/Target/ExecutionContextScope.h"
1111
#include "lldb/Target/Target.h"
1212

13+
#include "llvm/ADT/SmallVector.h"
14+
#include "llvm/ADT/StringExtras.h"
15+
#include "llvm/ADT/StringRef.h"
16+
#include "llvm/Support/Error.h"
17+
1318
using namespace lldb_private;
1419

1520
Expression::Expression(Target &target)
@@ -26,3 +31,47 @@ Expression::Expression(ExecutionContextScope &exe_scope)
2631
m_jit_end_addr(LLDB_INVALID_ADDRESS) {
2732
assert(m_target_wp.lock());
2833
}
34+
35+
llvm::Expected<FunctionCallLabel>
36+
lldb_private::FunctionCallLabel::fromString(llvm::StringRef label) {
37+
llvm::SmallVector<llvm::StringRef, 4> components;
38+
label.split(components, ":", /*MaxSplit=*/3);
39+
40+
if (components.size() != 4)
41+
return llvm::createStringError("malformed function call label.");
42+
43+
if (components[0] != FunctionCallLabelPrefix)
44+
return llvm::createStringError(llvm::formatv(
45+
"expected function call label prefix '{0}' but found '{1}' instead.",
46+
FunctionCallLabelPrefix, components[0]));
47+
48+
llvm::StringRef module_label = components[1];
49+
llvm::StringRef die_label = components[2];
50+
51+
lldb::user_id_t module_id = 0;
52+
if (!llvm::to_integer(module_label, module_id))
53+
return llvm::createStringError(
54+
llvm::formatv("failed to parse module ID from '{0}'.", module_label));
55+
56+
lldb::user_id_t die_id;
57+
if (!llvm::to_integer(die_label, die_id))
58+
return llvm::createStringError(
59+
llvm::formatv("failed to parse symbol ID from '{0}'.", die_label));
60+
61+
return FunctionCallLabel{/*.module_id=*/module_id,
62+
/*.symbol_id=*/die_id,
63+
/*.lookup_name=*/components[3]};
64+
}
65+
66+
std::string lldb_private::FunctionCallLabel::toString() const {
67+
return llvm::formatv("{0}:{1:x}:{2:x}:{3}", FunctionCallLabelPrefix,
68+
module_id, symbol_id, lookup_name)
69+
.str();
70+
}
71+
72+
void llvm::format_provider<FunctionCallLabel>::format(
73+
const FunctionCallLabel &label, raw_ostream &OS, StringRef Style) {
74+
OS << llvm::formatv("FunctionCallLabel{ module_id: {0:x}, symbol_id: {1:x}, "
75+
"lookup_name: {2} }",
76+
label.module_id, label.symbol_id, label.lookup_name);
77+
}

lldb/source/Expression/IRExecutionUnit.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/IR/DiagnosticInfo.h"
1616
#include "llvm/IR/LLVMContext.h"
1717
#include "llvm/IR/Module.h"
18+
#include "llvm/Support/Error.h"
1819
#include "llvm/Support/SourceMgr.h"
1920
#include "llvm/Support/raw_ostream.h"
2021

@@ -23,6 +24,7 @@
2324
#include "lldb/Core/JITSection.h"
2425
#include "lldb/Core/Module.h"
2526
#include "lldb/Core/Section.h"
27+
#include "lldb/Expression/Expression.h"
2628
#include "lldb/Expression/IRExecutionUnit.h"
2729
#include "lldb/Expression/ObjectFileJIT.h"
2830
#include "lldb/Host/HostInfo.h"
@@ -39,6 +41,7 @@
3941
#include "lldb/Utility/LLDBAssert.h"
4042
#include "lldb/Utility/LLDBLog.h"
4143
#include "lldb/Utility/Log.h"
44+
#include "lldb/lldb-defines.h"
4245

4346
#include <optional>
4447

@@ -810,6 +813,40 @@ class LoadAddressResolver {
810813
lldb::addr_t m_best_internal_load_address = LLDB_INVALID_ADDRESS;
811814
};
812815

816+
/// Returns address of the function referred to by the special function call
817+
/// label \c label.
818+
static llvm::Expected<lldb::addr_t>
819+
ResolveFunctionCallLabel(const FunctionCallLabel &label,
820+
const lldb_private::SymbolContext &sc,
821+
bool &symbol_was_missing_weak) {
822+
symbol_was_missing_weak = false;
823+
824+
if (!sc.target_sp)
825+
return llvm::createStringError("target not available.");
826+
827+
auto module_sp = sc.target_sp->GetImages().FindModule(label.module_id);
828+
if (!module_sp)
829+
return llvm::createStringError(
830+
llvm::formatv("failed to find module by UID {0}", label.module_id));
831+
832+
auto *symbol_file = module_sp->GetSymbolFile();
833+
if (!symbol_file)
834+
return llvm::createStringError(
835+
llvm::formatv("no SymbolFile found on module {0:x}.", module_sp.get()));
836+
837+
auto sc_or_err = symbol_file->ResolveFunctionCallLabel(label);
838+
if (!sc_or_err)
839+
return llvm::joinErrors(
840+
llvm::createStringError("failed to resolve function by UID"),
841+
sc_or_err.takeError());
842+
843+
SymbolContextList sc_list;
844+
sc_list.Append(*sc_or_err);
845+
846+
LoadAddressResolver resolver(*sc.target_sp, symbol_was_missing_weak);
847+
return resolver.Resolve(sc_list).value_or(LLDB_INVALID_ADDRESS);
848+
}
849+
813850
lldb::addr_t
814851
IRExecutionUnit::FindInSymbols(const std::vector<ConstString> &names,
815852
const lldb_private::SymbolContext &sc,
@@ -954,6 +991,34 @@ lldb::addr_t IRExecutionUnit::FindInUserDefinedSymbols(
954991

955992
lldb::addr_t IRExecutionUnit::FindSymbol(lldb_private::ConstString name,
956993
bool &missing_weak) {
994+
if (name.GetStringRef().starts_with(FunctionCallLabelPrefix)) {
995+
auto label_or_err = FunctionCallLabel::fromString(name);
996+
if (!label_or_err) {
997+
LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), label_or_err.takeError(),
998+
"failed to create FunctionCallLabel from '{1}': {0}",
999+
name.GetStringRef());
1000+
return LLDB_INVALID_ADDRESS;
1001+
}
1002+
1003+
if (auto addr_or_err =
1004+
ResolveFunctionCallLabel(*label_or_err, m_sym_ctx, missing_weak)) {
1005+
return *addr_or_err;
1006+
} else {
1007+
LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), addr_or_err.takeError(),
1008+
"Failed to resolve function call label '{1}': {0}",
1009+
name.GetStringRef());
1010+
1011+
// Fall back to lookup by name despite error in resolving the label.
1012+
// May happen in practice if the definition of a function lives in
1013+
// a different lldb_private::Module than its declaration. Meaning
1014+
// we couldn't pin-point it using the information encoded in the label.
1015+
name.SetString(label_or_err->lookup_name);
1016+
}
1017+
}
1018+
1019+
// TODO: now with function call labels, do we still need to
1020+
// generate alternate manglings?
1021+
9571022
std::vector<ConstString> candidate_C_names;
9581023
std::vector<ConstString> candidate_CPlusPlus_names;
9591024

lldb/source/Expression/IRInterpreter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,9 @@ class InterpreterStackFrame {
259259
break;
260260
case Value::FunctionVal:
261261
if (const Function *constant_func = dyn_cast<Function>(constant)) {
262-
lldb_private::ConstString name(constant_func->getName());
262+
lldb_private::ConstString name(
263+
llvm::GlobalValue::dropLLVMManglingEscape(
264+
constant_func->getName()));
263265
bool missing_weak = false;
264266
lldb::addr_t addr = m_execution_unit.FindSymbol(name, missing_weak);
265267
if (addr == LLDB_INVALID_ADDRESS)

lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "Plugins/Language/ObjC/ObjCLanguage.h"
2727
#include "lldb/Core/Module.h"
2828
#include "lldb/Core/Value.h"
29+
#include "lldb/Expression/Expression.h"
2930
#include "lldb/Host/Host.h"
3031
#include "lldb/Symbol/CompileUnit.h"
3132
#include "lldb/Symbol/CompilerType.h"
@@ -259,7 +260,40 @@ static std::string MakeLLDBFuncAsmLabel(const DWARFDIE &die) {
259260
if (!name)
260261
return {};
261262

262-
return name;
263+
SymbolFileDWARF *dwarf = die.GetDWARF();
264+
if (!dwarf)
265+
return {};
266+
267+
auto get_module_id = [&](SymbolFile *sym) {
268+
if (!sym)
269+
return LLDB_INVALID_UID;
270+
271+
auto *obj = sym->GetMainObjectFile();
272+
if (!obj)
273+
return LLDB_INVALID_UID;
274+
275+
auto module_sp = obj->GetModule();
276+
if (!module_sp)
277+
return LLDB_INVALID_UID;
278+
279+
return module_sp->GetID();
280+
};
281+
282+
lldb::user_id_t module_id = get_module_id(dwarf->GetDebugMapSymfile());
283+
if (module_id == LLDB_INVALID_UID)
284+
module_id = get_module_id(dwarf);
285+
286+
if (module_id == LLDB_INVALID_UID)
287+
return {};
288+
289+
const auto die_id = die.GetID();
290+
if (die_id == LLDB_INVALID_UID)
291+
return {};
292+
293+
return FunctionCallLabel{/*module_id=*/module_id,
294+
/*symbol_id=*/die_id,
295+
/*.lookup_name=*/name}
296+
.toString();
263297
}
264298

265299
TypeSP DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc,

0 commit comments

Comments
 (0)