Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
aba4890
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Oct 26, 2025
199a909
Update lldb/include/lldb/Core/Disassembler.h
n2h9 Oct 28, 2025
ea1bf46
Update lldb/include/lldb/Core/Disassembler.h
n2h9 Oct 28, 2025
0331516
Update lldb/test/API/functionalities/disassembler-variables/TestVaria…
n2h9 Oct 28, 2025
2449b6c
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Oct 30, 2025
839ac8e
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Oct 31, 2025
5278024
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Oct 31, 2025
7055cc0
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
13afa00
Merge branch 'main' into rich-disassembler-structured-variable-annota…
n2h9 Nov 1, 2025
4cee0d7
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
c955af3
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
4d8706d
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
573a6a1
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
c688f70
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
31e91cf
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 1, 2025
f805dbb
Update lldb/include/lldb/Core/Disassembler.h
n2h9 Nov 6, 2025
f03a448
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 6, 2025
f10922f
[lldb] [disassembler] chore: enhance VariableAnnotator to return stru…
n2h9 Nov 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions lldb/include/lldb/API/SBInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "lldb/API/SBData.h"
#include "lldb/API/SBDefines.h"
#include "lldb/API/SBStructuredData.h"

#include <cstdio>

Expand Down Expand Up @@ -73,6 +74,23 @@ class LLDB_API SBInstruction {

bool TestEmulation(lldb::SBStream &output_stream, const char *test_file);

/// Get variable annotations for this instruction as structured data.
/// Returns an array of dictionaries, each containing:
/// - "variable_name": string name of the variable
/// - "location_description": string description of where variable is stored
/// ("RDI", "R15", "undef", etc.)
/// - "is_live": boolean indicates if variable is live at this instruction
/// - "start_address": unsigned integer address where this annotation becomes
/// valid
/// - "end_address": unsigned integer address where this annotation becomes
/// invalid
/// - "register_kind": unsigned integer indicating the register numbering
/// scheme
/// - "decl_file": string path to the file where variable is declared
/// - "decl_line": unsigned integer line number where variable is declared
/// - "type_name": string type name of the variable
lldb::SBStructuredData GetVariableAnnotations(lldb::SBTarget target);

protected:
friend class SBInstructionList;

Expand Down
1 change: 1 addition & 0 deletions lldb/include/lldb/API/SBStructuredData.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ class SBStructuredData {
friend class SBBreakpointLocation;
friend class SBBreakpointName;
friend class SBTrace;
friend class SBInstruction;
friend class lldb_private::python::SWIGBridge;
friend class lldb_private::lua::SWIGBridge;
friend class SBCommandInterpreter;
Expand Down
38 changes: 38 additions & 0 deletions lldb/include/lldb/Core/Disassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,21 @@ class Disassembler : public std::enable_shared_from_this<Disassembler>,
const Disassembler &operator=(const Disassembler &) = delete;
};

/// Structured data for a single variable annotation.
struct VariableAnnotation {
std::string variable_name;
std::string location_description; // e.g., "r15", "undef", "const_0"
lldb::addr_t start_address; // Where this annotation starts being valid
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should be Doxygen-style comments and go on the line before the member.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated ✅ , thank you 🙇‍♀️

lldb::addr_t end_address; // Where this annotation ends being valid
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be represented by an AddressRange?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Valid point, updated ✅ , thank you 🙇‍♀️

bool is_live; // Whether variable is live at this instruction
lldb::RegisterKind
register_kind; // Register numbering scheme for location interpretation
std::optional<std::string>
decl_file; // Source file where variable was declared
std::optional<uint32_t> decl_line; // Line number where variable was declared
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be represented by a LineEntry?

Copy link
Author

@n2h9 n2h9 Nov 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to provide an information in this fields on where the variable is declared, like we we have in a DWARF output.
The data comes from Declaration object which returns line number as uint.

We only have (and probably only need for the purpose of variable annotation) information on file name, line number and column number from Declaration object. While LineEntry suppose to contain more information in it.

Should we maybe keep this as a uint for now, what do you think?

std::optional<std::string> type_name; // Variable's type name
};

/// Tracks live variable annotations across instructions and produces
/// per-instruction "events" like `name = RDI` or `name = <undef>`.
class VariableAnnotator {
Expand All @@ -574,16 +589,39 @@ class VariableAnnotator {
std::string name;
/// Last printed location (empty means <undef>).
std::string last_loc;
/// Address range where this variable state is valid.
lldb::addr_t start_address;
lldb::addr_t end_address;
/// Register numbering scheme for location interpretation.
lldb::RegisterKind register_kind;

std::optional<std::string> decl_file;
std::optional<uint32_t> decl_line;
std::optional<std::string> type_name;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of duplicating these fields, should this hold onto a VariableAnnotation and populate it as we go?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree. The idea was to have different structs for internal and external usage to have flexibility to have different structure of data. But probably this not necessary at all.

Updated ✅ , thank you 🙇‍♀️.

};

// Live state from the previous instruction, keyed by Variable::GetID().
llvm::DenseMap<lldb::user_id_t, VarState> Live_;

static constexpr const char *kUndefLocation = "undef";

public:
/// Compute annotation strings for a single instruction and update `Live_`.
/// Returns only the events that should be printed *at this instruction*.
std::vector<std::string> annotate(Instruction &inst, Target &target,
const lldb::ModuleSP &module_sp);

/// Compute structured annotation data for a single instruction and update
/// `Live_`. Returns structured data for all variables relevant at this
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is out of date. This returns a list of VariableAnnotations.

/// instruction.
std::vector<VariableAnnotation>
AnnotateStructured(Instruction &inst, Target &target,
Copy link

Copilot AI Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Method name AnnotateStructured uses UpperCamelCase, but the implementation in Disassembler.cpp uses annotateStructured (line 327). According to LLDB's coding style, methods should be UpperCamelCase. The implementation should be renamed to match the declaration: AnnotateStructured.

Copilot generated this review using guidance from repository custom instructions.
const lldb::ModuleSP &module_sp);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why take a const ref to a shared pointer? Either take a const ref to the module or take the SP by value and bump the ref count.

Copy link
Author

@n2h9 n2h9 Nov 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point 👍 .
That is because I followed the same signature as existing annotate function line 303 😊 .
Updated to use shared pointer by value (also updated in annotate function as well).

Thank you 🙇‍♀️ .


private:
VariableAnnotation createAnnotation(
const VarState &var_state, bool is_live,
const std::optional<std::string> &location_desc = std::nullopt);
};

} // namespace lldb_private
Expand Down
65 changes: 63 additions & 2 deletions lldb/source/API/SBInstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
#include "lldb/Utility/Instrumentation.h"

#include "lldb/API/SBAddress.h"
#include "lldb/API/SBFrame.h"
#include "lldb/API/SBFile.h"
#include "lldb/API/SBFrame.h"

#include "lldb/API/SBStream.h"
#include "lldb/API/SBStructuredData.h"
#include "lldb/API/SBTarget.h"
#include "lldb/Core/Disassembler.h"
#include "lldb/Core/EmulateInstruction.h"
Expand All @@ -26,6 +27,7 @@
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/StructuredData.h"

#include <memory>

Expand Down Expand Up @@ -163,7 +165,8 @@ const char *SBInstruction::GetComment(SBTarget target) {
return ConstString(inst_sp->GetComment(&exe_ctx)).GetCString();
}

lldb::InstructionControlFlowKind SBInstruction::GetControlFlowKind(lldb::SBTarget target) {
lldb::InstructionControlFlowKind
SBInstruction::GetControlFlowKind(lldb::SBTarget target) {
LLDB_INSTRUMENT_VA(this, target);

lldb::InstructionSP inst_sp(GetOpaque());
Expand Down Expand Up @@ -347,3 +350,61 @@ bool SBInstruction::TestEmulation(lldb::SBStream &output_stream,
return inst_sp->TestEmulation(output_stream.ref(), test_file);
return false;
}

lldb::SBStructuredData
SBInstruction::GetVariableAnnotations(lldb::SBTarget target) {
LLDB_INSTRUMENT_VA(this, target);

SBStructuredData result;

if (!m_opaque_sp || !m_opaque_sp->IsValid() || !target.IsValid())
return result;

lldb::InstructionSP inst_sp = m_opaque_sp->GetSP();
lldb::TargetSP target_sp = target.GetSP();

if (!inst_sp || !target_sp)
return result;

const Address &addr = inst_sp->GetAddress();
ModuleSP module_sp = addr.GetModule();

if (!module_sp)
return result;

VariableAnnotator annotator;
std::vector<VariableAnnotation> annotations =
annotator.annotateStructured(*inst_sp, *target_sp, module_sp);

auto array_sp = std::make_shared<StructuredData::Array>();

for (const auto &ann : annotations) {
auto dict_sp = std::make_shared<StructuredData::Dictionary>();

dict_sp->AddStringItem("variable_name", ann.variable_name);
dict_sp->AddStringItem("location_description", ann.location_description);
dict_sp->AddBooleanItem("is_live", ann.is_live);
dict_sp->AddItem(
"start_address",
std::make_shared<StructuredData::UnsignedInteger>(ann.start_address));
dict_sp->AddItem(
"end_address",
std::make_shared<StructuredData::UnsignedInteger>(ann.end_address));
dict_sp->AddItem(
"register_kind",
std::make_shared<StructuredData::UnsignedInteger>(ann.register_kind));
if (ann.decl_file.has_value())
dict_sp->AddStringItem("decl_file", *ann.decl_file);
if (ann.decl_line.has_value())
dict_sp->AddItem(
"decl_line",
std::make_shared<StructuredData::UnsignedInteger>(*ann.decl_line));
if (ann.type_name.has_value())
dict_sp->AddStringItem("type_name", *ann.type_name);

array_sp->AddItem(dict_sp);
}

result.m_impl_up->SetObjectSP(array_sp);
return result;
}
95 changes: 79 additions & 16 deletions lldb/source/Core/Disassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,14 +302,38 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine(
std::vector<std::string>
VariableAnnotator::annotate(Instruction &inst, Target &target,
const lldb::ModuleSP &module_sp) {
auto structured_annotations = annotateStructured(inst, target, module_sp);

std::vector<std::string> events;
events.reserve(structured_annotations.size());

for (const auto &annotation : structured_annotations) {
std::string display_string;
display_string =
llvm::formatv(
"{0} = {1}", annotation.variable_name,
annotation.location_description == VariableAnnotator::kUndefLocation
? llvm::formatv("<{0}>", VariableAnnotator::kUndefLocation)
.str()
: annotation.location_description)
.str();
Comment on lines +312 to +319
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like it could benefit from writing this as a stream:

  std::string display_string;
  llvm::raw_string_ostream os(display_string);
  os << annotation.variable_name;
  [...]
  events.push_back(std::move(display_string));

events.push_back(display_string);
}

return events;
}

std::vector<VariableAnnotation>
VariableAnnotator::annotateStructured(Instruction &inst, Target &target,
Copy link

Copilot AI Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Method name annotateStructured should be AnnotateStructured to match the declaration in Disassembler.h (line 618) and follow LLDB's UpperCamelCase naming convention for methods.

Copilot generated this review using guidance from repository custom instructions.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Copilot, updated ✅ .

Also updated existing annotate method to be Annotate to follow the LLDB's UpperCamelCase naming convention for methods 🤗 .

const lldb::ModuleSP &module_sp) {
std::vector<VariableAnnotation> annotations;

// If we lost module context, everything becomes <undef>.
// If we lost module context, mark all live variables as undefined.
if (!module_sp) {
for (const auto &KV : Live_)
events.emplace_back(llvm::formatv("{0} = <undef>", KV.second.name).str());
annotations.push_back(createAnnotation(KV.second, false, kUndefLocation));
Live_.clear();
return events;
return annotations;
}

// Resolve function/block at this *file* address.
Expand All @@ -320,9 +344,9 @@ VariableAnnotator::annotate(Instruction &inst, Target &target,
!sc.function) {
// No function context: everything dies here.
for (const auto &KV : Live_)
events.emplace_back(llvm::formatv("{0} = <undef>", KV.second.name).str());
annotations.push_back(createAnnotation(KV.second, false, kUndefLocation));
Live_.clear();
return events;
return annotations;
}

// Collect in-scope variables for this instruction into Current.
Expand Down Expand Up @@ -376,8 +400,32 @@ VariableAnnotator::annotate(Instruction &inst, Target &target,
if (loc.empty())
continue;

Current.try_emplace(v->GetID(),
VarState{std::string(name), std::string(loc)});
lldb::addr_t start_addr = inst.GetAddress().GetFileAddress();
lldb::addr_t end_addr = LLDB_INVALID_ADDRESS;
if (entry.file_range.has_value()) {
start_addr = entry.file_range->GetBaseAddress().GetFileAddress();
end_addr = start_addr + entry.file_range->GetByteSize();
}

std::optional<std::string> decl_file;
std::optional<uint32_t> decl_line;
std::optional<std::string> type_name;

const Declaration &decl = v->GetDeclaration();
if (decl.GetFile()) {
decl_file = decl.GetFile().GetFilename().AsCString();
if (decl.GetLine() > 0)
decl_line = decl.GetLine();
}

if (Type *type = v->GetType())
if (const char *type_str = type->GetName().AsCString())
type_name = type_str;

Current.try_emplace(
v->GetID(), VarState{std::string(name), std::string(loc), start_addr,
end_addr, entry.expr->GetRegisterKind(), decl_file,
decl_line, type_name});
}

// Diff Live_ → Current.
Expand All @@ -387,24 +435,39 @@ VariableAnnotator::annotate(Instruction &inst, Target &target,
auto it = Live_.find(KV.first);
if (it == Live_.end()) {
// Newly live.
events.emplace_back(
llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str());
annotations.push_back(createAnnotation(KV.second, true));
} else if (it->second.last_loc != KV.second.last_loc) {
// Location changed.
events.emplace_back(
llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str());
annotations.push_back(createAnnotation(KV.second, true));
}
}

// 2) Ends: anything that was live but is not in Current becomes <undef>.
for (const auto &KV : Live_) {
// 2) Ends: anything that was live but is not in Current becomes
// <kUndefLocation>.
for (const auto &KV : Live_)
if (!Current.count(KV.first))
events.emplace_back(llvm::formatv("{0} = <undef>", KV.second.name).str());
}
annotations.push_back(createAnnotation(KV.second, false, kUndefLocation));

// Commit new state.
Live_ = std::move(Current);
return events;
return annotations;
}

VariableAnnotation VariableAnnotator::createAnnotation(
const VarState &var_state, bool is_live,
const std::optional<std::string> &location_desc) {
VariableAnnotation annotation;
annotation.variable_name = var_state.name;
annotation.location_description =
location_desc.has_value() ? *location_desc : var_state.last_loc;
annotation.start_address = var_state.start_address;
annotation.end_address = var_state.end_address;
annotation.is_live = is_live;
annotation.register_kind = var_state.register_kind;
annotation.decl_file = var_state.decl_file;
annotation.decl_line = var_state.decl_line;
annotation.type_name = var_state.type_name;
return annotation;
}

void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch,
Expand Down
Loading