diff --git a/lldb/include/lldb/API/SBInstruction.h b/lldb/include/lldb/API/SBInstruction.h index 755e3b4a47c9b..05e7087f2e679 100644 --- a/lldb/include/lldb/API/SBInstruction.h +++ b/lldb/include/lldb/API/SBInstruction.h @@ -11,6 +11,7 @@ #include "lldb/API/SBData.h" #include "lldb/API/SBDefines.h" +#include "lldb/API/SBStructuredData.h" #include @@ -73,6 +74,23 @@ class LLDB_API SBInstruction { bool TestEmulation(lldb::SBStream &output_stream, const char *test_file); + /// Get variable annotations for this instruction as structured data. + /// Returns an array of dictionaries, each containing: + /// - "variable_name": string name of the variable + /// - "location_description": string description of where variable is stored + /// ("RDI", "R15", "undef", etc.) + /// - "is_live": boolean indicates if variable is live at this instruction + /// - "start_address": unsigned integer address where this annotation becomes + /// valid (optional) + /// - "end_address": unsigned integer address where this annotation becomes + /// invalid (optional) + /// - "register_kind": unsigned integer indicating the register numbering + /// scheme + /// - "decl_file": string name of the file declaring the variable (optional) + /// - "decl_line": unsigned integer line number of the declaration (optional) + /// - "type_name": string type name of the variable (optional) + lldb::SBStructuredData GetVariableAnnotations(lldb::SBTarget target); + protected: friend class SBInstructionList; diff --git a/lldb/include/lldb/API/SBStructuredData.h b/lldb/include/lldb/API/SBStructuredData.h index dfd8ec0e180ce..75fb16b795a5a 100644 --- a/lldb/include/lldb/API/SBStructuredData.h +++ b/lldb/include/lldb/API/SBStructuredData.h @@ -153,6 +153,7 @@ class SBStructuredData { friend class SBBreakpointLocation; friend class SBBreakpointName; friend class SBTrace; + friend class SBInstruction; friend class lldb_private::python::SWIGBridge; friend class lldb_private::lua::SWIGBridge; friend class SBCommandInterpreter; diff --git 
a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h index db186dd33d774..d24a8bf593661 100644 --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -566,24 +566,43 @@ class Disassembler : public std::enable_shared_from_this, const Disassembler &operator=(const Disassembler &) = delete; }; +/// Structured data for a single variable annotation. +struct VariableAnnotation { + std::string variable_name; + /// Location description (e.g., "r15", "undef", "const_0"). + std::string location_description; + /// Whether variable is live at this instruction. + bool is_live; + /// Register numbering scheme for location interpretation. + lldb::RegisterKind register_kind; + /// Where this annotation is valid. + std::optional address_range; + /// Source file where variable was declared. + std::optional decl_file; + /// Line number where variable was declared. + std::optional decl_line; + /// Variable's type name. + std::optional type_name; +}; + /// Tracks live variable annotations across instructions and produces /// per-instruction "events" like `name = RDI` or `name = <undef>`. class VariableAnnotator { - struct VarState { - /// Display name. - std::string name; - /// Last printed location (empty means ). - std::string last_loc; - }; - // Live state from the previous instruction, keyed by Variable::GetID(). - llvm::DenseMap Live_; + llvm::DenseMap m_live_vars; + + static constexpr const char *kUndefLocation = "undef"; public: - /// Compute annotation strings for a single instruction and update `Live_`. + /// Compute annotation strings for one instruction and update `m_live_vars`. /// Returns only the events that should be printed *at this instruction*. - std::vector annotate(Instruction &inst, Target &target, - const lldb::ModuleSP &module_sp); + std::vector Annotate(Instruction &inst, Target &target, + lldb::ModuleSP module_sp); + + /// Returns structured data for the annotation events at this instruction. 
+ std::vector AnnotateStructured(Instruction &inst, + Target &target, + lldb::ModuleSP module_sp); }; } // namespace lldb_private diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp index 6755089af39a4..992bd3c3fdd5d 100644 --- a/lldb/source/API/SBInstruction.cpp +++ b/lldb/source/API/SBInstruction.cpp @@ -10,10 +10,11 @@ #include "lldb/Utility/Instrumentation.h" #include "lldb/API/SBAddress.h" -#include "lldb/API/SBFrame.h" #include "lldb/API/SBFile.h" +#include "lldb/API/SBFrame.h" #include "lldb/API/SBStream.h" +#include "lldb/API/SBStructuredData.h" #include "lldb/API/SBTarget.h" #include "lldb/Core/Disassembler.h" #include "lldb/Core/EmulateInstruction.h" @@ -26,6 +27,7 @@ #include "lldb/Utility/ArchSpec.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/StructuredData.h" #include @@ -163,7 +165,8 @@ const char *SBInstruction::GetComment(SBTarget target) { return ConstString(inst_sp->GetComment(&exe_ctx)).GetCString(); } -lldb::InstructionControlFlowKind SBInstruction::GetControlFlowKind(lldb::SBTarget target) { +lldb::InstructionControlFlowKind +SBInstruction::GetControlFlowKind(lldb::SBTarget target) { LLDB_INSTRUMENT_VA(this, target); lldb::InstructionSP inst_sp(GetOpaque()); @@ -347,3 +350,65 @@ bool SBInstruction::TestEmulation(lldb::SBStream &output_stream, return inst_sp->TestEmulation(output_stream.ref(), test_file); return false; } + +lldb::SBStructuredData +SBInstruction::GetVariableAnnotations(lldb::SBTarget target) { + LLDB_INSTRUMENT_VA(this, target); + + SBStructuredData result; + + if (!m_opaque_sp || !m_opaque_sp->IsValid() || !target.IsValid()) + return result; + + lldb::InstructionSP inst_sp = m_opaque_sp->GetSP(); + lldb::TargetSP target_sp = target.GetSP(); + + if (!inst_sp || !target_sp) + return result; + + const Address &addr = inst_sp->GetAddress(); + ModuleSP module_sp = addr.GetModule(); + + if (!module_sp) + return result; + + VariableAnnotator 
annotator; + std::vector annotations = + annotator.AnnotateStructured(*inst_sp, *target_sp, module_sp); + + auto array_sp = std::make_shared(); + + for (const auto &ann : annotations) { + auto dict_sp = std::make_shared(); + + dict_sp->AddStringItem("variable_name", ann.variable_name); + dict_sp->AddStringItem("location_description", ann.location_description); + dict_sp->AddBooleanItem("is_live", ann.is_live); + if (ann.address_range.has_value()) { + const auto &range = *ann.address_range; + dict_sp->AddItem("start_address", + std::make_shared( + range.GetBaseAddress().GetFileAddress())); + dict_sp->AddItem( + "end_address", + std::make_shared( + range.GetBaseAddress().GetFileAddress() + range.GetByteSize())); + } + dict_sp->AddItem( + "register_kind", + std::make_shared(ann.register_kind)); + if (ann.decl_file.has_value()) + dict_sp->AddStringItem("decl_file", *ann.decl_file); + if (ann.decl_line.has_value()) + dict_sp->AddItem( + "decl_line", + std::make_shared(*ann.decl_line)); + if (ann.type_name.has_value()) + dict_sp->AddStringItem("type_name", *ann.type_name); + + array_sp->AddItem(dict_sp); + } + + result.m_impl_up->SetObjectSP(array_sp); + return result; +} diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index f2ed1f7395346..6e259326cb887 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -299,17 +299,45 @@ bool Disassembler::ElideMixedSourceAndDisassemblyLine( // The goal is to give users helpful live variable hints alongside the // disassembled instruction stream, similar to how debug information // enhances source-level debugging. 
-std::vector -VariableAnnotator::annotate(Instruction &inst, Target &target, - const lldb::ModuleSP &module_sp) { +std::vector VariableAnnotator::Annotate(Instruction &inst, + Target &target, + lldb::ModuleSP module_sp) { + auto structured_annotations = AnnotateStructured(inst, target, module_sp); + std::vector events; + events.reserve(structured_annotations.size()); + + for (const auto &annotation : structured_annotations) { + std::string display_string; + display_string = + llvm::formatv( + "{0} = {1}", annotation.variable_name, + annotation.location_description == VariableAnnotator::kUndefLocation + ? llvm::formatv("<{0}>", VariableAnnotator::kUndefLocation) + .str() + : annotation.location_description) + .str(); + events.push_back(display_string); + } + + return events; +} + +std::vector +VariableAnnotator::AnnotateStructured(Instruction &inst, Target &target, + lldb::ModuleSP module_sp) { + std::vector annotations; - // If we lost module context, everything becomes . + // If we lost module context, mark all live variables as undefined. if (!module_sp) { - for (const auto &KV : Live_) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); - Live_.clear(); - return events; + for (const auto &KV : m_live_vars) { + auto annotation_entity = KV.second; + annotation_entity.is_live = false; + annotation_entity.location_description = kUndefLocation; + annotations.push_back(annotation_entity); + } + m_live_vars.clear(); + return annotations; } // Resolve function/block at this *file* address. @@ -319,10 +347,14 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, if (!module_sp->ResolveSymbolContextForAddress(iaddr, mask, sc) || !sc.function) { // No function context: everything dies here. 
- for (const auto &KV : Live_) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); - Live_.clear(); - return events; + for (const auto &KV : m_live_vars) { + auto annotation_entity = KV.second; + annotation_entity.is_live = false; + annotation_entity.location_description = kUndefLocation; + annotations.push_back(annotation_entity); + } + m_live_vars.clear(); + return annotations; } // Collect in-scope variables for this instruction into Current. @@ -349,7 +381,7 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, // Prefer "register-only" output when we have an ABI. opts.PrintRegisterOnly = static_cast(abi_sp); - llvm::DenseMap Current; + llvm::DenseMap current_vars; for (size_t i = 0, e = var_list.GetSize(); i != e; ++i) { lldb::VariableSP v = var_list.GetVariableAtIndex(i); @@ -376,35 +408,60 @@ VariableAnnotator::annotate(Instruction &inst, Target &target, if (loc.empty()) continue; - Current.try_emplace(v->GetID(), - VarState{std::string(name), std::string(loc)}); + std::optional decl_file; + std::optional decl_line; + std::optional type_name; + + const Declaration &decl = v->GetDeclaration(); + if (decl.GetFile()) { + decl_file = decl.GetFile().GetFilename().AsCString(); + if (decl.GetLine() > 0) + decl_line = decl.GetLine(); + } + + if (Type *type = v->GetType()) + if (const char *type_str = type->GetName().AsCString()) + type_name = type_str; + + current_vars.try_emplace(v->GetID(), + VariableAnnotation{std::string(name), std::string(loc), + true, entry.expr->GetRegisterKind(), + entry.file_range, decl_file, + decl_line, type_name}); } - // Diff Live_ → Current. + // Diff m_live_vars → current_vars. - // 1) Starts/changes: iterate Current and compare with Live_. - for (const auto &KV : Current) { - auto it = Live_.find(KV.first); - if (it == Live_.end()) { + // 1) Starts/changes: iterate current_vars and compare with m_live_vars. 
+ for (const auto &KV : current_vars) { + auto it = m_live_vars.find(KV.first); + if (it == m_live_vars.end()) { // Newly live. - events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); - } else if (it->second.last_loc != KV.second.last_loc) { + auto annotation_entity = KV.second; + annotation_entity.is_live = true; + annotations.push_back(annotation_entity); + } else if (it->second.location_description != + KV.second.location_description) { // Location changed. - events.emplace_back( - llvm::formatv("{0} = {1}", KV.second.name, KV.second.last_loc).str()); + auto annotation_entity = KV.second; + annotation_entity.is_live = true; + annotations.push_back(annotation_entity); } } - // 2) Ends: anything that was live but is not in Current becomes . - for (const auto &KV : Live_) { - if (!Current.count(KV.first)) - events.emplace_back(llvm::formatv("{0} = ", KV.second.name).str()); - } + // 2) Ends: anything that was live but is not in current_vars becomes + // <undef>. + for (const auto &KV : m_live_vars) + if (!current_vars.count(KV.first)) { + auto annotation_entity = KV.second; + annotation_entity.is_live = false; + annotation_entity.location_description = kUndefLocation; + annotations.push_back(annotation_entity); + } // Commit new state. 
- Live_ = std::move(Current); - return events; + m_live_vars = std::move(current_vars); + return annotations; } void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, @@ -676,7 +733,7 @@ void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, address_text_size); if ((options & eOptionVariableAnnotations) && target_sp) { - auto annotations = annot.annotate(*inst, *target_sp, module_sp); + auto annotations = annot.Annotate(*inst, *target_sp, module_sp); if (!annotations.empty()) { const size_t annotation_column = 100; inst_line.FillLastLineToColumn(annotation_column, ' '); diff --git a/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py b/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py index f107efbddddeb..4fe49a2b44cbd 100644 --- a/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py +++ b/lldb/test/API/functionalities/disassembler-variables/TestVariableAnnotationsDisassembler.py @@ -116,3 +116,177 @@ def test_seed_reg_const_undef(self): print(out) self.assertRegex(out, r"\b(i|argc)\s*=\s*(DW_OP_reg\d+\b|R[A-Z0-9]+)") self.assertNotIn("", out) + + @no_debug_info_test + @skipIf(archs=no_match(["x86_64"])) + def test_structured_annotations_api(self): + """Test GetVariableAnnotations() API returns structured data""" + obj = self._build_obj("d_original_example.o") + target = self._create_target(obj) + + main_symbols = target.FindSymbols("main") + self.assertTrue(main_symbols.IsValid() and main_symbols.GetSize() > 0, + "Could not find 'main' symbol") + + main_symbol = main_symbols.GetContextAtIndex(0).GetSymbol() + start_addr = main_symbol.GetStartAddress() + self.assertTrue(start_addr.IsValid(), "Invalid start address for main") + + instructions = target.ReadInstructions(start_addr, 16) + self.assertGreater(instructions.GetSize(), 0, "No instructions read") + + if self.TraceOn(): + print(f"\nTesting 
GetVariableAnnotations() API on {instructions.GetSize()} instructions") + + # Track what we find. + found_annotations = False + found_variables = set() + + # Track variable locations to detect changes (for selective printing). + prev_locations = {} + + # Test each instruction + for i in range(instructions.GetSize()): + inst = instructions.GetInstructionAtIndex(i) + self.assertTrue(inst.IsValid(), f"Invalid instruction at index {i}") + + annotations = inst.GetVariableAnnotations(target) + + self.assertIsInstance(annotations, lldb.SBStructuredData, + "GetVariableAnnotations should return SBStructuredData") + + if annotations.GetSize() > 0: + found_annotations = True + + # Track current locations and detect changes. + current_locations = {} + should_print = False + + # Validate each annotation. + for j in range(annotations.GetSize()): + ann = annotations.GetItemAtIndex(j) + self.assertTrue(ann.IsValid(), + f"Invalid annotation at index {j}") + + self.assertEqual(ann.GetType(), lldb.eStructuredDataTypeDictionary, + "Each annotation should be a dictionary") + + var_name_obj = ann.GetValueForKey("variable_name") + self.assertTrue(var_name_obj.IsValid(), + "Missing 'variable_name' field") + + location_obj = ann.GetValueForKey("location_description") + self.assertTrue(location_obj.IsValid(), + "Missing 'location_description' field") + + is_live_obj = ann.GetValueForKey("is_live") + self.assertTrue(is_live_obj.IsValid(), + "Missing 'is_live' field") + + start_addr_obj = ann.GetValueForKey("start_address") + self.assertTrue(start_addr_obj.IsValid(), + "Missing 'start_address' field") + + end_addr_obj = ann.GetValueForKey("end_address") + self.assertTrue(end_addr_obj.IsValid(), + "Missing 'end_address' field") + + register_kind_obj = ann.GetValueForKey("register_kind") + self.assertTrue(register_kind_obj.IsValid(), + "Missing 'register_kind' field") + + # Extract and validate values. 
+ var_name = var_name_obj.GetStringValue(1024) + location = location_obj.GetStringValue(1024) + is_live = is_live_obj.GetBooleanValue() + start_addr = start_addr_obj.GetUnsignedIntegerValue() + end_addr = end_addr_obj.GetUnsignedIntegerValue() + register_kind = register_kind_obj.GetUnsignedIntegerValue() + + # Validate types and values. + self.assertIsInstance(var_name, str, "variable_name should be string") + self.assertGreater(len(var_name), 0, "variable_name should not be empty") + + self.assertIsInstance(location, str, "location_description should be string") + self.assertGreater(len(location), 0, "location_description should not be empty") + + self.assertIsInstance(is_live, bool, "is_live should be boolean") + + self.assertIsInstance(start_addr, int, "start_address should be integer") + self.assertIsInstance(end_addr, int, "end_address should be integer") + self.assertGreater(end_addr, start_addr, + "end_address should be greater than start_address") + + self.assertIsInstance(register_kind, int, "register_kind should be integer") + + # Check for expected variables in this function. + self.assertIn(var_name, ["argc", "argv", "i"], + f"Unexpected variable name: {var_name}") + + found_variables.add(var_name) + + # Track current location. + current_locations[var_name] = location + + # Detect if this is a new variable or location changed. + if var_name not in prev_locations or prev_locations[var_name] != location: + should_print = True + + # Check optional fields (may or may not be present). 
+ decl_file_obj = ann.GetValueForKey("decl_file") + if decl_file_obj.IsValid(): + decl_file = decl_file_obj.GetStringValue(1024) + self.assertIsInstance(decl_file, str) + self.assertIn("d_original_example.c", decl_file, + f"Expected source file d_original_example.c in {decl_file}") + + decl_line_obj = ann.GetValueForKey("decl_line") + if decl_line_obj.IsValid(): + decl_line = decl_line_obj.GetUnsignedIntegerValue() + self.assertIsInstance(decl_line, int) + + # Validate declaration line matches the source code (according to d_original_example.c). + if var_name == "argc": + self.assertEqual(decl_line, 3, "argc should be declared on line 3") + elif var_name == "argv": + self.assertEqual(decl_line, 3, "argv should be declared on line 3") + elif var_name == "i": + self.assertEqual(decl_line, 4, "i should be declared on line 4") + + type_name_obj = ann.GetValueForKey("type_name") + if type_name_obj.IsValid(): + type_name = type_name_obj.GetStringValue(1024) + self.assertIsInstance(type_name, str) + + # Validate type name matches the source code (according to d_original_example.c). + if var_name == "argc": + self.assertEqual(type_name, "int", "argc should be type 'int'") + elif var_name == "argv": + self.assertEqual(type_name, "char **", "argv should be type 'char **'") + elif var_name == "i": + self.assertEqual(type_name, "int", "i should be type 'int'") + + if self.TraceOn(): + # Only print if something happened (location changed or variable appeared/disappeared). + if should_print or len(current_locations) != len(prev_locations): + print(f"\nInstruction {i} at {inst.GetAddress()}: {annotations.GetSize()} annotations") + for var_name, location in current_locations.items(): + change_marker = " <- CHANGED" if var_name in prev_locations and prev_locations[var_name] != location else "" + new_marker = " <- NEW" if var_name not in prev_locations else "" + print(f" {var_name} = {location}{change_marker}{new_marker}") + # Check for disappeared variables. 
+ for var_name in prev_locations: + if var_name not in current_locations: + print(f" {var_name} <- GONE") + + # Update tracking. + prev_locations = current_locations.copy() + + self.assertTrue(found_annotations, + "Should find at least one instruction with variable annotations") + + self.assertGreater(len(found_variables), 0, + "Should find at least one variable") + + if self.TraceOn(): + print(f"\nTest complete. Found variables: {found_variables}")