Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 171 additions & 30 deletions lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <mutex>
#include <set>

#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Demangle/ItaniumDemangle.h"

Expand Down Expand Up @@ -538,15 +539,173 @@ void CPlusPlusLanguage::CxxMethodName::Parse() {
}
}

llvm::StringRef
CPlusPlusLanguage::CxxMethodName::GetBasenameNoTemplateParameters() {
llvm::StringRef basename = GetBasename();
size_t arg_start, arg_end;
llvm::StringRef parens("<>", 2);
if (ReverseFindMatchingChars(basename, parens, arg_start, arg_end))
return basename.substr(0, arg_start);
bool CPlusPlusLanguage::CxxMethodName::NameMatches(llvm::StringRef full_name,
llvm::StringRef pattern,
MatchOptions options) {
constexpr llvm::StringRef abi_prefix = "[abi:";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we simplify the changes to this code by splitting the "full_name" like:

// Remove any encoded ABI tags from the C++ name before looking for matches
full_name = full_name.split("[abi:").first;

Then nothing else in this function needs to change, right? Or are we trying to do something with the ABI name later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The abi_tag may appear inside the template arguments, e.g. Module<SomeType[abi:TAG]>::find, and in that case the split approach will fail.
We also compare the tag names, in case we add support for breakpoint set --name foo[abi:TAG] in the future.

Copy link
Member

@Michael137 Michael137 Dec 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remind me why we didn't modify the CPlusPlusNameParser? Not saying we should, but it does parse/skip ABI tags to some extent already.

Alternatively, could we search and replace all ABI tags in the name with a "simple" regex? (Ignoring the question of whether we want to support setting breakpoints by specific tags, which I have some doubts about.) We might be able to get away with a regex because it isn't ever valid C++ syntax. If it does clash, we could change the demangler to output something even less likely to clash with real C++. Just some ideas to avoid hand-parsing these tags.

// Closing delimiter of an ABI tag.
constexpr char abi_end = ']';
// f_idx walks full_name, p_idx walks pattern. The loop is driven by
// full_name; the pattern cursor only advances when something was matched.
size_t f_idx = 0;
size_t p_idx = 0;

while (f_idx < full_name.size()) {
const char in_char = full_name[f_idx];
// The pattern may already be exhausted while full_name still has a trailing
// ABI tag or template argument list, so keep looping and use '\0' as a
// placeholder for "no pattern character".
const bool match_empty = p_idx >= pattern.size();
const char ma_char = match_empty ? '\0' : pattern[p_idx];

// Skip ABI tags in full_name.
if (options.skip_tags && in_char == '[' &&
full_name.substr(f_idx).starts_with(abi_prefix)) {

const size_t tag_end = full_name.find(abi_end, f_idx);
// Without a closing ']' this is not treated as a tag; fall through to the
// plain character comparison below.
if (tag_end != llvm::StringRef::npos) {
const size_t in_tag_len = tag_end - f_idx + 1;

if (!match_empty && pattern.substr(p_idx).starts_with(abi_prefix)) {
const size_t match_tag_end = pattern.find(abi_end, p_idx);
if (match_tag_end != llvm::StringRef::npos) {
const size_t ma_tag_len = match_tag_end - p_idx + 1;

// The pattern may contain only some of full_name's ABI tags.
// Consume the pattern's tag only when it is textually identical to
// the tag in full_name; otherwise leave the pattern cursor in place.
if ((in_tag_len == ma_tag_len) &&
full_name.substr(f_idx, in_tag_len) ==
pattern.substr(p_idx, ma_tag_len)) {
p_idx += ma_tag_len;
}
}
}

// The tag in full_name is always stepped over, whether or not the pattern
// had a matching tag.
f_idx += in_tag_len;
continue;
}
}

// Skip template argument lists in full_name. This is only attempted when
// the pattern is not itself at a '<' (which also covers an exhausted
// pattern), so a '<' spelled out in the pattern is compared literally.
if (options.skip_templates && in_char == '<' && ma_char != '<') {
// Find the '>' that balances this '<' by counting nesting depth.
size_t depth = 1;
size_t tmp_idx = f_idx + 1;
bool found_end = false;
for (; tmp_idx < full_name.size(); ++tmp_idx) {
const char cur = full_name[tmp_idx];
if (cur == '<')
depth++;
else if (cur == '>') {
depth--;

if (depth == 0) {
found_end = true;
break;
}
}
}

// Only skip when a balancing '>' exists; an unbalanced '<' falls through
// to the plain character comparison below.
if (found_end) {
f_idx = tmp_idx + 1;
continue;
}
}

// full_name contains a character that is not in the pattern.
if (match_empty || in_char != ma_char)
return false;

f_idx++;
p_idx++;
}

// full_name is exhausted; it is a match only if the pattern was fully
// consumed as well.
return p_idx == pattern.size();
}

/// Extracts the next context component from a C++ scope resolution string.
///
/// This function parses a C++ qualified name (e.g., "ns::Class<T>::method")
/// from right to left, extracting one scope component at a time. Scope
/// resolution operators (::) are only recognised at bracket depth zero, so
/// separators nested inside template argument lists, parentheses or square
/// brackets (including abi_tags) are ignored.
///
/// \param context The full context string to parse (e.g.,
///                "std::vector<int>::size").
/// \param end_pos [in,out] One past the last character of the component to
///                extract. On return, contains the position of the scope
///                separator (::) preceding the returned component, or
///                llvm::StringRef::npos if no more components exist.
/// \return The component that ends at the incoming \p end_pos. Returns an
///         empty string if \p end_pos is llvm::StringRef::npos or lies
///         beyond the end of \p context.
///
/// Example:
///   llvm::StringRef scope = "ns::inner::Class<int>";
///   size_t pos = scope.size();
///
///   ctx1 = NextContext(scope, pos); // returns "Class<int>", pos = 9
///   ctx2 = NextContext(scope, pos); // returns "inner", pos = 2
///   ctx3 = NextContext(scope, pos); // returns "ns", pos = StringRef::npos
static llvm::StringRef NextContext(llvm::StringRef context, size_t &end_pos) {
  if (end_pos == llvm::StringRef::npos)
    return {};

  // A cursor beyond the end of the string cannot delimit a component.
  if (end_pos > context.size()) {
    end_pos = llvm::StringRef::npos;
    return {};
  }

  // Remember where the component ends: end_pos is overwritten below, and the
  // returned slice must stop here rather than run to the end of the string.
  const size_t component_end = end_pos;
  int depth = 0;

  for (size_t idx = component_end; idx-- > 0;) {
    const char val = context[idx];

    // idx is on the second ':' of a "::" separator at bracket depth zero.
    if (depth == 0 && val == ':' && idx > 0 && context[idx - 1] == ':') {
      end_pos = idx - 1;
      return context.substr(idx + 1, component_end - idx - 1);
    }

    // In contexts, you cannot have a standalone bracket such as `operator<`,
    // so a single counter is enough to track nesting across all bracket
    // kinds. Scanning right to left, closers are seen before openers.
    if (val == '<' || val == '(' || val == '[')
      depth++;
    else if (val == '>' || val == ')' || val == ']')
      depth--;
  }

  // No separator left: everything before component_end is the outermost
  // component.
  end_pos = llvm::StringRef::npos;
  return context.substr(0, component_end);
}

/// Walks \p full_name and \p pattern component by component, using
/// NextContext starting from the end of each string, and compares each pair
/// of components with NameMatches.
///
/// \param full_name The context string to search within.
/// \param pattern   The context pattern to look for.
/// \param options   Forwarded unchanged to NameMatches.
/// \return The result of the component comparison at the point where the
///         pattern has no further components; false if \p full_name runs out
///         of components first.
bool CPlusPlusLanguage::CxxMethodName::ContainsContext(
    llvm::StringRef full_name, llvm::StringRef pattern, MatchOptions options) {
  size_t name_cursor = full_name.size();
  size_t pattern_cursor = pattern.size();

  // Keep going while full_name still has components to offer.
  while (name_cursor != llvm::StringRef::npos) {
    size_t name_next = name_cursor;
    const llvm::StringRef name_part = NextContext(full_name, name_next);

    size_t pattern_next = pattern_cursor;
    const llvm::StringRef pattern_part = NextContext(pattern, pattern_next);

    const bool matched = NameMatches(name_part, pattern_part, options);

    // This was the pattern's last component, so the outcome of this
    // comparison decides the overall result.
    if (pattern_next == llvm::StringRef::npos)
      return matched;

    // The current full_name component is consumed either way. On a match the
    // pattern cursor advances as well; on a mismatch it is set to 0.
    name_cursor = name_next;
    pattern_cursor = matched ? pattern_next : 0;
  }

  return false;
}

bool CPlusPlusLanguage::CxxMethodName::ContainsPath(llvm::StringRef path) {
Expand All @@ -564,21 +723,9 @@ bool CPlusPlusLanguage::CxxMethodName::ContainsPath(llvm::StringRef path) {
if (!success)
return m_full.GetStringRef().contains(path);

// Basename may include template arguments.
// E.g.,
// GetBaseName(): func<int>
// identifier : func
//
// ...but we still want to account for identifiers with template parameter
// lists, e.g., when users set breakpoints on template specializations.
//
// E.g.,
// GetBaseName(): func<uint32_t>
// identifier : func<int32_t*>
//
// Try to match the basename with or without template parameters.
if (GetBasename() != identifier &&
GetBasenameNoTemplateParameters() != identifier)
const MatchOptions options{/*skip_templates*/ true, /*skip_tags*/ true};
const llvm::StringRef basename = GetBasename();
if (!NameMatches(basename, identifier, options))
return false;

// Incoming path only had an identifier, so we match.
Expand All @@ -588,13 +735,7 @@ bool CPlusPlusLanguage::CxxMethodName::ContainsPath(llvm::StringRef path) {
if (m_context.empty())
return false;

llvm::StringRef haystack = m_context;
if (!haystack.consume_back(context))
return false;
if (haystack.empty() || !isalnum(haystack.back()))
return true;

return false;
return ContainsContext(m_context, context, options);
}

bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path,
Expand Down
48 changes: 36 additions & 12 deletions lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,43 @@ class CPlusPlusLanguage : public Language {
bool ContainsPath(llvm::StringRef path);

private:
/// Returns the Basename of this method without a template parameter
/// list, if any.
struct MatchOptions {
bool skip_templates;
bool skip_tags;
};

/// Compare method name with the pattern with the option to skip over ABI
/// tags and template parameters in the full_name when they don't appear in
/// pattern.
///
// Examples:
//
// +--------------------------------+---------+
// | MethodName | Returns |
// +--------------------------------+---------+
// | void func() | func |
// | void func<int>() | func |
// | void func<std::vector<int>>() | func |
// +--------------------------------+---------+
llvm::StringRef GetBasenameNoTemplateParameters();
/// \param full_name The complete method name that may contain ABI tags and
/// templates
/// \param pattern The name pattern to match against
/// \param options Configuration for what to skip during matching
/// \return true if the names match (ignoring skipped parts), false
/// otherwise
///
/// Examples:
// | MethodName | Pattern | Option | Returns |
// |----------------------|-------------|-----------------------|---------|
// | vector<int>() | vector | skip_template | true |
// | foo[abi:aTag]<int>() | foo | skip_template_and_tag | true |
// | MyClass::foo() | OClass::foo | | false |
// | bar::foo<int> | foo | no_skip_template | false |
///
static bool NameMatches(llvm::StringRef full_name, llvm::StringRef pattern,
MatchOptions options);

/// Checks if a pattern appears as a suffix of contexts within a full C++
/// name, using the same \a MatchOptions as \a NameMatches.
///
/// \param full_name The fully qualified C++ name to search within
/// \param pattern The pattern to search for (can be partial scope path)
/// \param options Configuration for name matching (passed to NameMatches)
/// \return true if the pattern is found as a suffix context or the whole
/// context, false otherwise
static bool ContainsContext(llvm::StringRef full_name,
llvm::StringRef pattern, MatchOptions options);

protected:
void Parse() override;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
CXX_SOURCES := main.cpp

include Makefile.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
Test breakpoint on function with abi_tags.
"""

import lldb
from typing import List, Set, TypedDict
from lldbsuite.test.decorators import skipIfWindows
from lldbsuite.test.lldbtest import VALID_TARGET, TestBase


class Case(TypedDict, total=True):
    """One breakpoint-by-name expectation."""

    # Name passed to BreakpointCreateByName.
    name: str
    # Function names the breakpoint's locations are compared against.
    matches: Set[str]


@skipIfWindows  # abi_tags is not supported
class TestCPPBreakpointLocationsAbiTag(TestBase):
    """Breakpoints set by name on functions whose names carry abi_tags."""

    def verify_breakpoint_names(self, target: lldb.SBTarget, bp_dict: Case):
        """Create a breakpoint by name and compare its location names.

        Creates a breakpoint for bp_dict["name"], asserts every location is
        valid, and asserts that the set of function names at those locations
        equals bp_dict["matches"].
        """
        requested_name = bp_dict["name"]
        wanted_names = bp_dict["matches"]
        created_bp: lldb.SBBreakpoint = target.BreakpointCreateByName(requested_name)

        for location in created_bp:
            self.assertTrue(location.IsValid(), f"Expected valid location {location}")

        found_names = {loc.addr.function.name for loc in created_bp}

        self.assertSetEqual(found_names, wanted_names)

    def test_breakpoint_name_with_abi_tag(self):
        """Build the test program and check each name against its expectations."""
        self.build()
        target: lldb.SBTarget = self.dbg.CreateTarget(self.getBuildArtifact("a.out"))
        self.assertTrue(target, VALID_TARGET)

        # Expected function names that appear in more than one case.
        static_struct_foo = "StaticStruct[abi:STATIC_STRUCT]::foo[abi:FOO][abi:FOO2]()"
        struct_foo = "Struct[abi:STRUCT]::foo[abi:FOO]()"
        ns_foo = "ns::foo[abi:NAMESPACE_FOO]()"
        template_struct_foo = "TemplateStruct[abi:TEMPLATE_STRUCT]<int>::foo[abi:FOO]()"
        template_struct_foo_template = "void TemplateStruct[abi:TEMPLATE_STRUCT]<int>::foo[abi:FOO_TEMPLATE]<long>(long)"
        template_struct_shift = "bool TemplateStruct[abi:TEMPLATE_STRUCT]<int>::operator<<[abi:operator]<int>(int)"

        test_cases: List[Case] = [
            Case(
                name="foo",
                matches={
                    "foo[abi:FOO]()",
                    static_struct_foo,
                    struct_foo,
                    "ns::NamespaceStruct[abi:NAMESPACE_STRUCT]::foo[abi:FOO]()",
                    ns_foo,
                    template_struct_foo,
                    template_struct_foo_template,
                },
            ),
            Case(name="StaticStruct::foo", matches={static_struct_foo}),
            Case(name="Struct::foo", matches={struct_foo}),
            Case(
                name="TemplateStruct::foo",
                matches={template_struct_foo, template_struct_foo_template},
            ),
            Case(name="ns::foo", matches={ns_foo}),
            # operators
            Case(
                name="operator<",
                matches={
                    "Struct[abi:STRUCT]::operator<(int)",
                    "bool TemplateStruct[abi:TEMPLATE_STRUCT]<int>::operator<[abi:OPERATOR]<int>(int)",
                },
            ),
            Case(name="TemplateStruct::operator<<", matches={template_struct_shift}),
            Case(name="operator<<", matches={template_struct_shift}),
            Case(
                name="operator==",
                matches={"operator==[abi:OPERATOR](wrap_int const&, wrap_int const&)"},
            ),
        ]

        for expectation in test_cases:
            self.verify_breakpoint_names(target, expectation)
Loading
Loading