Skip to content

Commit 91fc35b

Browse files
committed
[lldb][CPlusPlus] Remove TrySimplifiedParse when hand-parsing C++ function names
When doing function lookup in an LLDB module by name, we may need to hand-parse C++ method names to determine properties of the function name such as its "basename". LLDB currently has two ways of parsing methods: 1. Using `TrySimplifiedParse`, which was historically the go-to parser 2. Using the `CPlusPlusNameParser`, which is the more recent parser and has more knowledge of C++ syntax When `CPlusPlusNameParser` was introduced (https://reviews.llvm.org/D31451), `TrySimplifiedParse` was kept around as the first parser to keep performance in check. Though the performance difference didn't seem very convincing from the numbers provided. 11s vs 10s when setting a breakpoint in a project with 500k functions. The main difference being that we're invoking the clang::Lexer in the new parser. It would be nice to just get rid of it entirely. There's an edge-case for which I added a test-case where the `TrySimplifiedParse` code-path succeeds but incorrectly deduces the basename of `operator()`. Happy to do benchmarking to see what the performance impact of this is these days.
1 parent 9651902 commit 91fc35b

File tree

2 files changed

+11
-83
lines changed

2 files changed

+11
-83
lines changed

lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp

Lines changed: 9 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -173,34 +173,6 @@ static bool ReverseFindMatchingChars(const llvm::StringRef &s,
173173
return false;
174174
}
175175

176-
static bool IsTrivialBasename(const llvm::StringRef &basename) {
177-
// Check that the basename matches with the following regular expression
178-
// "^~?([A-Za-z_][A-Za-z_0-9]*)$" We are using a hand written implementation
179-
// because it is significantly more efficient then using the general purpose
180-
// regular expression library.
181-
size_t idx = 0;
182-
if (basename.starts_with('~'))
183-
idx = 1;
184-
185-
if (basename.size() <= idx)
186-
return false; // Empty string or "~"
187-
188-
if (!std::isalpha(basename[idx]) && basename[idx] != '_')
189-
return false; // First character (after removing the possible '~'') isn't in
190-
// [A-Za-z_]
191-
192-
// Read all characters matching [A-Za-z_0-9]
193-
++idx;
194-
while (idx < basename.size()) {
195-
if (!std::isalnum(basename[idx]) && basename[idx] != '_')
196-
break;
197-
++idx;
198-
}
199-
200-
// We processed all characters. It is a vaild basename.
201-
return idx == basename.size();
202-
}
203-
204176
/// Writes out the function name in 'full_name' to 'out_stream'
205177
/// but replaces each argument type with the variable name
206178
/// and the corresponding pretty-printed value
@@ -235,66 +207,20 @@ static bool PrettyPrintFunctionNameWithArgs(Stream &out_stream,
235207
return true;
236208
}
237209

238-
bool CPlusPlusLanguage::CxxMethodName::TrySimplifiedParse() {
239-
// This method tries to parse simple method definitions which are presumably
240-
// most comman in user programs. Definitions that can be parsed by this
241-
// function don't have return types and templates in the name.
242-
// A::B::C::fun(std::vector<T> &) const
243-
size_t arg_start, arg_end;
244-
llvm::StringRef full(m_full.GetCString());
245-
llvm::StringRef parens("()", 2);
246-
if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
247-
m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
248-
if (arg_end + 1 < full.size())
249-
m_qualifiers = full.substr(arg_end + 1).ltrim();
250-
251-
if (arg_start == 0)
252-
return false;
253-
size_t basename_end = arg_start;
254-
size_t context_start = 0;
255-
size_t context_end = full.rfind(':', basename_end);
256-
if (context_end == llvm::StringRef::npos)
257-
m_basename = full.substr(0, basename_end);
258-
else {
259-
if (context_start < context_end)
260-
m_context = full.substr(context_start, context_end - 1 - context_start);
261-
const size_t basename_begin = context_end + 1;
262-
m_basename = full.substr(basename_begin, basename_end - basename_begin);
263-
}
264-
265-
if (IsTrivialBasename(m_basename)) {
266-
return true;
267-
} else {
268-
// The C++ basename doesn't match our regular expressions so this can't
269-
// be a valid C++ method, clear everything out and indicate an error
270-
m_context = llvm::StringRef();
271-
m_basename = llvm::StringRef();
272-
m_arguments = llvm::StringRef();
273-
m_qualifiers = llvm::StringRef();
274-
m_return_type = llvm::StringRef();
275-
return false;
276-
}
277-
}
278-
return false;
279-
}
280-
281210
void CPlusPlusLanguage::CxxMethodName::Parse() {
282211
if (!m_parsed && m_full) {
283-
if (TrySimplifiedParse()) {
212+
CPlusPlusNameParser parser(m_full.GetStringRef());
213+
if (auto function = parser.ParseAsFunctionDefinition()) {
214+
m_basename = function->name.basename;
215+
m_context = function->name.context;
216+
m_arguments = function->arguments;
217+
m_qualifiers = function->qualifiers;
218+
m_return_type = function->return_type;
284219
m_parse_error = false;
285220
} else {
286-
CPlusPlusNameParser parser(m_full.GetStringRef());
287-
if (auto function = parser.ParseAsFunctionDefinition()) {
288-
m_basename = function->name.basename;
289-
m_context = function->name.context;
290-
m_arguments = function->arguments;
291-
m_qualifiers = function->qualifiers;
292-
m_return_type = function->return_type;
293-
m_parse_error = false;
294-
} else {
295-
m_parse_error = true;
296-
}
221+
m_parse_error = true;
297222
}
223+
298224
if (m_context.empty()) {
299225
m_scope_qualified = std::string(m_basename);
300226
} else {

lldb/test/API/lang/cpp/operators/main.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ int main(int argc, char **argv) {
174174
//% self.expect("expr (new struct C[1]); side_effect", endstr=" = 4\n")
175175
//% self.expect("expr delete c2; side_effect", endstr=" = 1\n")
176176
//% self.expect("expr delete[] c3; side_effect", endstr=" = 2\n")
177+
//% self.expect("image lookup -n operator()", substrs=["C::operator()(int)"])
178+
//% self.expect("image lookup -n C::operator()", substrs=["C::operator()(int)"])
177179
delete c2;
178180
delete[] c3;
179181
return 0;

0 commit comments

Comments
 (0)