Skip to content

Commit f9968f9

Browse files
committed
Index unqualified part of the (demangled C++) symbol name
* bdx/index.py (SymbolNameField.tokenize_value): Find namespace-qualified parts of the symbol and index the unqualified parts.
* tests/test_index.py (test_tokenize_symbol): Update test.
1 parent 6de17d6 commit f9968f9

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

bdx/index.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,9 @@ def tokenize_value(value: str) -> set[str]:
309309
# Find lowercase words
310310
lower_case_words = re.findall("[a-z]{2,}", " ".join(letters_only))
311311

312+
# Symbol names after '::' (C++)
313+
unqualified_symbols = re.findall("::(\\w+)", value)
314+
312315
numbers = re.findall("[0-9]+", value)
313316
words_with_numbers = re.findall("[a-zA-Z]+[0-9]+", value)
314317

@@ -320,6 +323,7 @@ def tokenize_value(value: str) -> set[str]:
320323
lower_case_words,
321324
numbers,
322325
words_with_numbers,
326+
unqualified_symbols,
323327
]:
324328
tokens.update(tokenlist)
325329

tests/test_index.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,3 +791,26 @@ def test_tokenize_symbol():
791791
"vv",
792792
]
793793
)
794+
795+
tokens = SymbolNameField.tokenize_value(
796+
"std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >::_Rep::_M_destroy(std::allocator<wchar_t> const&)"
797+
)
798+
assert tokens == set(
799+
[
800+
"Rep",
801+
"_M_destroy",
802+
"_Rep",
803+
"allocator",
804+
"basic",
805+
"basic_string",
806+
"char",
807+
"char_traits",
808+
"const",
809+
"destroy",
810+
"ep",
811+
"std",
812+
"string",
813+
"traits",
814+
"wchar",
815+
]
816+
)

0 commit comments

Comments
 (0)