Skip to content

Commit 2024d6d

Browse files
donaldhjakobandersen
authored andcommitted
C domain, use a dict for fast child symbol lookup
Sphinx 3.0.0 onwards have a significant performance regression from 2.4.4 for the C domain. The cProfile output shows that this is due to _find_named_symbols using a linear search. Here's the profile information for the Linux kernel 'make htmldocs': ncalls tottime percall cumtime percall filename:lineno(function) 770364892 165.102 0.000 165.102 0.000 sphinx/domains/c.py:153(__eq__) 104124 163.968 0.002 544.788 0.005 sphinx/domains/c.py:1731(_find_named_symbols) 543888397 123.767 0.000 176.685 0.000 sphinx/domains/c.py:1679(children_recurse_anon) 4292 74.081 0.017 74.081 0.017 {method 'poll' of 'select.poll' objects} 631233096 69.389 0.000 246.017 0.000 sphinx/domains/c.py:1746(candidates) 121406721/3359598 65.689 0.000 76.762 0.000 docutils/nodes.py:202(_fast_findall) 3477076 64.387 0.000 65.758 0.000 sphinx/util/nodes.py:633(_copy_except__document) 544032973 52.950 0.000 52.950 0.000 sphinx/domains/c.py:156(is_anon) 79012597/3430 36.395 0.000 36.395 0.011 sphinx/domains/c.py:1656(clear_doc) 286882978 31.271 0.000 31.279 0.000 {built-in method builtins.isinstance} Function was called by... ncalls tottime cumtime sphinx/domains/c.py:153(__eq__) <- 631153346 134.803 134.803 sphinx/domains/c.py:1731(_find_named_symbols) 154878 0.041 0.041 sphinx/domains/c.py:2085(find_identifier) 139056533 30.259 30.259 sphinx/domains/c.py:2116(direct_lookup) 135 0.000 0.000 sphinx/util/cfamily.py:89(__eq__) Replace Symbol._children with dicts, one keyed by ident and one keyed by docname. This lets us remove _find_named_symbols and replace these usage patterns with fast child lookup. Also use the _anonChildren list to speed up searching through anonymous children. Note that dict is guaranteed to maintain insertion order since Python 3.7 so we retain iteration. Whenever iteration is required, we use _childrenByName.values(). Before – with Sphinx 7.2.6: % time make htmldocs ... real 9m0.533s user 15m38.397s sys 1m0.907s After - with Sphinx 7.3.0+: % time make htmldocs ... real 4m27.063s user 10m54.985s sys 0m57.702s Signed-off-by: Donald Hunter <[email protected]>
1 parent e352a67 commit 2024d6d

File tree

1 file changed

+81
-121
lines changed

1 file changed

+81
-121
lines changed

sphinx/domains/c/_symbol.py

Lines changed: 81 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __str__(self) -> str:
3232

3333

3434
class SymbolLookupResult:
35-
def __init__(self, symbols: Iterator[Symbol], parentSymbol: Symbol,
35+
def __init__(self, symbols: list[Symbol], parentSymbol: Symbol,
3636
ident: ASTIdentifier) -> None:
3737
self.symbols = symbols
3838
self.parentSymbol = parentSymbol
@@ -103,11 +103,18 @@ def __init__(
103103
self._assert_invariants()
104104

105105
# Remember to modify Symbol.remove if modifications to the parent change.
106-
self._children: list[Symbol] = []
106+
self._childrenByName: dict[str, Symbol] = {}
107+
self._childrenByDocname: dict[str, list[Symbol]] = {}
107108
self._anonChildren: list[Symbol] = []
108109
# note: _children includes _anonChildren
109110
if self.parent:
110-
self.parent._children.append(self)
111+
self.parent._childrenByName[str(self.ident)] = self
112+
if self.docname in self.parent._childrenByDocname:
113+
self.parent._childrenByDocname[self.docname].append(self)
114+
else:
115+
self.parent._childrenByDocname[self.docname] = [self]
116+
if ident.is_anon():
117+
self.parent._anonChildren.append(self)
111118
if self.declaration:
112119
self.declaration.symbol = self
113120

@@ -156,36 +163,49 @@ def _add_function_params(self) -> None:
156163
def remove(self) -> None:
157164
if self.parent is None:
158165
return
159-
assert self in self.parent._children
160-
self.parent._children.remove(self)
166+
name = str(self.ident)
167+
assert name in self.parent._childrenByName
168+
del self.parent._childrenByName[name]
169+
if self.docname in self.parent._childrenByDocname:
170+
del self.parent._childrenByDocname[self.docname]
171+
if self.ident.is_anon():
172+
self.parent._anonChildren.remove(self)
161173
self.parent = None
162174

163175
def clear_doc(self, docname: str) -> None:
164-
for sChild in self._children:
165-
sChild.clear_doc(docname)
166-
if sChild.declaration and sChild.docname == docname:
167-
sChild.declaration = None
168-
sChild.docname = None
169-
sChild.line = None
170-
if sChild.siblingAbove is not None:
171-
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
172-
if sChild.siblingBelow is not None:
173-
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
174-
sChild.siblingAbove = None
175-
sChild.siblingBelow = None
176+
if docname not in self._childrenByDocname:
177+
for child in self._childrenByName.values():
178+
child.clear_doc(docname)
179+
return
180+
for sChild in self._childrenByDocname[docname]:
181+
sChild.declaration = None
182+
sChild.docname = None
183+
sChild.line = None
184+
if sChild.siblingAbove is not None:
185+
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
186+
if sChild.siblingBelow is not None:
187+
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
188+
sChild.siblingAbove = None
189+
sChild.siblingBelow = None
190+
name = str(sChild.ident)
191+
if name in self._childrenByName:
192+
del self._childrenByName[name]
193+
if sChild.ident.is_anon():
194+
self._anonChildren.remove(sChild)
195+
del self._childrenByDocname[docname]
176196

177197
def get_all_symbols(self) -> Iterator[Symbol]:
178198
yield self
179-
for sChild in self._children:
199+
for sChild in self._childrenByName.values():
180200
yield from sChild.get_all_symbols()
181201

182202
@property
183203
def children(self) -> Iterator[Symbol]:
184-
yield from self._children
204+
yield from self._childrenByName.values()
185205

186206
@property
187207
def children_recurse_anon(self) -> Iterator[Symbol]:
188-
for c in self._children:
208+
for c in self._childrenByName.values():
189209
yield c
190210
if not c.ident.is_anon():
191211
continue
@@ -221,68 +241,6 @@ def get_full_nested_name(self) -> ASTNestedName:
221241
names = [s.ident for s in symbols]
222242
return ASTNestedName(names, rooted=False)
223243

224-
def _find_first_named_symbol(self, ident: ASTIdentifier,
225-
matchSelf: bool, recurseInAnon: bool) -> Symbol | None:
226-
# TODO: further simplification from C++ to C
227-
if Symbol.debug_lookup:
228-
Symbol.debug_print("_find_first_named_symbol ->")
229-
res = self._find_named_symbols(ident, matchSelf, recurseInAnon,
230-
searchInSiblings=False)
231-
try:
232-
return next(res)
233-
except StopIteration:
234-
return None
235-
236-
def _find_named_symbols(self, ident: ASTIdentifier,
237-
matchSelf: bool, recurseInAnon: bool,
238-
searchInSiblings: bool) -> Iterator[Symbol]:
239-
# TODO: further simplification from C++ to C
240-
if Symbol.debug_lookup:
241-
Symbol.debug_indent += 1
242-
Symbol.debug_print("_find_named_symbols:")
243-
Symbol.debug_indent += 1
244-
Symbol.debug_print("self:")
245-
logger.debug(self.to_string(Symbol.debug_indent + 1, addEndNewline=False))
246-
Symbol.debug_print("ident: ", ident)
247-
Symbol.debug_print("matchSelf: ", matchSelf)
248-
Symbol.debug_print("recurseInAnon: ", recurseInAnon)
249-
Symbol.debug_print("searchInSiblings: ", searchInSiblings)
250-
251-
def candidates() -> Iterator[Symbol]:
252-
s = self
253-
if Symbol.debug_lookup:
254-
Symbol.debug_print("searching in self:")
255-
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
256-
while True:
257-
if matchSelf:
258-
yield s
259-
if recurseInAnon:
260-
yield from s.children_recurse_anon
261-
else:
262-
yield from s._children
263-
264-
if s.siblingAbove is None:
265-
break
266-
s = s.siblingAbove
267-
if Symbol.debug_lookup:
268-
Symbol.debug_print("searching in sibling:")
269-
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
270-
271-
for s in candidates():
272-
if Symbol.debug_lookup:
273-
Symbol.debug_print("candidate:")
274-
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
275-
if s.ident == ident:
276-
if Symbol.debug_lookup:
277-
Symbol.debug_indent += 1
278-
Symbol.debug_print("matches")
279-
Symbol.debug_indent -= 3
280-
yield s
281-
if Symbol.debug_lookup:
282-
Symbol.debug_indent += 2
283-
if Symbol.debug_lookup:
284-
Symbol.debug_indent -= 2
285-
286244
def _symbol_lookup(
287245
self,
288246
nestedName: ASTNestedName,
@@ -311,16 +269,14 @@ def _symbol_lookup(
311269
# find the right starting point for lookup
312270
parentSymbol = self
313271
if nestedName.rooted:
314-
while parentSymbol.parent:
272+
while parentSymbol.parent is not None:
315273
parentSymbol = parentSymbol.parent
274+
316275
if ancestorLookupType is not None:
317276
# walk up until we find the first identifier
318277
firstName = names[0]
319278
while parentSymbol.parent:
320-
if parentSymbol.find_identifier(firstName,
321-
matchSelf=matchSelf,
322-
recurseInAnon=recurseInAnon,
323-
searchInSiblings=searchInSiblings):
279+
if str(firstName) in parentSymbol._childrenByName:
324280
break
325281
parentSymbol = parentSymbol.parent
326282

@@ -330,18 +286,13 @@ def _symbol_lookup(
330286

331287
# and now the actual lookup
332288
for ident in names[:-1]:
333-
symbol = parentSymbol._find_first_named_symbol(
334-
ident, matchSelf=matchSelf, recurseInAnon=recurseInAnon)
335-
if symbol is None:
289+
name = str(ident)
290+
if name in parentSymbol._childrenByName:
291+
symbol = parentSymbol._childrenByName[name]
292+
else:
336293
symbol = onMissingQualifiedSymbol(parentSymbol, ident)
337294
if symbol is None:
338-
if Symbol.debug_lookup:
339-
Symbol.debug_indent -= 2
340295
return None
341-
# We have now matched part of a nested name, and need to match more
342-
# so even if we should matchSelf before, we definitely shouldn't
343-
# even more. (see also issue #2666)
344-
matchSelf = False
345296
parentSymbol = symbol
346297

347298
if Symbol.debug_lookup:
@@ -350,15 +301,20 @@ def _symbol_lookup(
350301

351302
# handle the last name
352303
ident = names[-1]
353-
354-
symbols = parentSymbol._find_named_symbols(
355-
ident, matchSelf=matchSelf,
356-
recurseInAnon=recurseInAnon,
357-
searchInSiblings=searchInSiblings)
358-
if Symbol.debug_lookup:
359-
symbols = list(symbols) # type: ignore[assignment]
360-
Symbol.debug_indent -= 2
361-
return SymbolLookupResult(symbols, parentSymbol, ident)
304+
name = str(ident)
305+
symbol = None
306+
if name in parentSymbol._childrenByName:
307+
symbol = parentSymbol._childrenByName[name]
308+
309+
if not symbol and recurseInAnon:
310+
for child in parentSymbol._anonChildren:
311+
if name in child._childrenByName:
312+
symbol = child._childrenByName[name]
313+
break
314+
if symbol:
315+
return SymbolLookupResult([symbol], parentSymbol, ident)
316+
else:
317+
return SymbolLookupResult([], parentSymbol, ident)
362318

363319
def _add_symbols(
364320
self,
@@ -532,17 +488,17 @@ def merge_with(self, other: Symbol, docnames: list[str],
532488
if Symbol.debug_lookup:
533489
Symbol.debug_indent += 1
534490
Symbol.debug_print("merge_with:")
491+
535492
assert other is not None
536-
for otherChild in other._children:
537-
ourChild = self._find_first_named_symbol(
538-
ident=otherChild.ident, matchSelf=False,
539-
recurseInAnon=False)
540-
if ourChild is None:
493+
for otherChild in other._childrenByName.values():
494+
otherName = str(otherChild.ident)
495+
if otherName not in self._childrenByName:
541496
# TODO: hmm, should we prune by docnames?
542-
self._children.append(otherChild)
497+
self._childrenByName[otherName] = otherChild
543498
otherChild.parent = self
544499
otherChild._assert_invariants()
545500
continue
501+
ourChild = self._childrenByName[otherName]
546502
if otherChild.declaration and otherChild.docname in docnames:
547503
if not ourChild.declaration:
548504
ourChild._fill_empty(otherChild.declaration,
@@ -560,6 +516,7 @@ def merge_with(self, other: Symbol, docnames: list[str],
560516
# just ignore it, right?
561517
pass
562518
ourChild.merge_with(otherChild, docnames, env)
519+
563520
if Symbol.debug_lookup:
564521
Symbol.debug_indent -= 1
565522

@@ -608,10 +565,13 @@ def find_identifier(self, ident: ASTIdentifier,
608565
Symbol.debug_indent -= 2
609566
if matchSelf and current.ident == ident:
610567
return current
611-
children = current.children_recurse_anon if recurseInAnon else current._children
612-
for s in children:
613-
if s.ident == ident:
614-
return s
568+
name = str(ident)
569+
if name in current._childrenByName:
570+
return current._childrenByName[name]
571+
if recurseInAnon:
572+
for child in current._anonChildren:
573+
if name in child._childrenByName:
574+
return child._childrenByName[name]
615575
if not searchInSiblings:
616576
break
617577
current = current.siblingAbove
@@ -623,12 +583,11 @@ def direct_lookup(self, key: LookupKey) -> Symbol | None:
623583
Symbol.debug_print("direct_lookup:")
624584
Symbol.debug_indent += 1
625585
s = self
626-
for name, id_ in key.data:
586+
for ident, id_ in key.data:
627587
res = None
628-
for cand in s._children:
629-
if cand.ident == name:
630-
res = cand
631-
break
588+
name = str(ident)
589+
if name in s._childrenByName:
590+
res = s._childrenByName[name]
632591
s = res
633592
if Symbol.debug_lookup:
634593
Symbol.debug_print("name: ", name)
@@ -697,4 +656,5 @@ def to_string(self, indent: int, *, addEndNewline: bool = True) -> str:
697656
return ''.join(res)
698657

699658
def dump(self, indent: int) -> str:
700-
return ''.join([self.to_string(indent), *(c.dump(indent + 1) for c in self._children)])
659+
return ''.join([self.to_string(indent),
660+
*(c.dump(indent + 1) for c in self._childrenByName.values())])

0 commit comments

Comments
 (0)