Skip to content

Commit b3422ac

Browse files
committed
C domain, use a dict for fast child symbol lookup
Sphinx 3.0.0 onwards have a significant performance regression from 2.4.4 for the C domain. The cProfile output shows that this is due to _find_named_symbols using a linear search. Here's the profile information for the Linux kernel 'make htmldocs': ncalls tottime percall cumtime percall filename:lineno(function) 770364892 165.102 0.000 165.102 0.000 sphinx/domains/c.py:153(__eq__) 104124 163.968 0.002 544.788 0.005 sphinx/domains/c.py:1731(_find_named_symbols) 543888397 123.767 0.000 176.685 0.000 sphinx/domains/c.py:1679(children_recurse_anon) 4292 74.081 0.017 74.081 0.017 {method 'poll' of 'select.poll' objects} 631233096 69.389 0.000 246.017 0.000 sphinx/domains/c.py:1746(candidates) 121406721/3359598 65.689 0.000 76.762 0.000 docutils/nodes.py:202(_fast_findall) 3477076 64.387 0.000 65.758 0.000 sphinx/util/nodes.py:633(_copy_except__document) 544032973 52.950 0.000 52.950 0.000 sphinx/domains/c.py:156(is_anon) 79012597/3430 36.395 0.000 36.395 0.011 sphinx/domains/c.py:1656(clear_doc) 286882978 31.271 0.000 31.279 0.000 {built-in method builtins.isinstance} Function was called by... ncalls tottime cumtime sphinx/domains/c.py:153(__eq__) <- 631153346 134.803 134.803 sphinx/domains/c.py:1731(_find_named_symbols) 154878 0.041 0.041 sphinx/domains/c.py:2085(find_identifier) 139056533 30.259 30.259 sphinx/domains/c.py:2116(direct_lookup) 135 0.000 0.000 sphinx/util/cfamily.py:89(__eq__) Replace Symbol._children with dicts, one keyed by ident and one keyed by docname. This lets us remove _find_named_symbols and replace these usage patterns with fast child lookup. Also use the _anonChildren list to speed up searching through anonymous children. Note that dict is guaranteed to maintain insertion order since Python 3.7 so we retain iteration. Whenever iteration is required, we use _childrenByName.values(). Before – with Sphinx 7.2.6: % time make htmldocs ... real 9m0.533s user 15m38.397s sys 1m0.907s After - with Sphinx 7.3.0+: % time make htmldocs ... real 4m27.063s user 10m54.985s sys 0m57.702s Signed-off-by: Donald Hunter <[email protected]>
1 parent 3bedde2 commit b3422ac

File tree

1 file changed

+82
-130
lines changed

1 file changed

+82
-130
lines changed

sphinx/domains/c/_symbol.py

Lines changed: 82 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from sphinx.util import logging
1212

1313
if TYPE_CHECKING:
14-
from collections.abc import Generator, Iterator
14+
from collections.abc import Iterator
1515

1616
from typing_extensions import Self
1717

@@ -32,7 +32,7 @@ def __str__(self) -> str:
3232

3333

3434
class SymbolLookupResult:
35-
def __init__(self, symbols: Iterator[Symbol], parentSymbol: Symbol,
35+
def __init__(self, symbols: list[Symbol], parentSymbol: Symbol,
3636
ident: ASTIdentifier) -> None:
3737
self.symbols = symbols
3838
self.parentSymbol = parentSymbol
@@ -102,11 +102,18 @@ def __init__(
102102
self._assert_invariants()
103103

104104
# Remember to modify Symbol.remove if modifications to the parent change.
105-
self._children: list[Symbol] = []
105+
self._childrenByName: dict[str, Symbol] = {}
106+
self._childrenByDocname: dict[str, list[Symbol]] = {}
106107
self._anonChildren: list[Symbol] = []
107108
# note: _children includes _anonChildren
108109
if self.parent:
109-
self.parent._children.append(self)
110+
self.parent._childrenByName[str(self.ident)] = self
111+
if self.docname in self.parent._childrenByDocname:
112+
self.parent._childrenByDocname[self.docname].append(self)
113+
else:
114+
self.parent._childrenByDocname[self.docname] = [self]
115+
if ident.is_anon():
116+
self.parent._anonChildren.append(self)
110117
if self.declaration:
111118
self.declaration.symbol = self
112119

@@ -155,36 +162,49 @@ def _add_function_params(self) -> None:
155162
def remove(self) -> None:
156163
if self.parent is None:
157164
return
158-
assert self in self.parent._children
159-
self.parent._children.remove(self)
165+
name = str(self.ident)
166+
assert name in self.parent._childrenByName
167+
del self.parent._childrenByName[name]
168+
if self.docname in self.parent._childrenByDocname:
169+
del self.parent._childrenByDocname[self.docname]
170+
if self.ident.is_anon():
171+
self.parent._anonChildren.remove(self)
160172
self.parent = None
161173

162174
def clear_doc(self, docname: str) -> None:
163-
for sChild in self._children:
164-
sChild.clear_doc(docname)
165-
if sChild.declaration and sChild.docname == docname:
166-
sChild.declaration = None
167-
sChild.docname = None
168-
sChild.line = None
169-
if sChild.siblingAbove is not None:
170-
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
171-
if sChild.siblingBelow is not None:
172-
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
173-
sChild.siblingAbove = None
174-
sChild.siblingBelow = None
175+
if docname not in self._childrenByDocname:
176+
for child in self._childrenByName.values():
177+
child.clear_doc(docname)
178+
return
179+
for sChild in self._childrenByDocname[docname]:
180+
sChild.declaration = None
181+
sChild.docname = None
182+
sChild.line = None
183+
if sChild.siblingAbove is not None:
184+
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
185+
if sChild.siblingBelow is not None:
186+
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
187+
sChild.siblingAbove = None
188+
sChild.siblingBelow = None
189+
name = str(sChild.ident)
190+
if name in self._childrenByName:
191+
del self._childrenByName[name]
192+
if sChild.ident.is_anon():
193+
self._anonChildren.remove(sChild)
194+
del self._childrenByDocname[docname]
175195

176196
def get_all_symbols(self) -> Iterator[Symbol]:
177197
yield self
178-
for sChild in self._children:
198+
for sChild in self._childrenByName.values():
179199
yield from sChild.get_all_symbols()
180200

181201
@property
182202
def children(self) -> Iterator[Symbol]:
183-
yield from self._children
203+
yield from self._childrenByName.values()
184204

185205
@property
186206
def children_recurse_anon(self) -> Iterator[Symbol]:
187-
for c in self._children:
207+
for c in self._childrenByName.values():
188208
yield c
189209
if not c.ident.is_anon():
190210
continue
@@ -220,68 +240,6 @@ def get_full_nested_name(self) -> ASTNestedName:
220240
names = [s.ident for s in symbols]
221241
return ASTNestedName(names, rooted=False)
222242

223-
def _find_first_named_symbol(self, ident: ASTIdentifier,
224-
matchSelf: bool, recurseInAnon: bool) -> Symbol | None:
225-
# TODO: further simplification from C++ to C
226-
if Symbol.debug_lookup:
227-
Symbol.debug_print("_find_first_named_symbol ->")
228-
res = self._find_named_symbols(ident, matchSelf, recurseInAnon,
229-
searchInSiblings=False)
230-
try:
231-
return next(res)
232-
except StopIteration:
233-
return None
234-
235-
def _find_named_symbols(self, ident: ASTIdentifier,
236-
matchSelf: bool, recurseInAnon: bool,
237-
searchInSiblings: bool) -> Iterator[Symbol]:
238-
# TODO: further simplification from C++ to C
239-
if Symbol.debug_lookup:
240-
Symbol.debug_indent += 1
241-
Symbol.debug_print("_find_named_symbols:")
242-
Symbol.debug_indent += 1
243-
Symbol.debug_print("self:")
244-
logger.debug(self.to_string(Symbol.debug_indent + 1), end="")
245-
Symbol.debug_print("ident: ", ident)
246-
Symbol.debug_print("matchSelf: ", matchSelf)
247-
Symbol.debug_print("recurseInAnon: ", recurseInAnon)
248-
Symbol.debug_print("searchInSiblings: ", searchInSiblings)
249-
250-
def candidates() -> Generator[Symbol, None, None]:
251-
s = self
252-
if Symbol.debug_lookup:
253-
Symbol.debug_print("searching in self:")
254-
logger.debug(s.to_string(Symbol.debug_indent + 1), end="")
255-
while True:
256-
if matchSelf:
257-
yield s
258-
if recurseInAnon:
259-
yield from s.children_recurse_anon
260-
else:
261-
yield from s._children
262-
263-
if s.siblingAbove is None:
264-
break
265-
s = s.siblingAbove
266-
if Symbol.debug_lookup:
267-
Symbol.debug_print("searching in sibling:")
268-
logger.debug(s.to_string(Symbol.debug_indent + 1), end="")
269-
270-
for s in candidates():
271-
if Symbol.debug_lookup:
272-
Symbol.debug_print("candidate:")
273-
logger.debug(s.to_string(Symbol.debug_indent + 1), end="")
274-
if s.ident == ident:
275-
if Symbol.debug_lookup:
276-
Symbol.debug_indent += 1
277-
Symbol.debug_print("matches")
278-
Symbol.debug_indent -= 3
279-
yield s
280-
if Symbol.debug_lookup:
281-
Symbol.debug_indent += 2
282-
if Symbol.debug_lookup:
283-
Symbol.debug_indent -= 2
284-
285243
def _symbol_lookup(
286244
self,
287245
nestedName: ASTNestedName,
@@ -310,54 +268,44 @@ def _symbol_lookup(
310268
# find the right starting point for lookup
311269
parentSymbol = self
312270
if nestedName.rooted:
313-
while parentSymbol.parent:
271+
while parentSymbol.parent is not None:
314272
parentSymbol = parentSymbol.parent
273+
315274
if ancestorLookupType is not None:
316275
# walk up until we find the first identifier
317276
firstName = names[0]
318277
while parentSymbol.parent:
319-
if parentSymbol.find_identifier(firstName,
320-
matchSelf=matchSelf,
321-
recurseInAnon=recurseInAnon,
322-
searchInSiblings=searchInSiblings):
278+
if str(firstName) in parentSymbol._childrenByName:
323279
break
324280
parentSymbol = parentSymbol.parent
325281

326-
if Symbol.debug_lookup:
327-
Symbol.debug_print("starting point:")
328-
logger.debug(parentSymbol.to_string(Symbol.debug_indent + 1), end="")
329-
330282
# and now the actual lookup
331283
for ident in names[:-1]:
332-
symbol = parentSymbol._find_first_named_symbol(
333-
ident, matchSelf=matchSelf, recurseInAnon=recurseInAnon)
334-
if symbol is None:
284+
name = str(ident)
285+
if name in parentSymbol._childrenByName:
286+
symbol = parentSymbol._childrenByName[name]
287+
else:
335288
symbol = onMissingQualifiedSymbol(parentSymbol, ident)
336289
if symbol is None:
337-
if Symbol.debug_lookup:
338-
Symbol.debug_indent -= 2
339290
return None
340-
# We have now matched part of a nested name, and need to match more
341-
# so even if we should matchSelf before, we definitely shouldn't
342-
# even more. (see also issue #2666)
343-
matchSelf = False
344291
parentSymbol = symbol
345292

346-
if Symbol.debug_lookup:
347-
Symbol.debug_print("handle last name from:")
348-
logger.debug(parentSymbol.to_string(Symbol.debug_indent + 1), end="")
349-
350293
# handle the last name
351294
ident = names[-1]
352-
353-
symbols = parentSymbol._find_named_symbols(
354-
ident, matchSelf=matchSelf,
355-
recurseInAnon=recurseInAnon,
356-
searchInSiblings=searchInSiblings)
357-
if Symbol.debug_lookup:
358-
symbols = list(symbols) # type: ignore[assignment]
359-
Symbol.debug_indent -= 2
360-
return SymbolLookupResult(symbols, parentSymbol, ident)
295+
name = str(ident)
296+
symbol = None
297+
if name in parentSymbol._childrenByName:
298+
symbol = parentSymbol._childrenByName[name]
299+
300+
if not symbol and recurseInAnon:
301+
for child in parentSymbol._anonChildren:
302+
if name in child._childrenByName:
303+
symbol = child._childrenByName[name]
304+
break
305+
if symbol:
306+
return SymbolLookupResult([symbol], parentSymbol, ident)
307+
else:
308+
return SymbolLookupResult([], parentSymbol, ident)
361309

362310
def _add_symbols(
363311
self,
@@ -531,17 +479,17 @@ def merge_with(self, other: Symbol, docnames: list[str],
531479
if Symbol.debug_lookup:
532480
Symbol.debug_indent += 1
533481
Symbol.debug_print("merge_with:")
482+
534483
assert other is not None
535-
for otherChild in other._children:
536-
ourChild = self._find_first_named_symbol(
537-
ident=otherChild.ident, matchSelf=False,
538-
recurseInAnon=False)
539-
if ourChild is None:
484+
for otherChild in other._childrenByName.values():
485+
otherName = str(otherChild.ident)
486+
if otherName not in self._childrenByName:
540487
# TODO: hmm, should we prune by docnames?
541-
self._children.append(otherChild)
488+
self._childrenByName[otherName] = otherChild
542489
otherChild.parent = self
543490
otherChild._assert_invariants()
544491
continue
492+
ourChild = self._childrenByName[otherName]
545493
if otherChild.declaration and otherChild.docname in docnames:
546494
if not ourChild.declaration:
547495
ourChild._fill_empty(otherChild.declaration,
@@ -559,6 +507,7 @@ def merge_with(self, other: Symbol, docnames: list[str],
559507
# just ignore it, right?
560508
pass
561509
ourChild.merge_with(otherChild, docnames, env)
510+
562511
if Symbol.debug_lookup:
563512
Symbol.debug_indent -= 1
564513

@@ -607,10 +556,13 @@ def find_identifier(self, ident: ASTIdentifier,
607556
Symbol.debug_indent -= 2
608557
if matchSelf and current.ident == ident:
609558
return current
610-
children = current.children_recurse_anon if recurseInAnon else current._children
611-
for s in children:
612-
if s.ident == ident:
613-
return s
559+
name = str(ident)
560+
if name in current._childrenByName:
561+
return current._childrenByName[name]
562+
if recurseInAnon:
563+
for child in current._anonChildren:
564+
if name in child._childrenByName:
565+
return child._childrenByName[name]
614566
if not searchInSiblings:
615567
break
616568
current = current.siblingAbove
@@ -622,12 +574,11 @@ def direct_lookup(self, key: LookupKey) -> Symbol | None:
622574
Symbol.debug_print("direct_lookup:")
623575
Symbol.debug_indent += 1
624576
s = self
625-
for name, id_ in key.data:
577+
for ident, id_ in key.data:
626578
res = None
627-
for cand in s._children:
628-
if cand.ident == name:
629-
res = cand
630-
break
579+
name = str(ident)
580+
if name in s._childrenByName:
581+
res = s._childrenByName[name]
631582
s = res
632583
if Symbol.debug_lookup:
633584
Symbol.debug_print("name: ", name)
@@ -695,4 +646,5 @@ def to_string(self, indent: int) -> str:
695646
return ''.join(res)
696647

697648
def dump(self, indent: int) -> str:
698-
return ''.join([self.to_string(indent), *(c.dump(indent + 1) for c in self._children)])
649+
return ''.join([self.to_string(indent),
650+
*(c.dump(indent + 1) for c in self._childrenByName.values())])

0 commit comments

Comments
 (0)