Skip to content

Commit 85fd284

Browse files
donaldhjakobandersenAA-Turner
authored
C domain: fix performance regression since 3.0.0 (#12162)
Sphinx 3.0.0 onwards have a significant performance regression from 2.4.4 for the C domain. The cProfile output shows that this is due to _find_named_symbols using a linear search. Replace Symbol._children with dicts, one keyed by ident and one keyed by docname. This lets us remove _find_named_symbols and replace these usage patterns with fast child lookup. Also use the _anonChildren list to speed up searching through anonymous children. Note that dict is guaranteed to maintain insertion order since Python 3.7 so we retain iteration. Whenever iteration is required, we use _childrenByName.values(). Signed-off-by: Donald Hunter <[email protected]> Co-authored-by: Jakob Lykke Andersen <[email protected]> Co-authored-by: Adam Turner <[email protected]>
1 parent 0806a00 commit 85fd284

File tree

2 files changed

+90
-134
lines changed

2 files changed

+90
-134
lines changed

CHANGES.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ Features added
2222
Bugs fixed
2323
----------
2424

25+
* #12162: Fix a performance regression in the C domain that has
26+
been present since version 3.0.0.
27+
Patch by Donald Hunter.
28+
2529
Testing
2630
-------
2731

sphinx/domains/c/_symbol.py

Lines changed: 86 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Any, Callable
3+
from typing import TYPE_CHECKING, Any
44

55
from sphinx.domains.c._ast import (
66
ASTDeclaration,
@@ -11,7 +11,7 @@
1111
from sphinx.util import logging
1212

1313
if TYPE_CHECKING:
14-
from collections.abc import Iterator
14+
from collections.abc import Callable, Iterable, Iterator, Sequence
1515

1616
from typing_extensions import Self
1717

@@ -32,7 +32,7 @@ def __str__(self) -> str:
3232

3333

3434
class SymbolLookupResult:
35-
def __init__(self, symbols: Iterator[Symbol], parentSymbol: Symbol,
35+
def __init__(self, symbols: Sequence[Symbol], parentSymbol: Symbol,
3636
ident: ASTIdentifier) -> None:
3737
self.symbols = symbols
3838
self.parentSymbol = parentSymbol
@@ -102,12 +102,14 @@ def __init__(
102102
self.isRedeclaration = False
103103
self._assert_invariants()
104104

105-
# Remember to modify Symbol.remove if modifications to the parent change.
106-
self._children: list[Symbol] = []
107-
self._anonChildren: list[Symbol] = []
108-
# note: _children includes _anonChildren
105+
# These properties store the same children for different access patterns.
106+
# ``_add_child()`` and ``_remove_child()`` should be used for modifying them.
107+
self._children_by_name: dict[str, Symbol] = {}
108+
self._children_by_docname: dict[str, dict[str, Symbol]] = {}
109+
self._anon_children: set[Symbol] = set()
110+
109111
if self.parent:
110-
self.parent._children.append(self)
112+
self.parent._add_child(self)
111113
if self.declaration:
112114
self.declaration.symbol = self
113115

@@ -117,6 +119,27 @@ def __init__(
117119
def __repr__(self) -> str:
118120
return f'<Symbol {self.to_string(indent=0)!r}>'
119121

122+
@property
123+
def _children(self) -> Iterable[Symbol]:
124+
return self._children_by_name.values()
125+
126+
def _add_child(self, child: Symbol) -> None:
127+
name = child.ident.name
128+
if name in self._children_by_name:
129+
# Duplicate so don't add - will be reported in _add_symbols()
130+
return
131+
self._children_by_name[name] = child
132+
self._children_by_docname.setdefault(child.docname, {})[name] = child
133+
if child.ident.is_anonymous:
134+
self._anon_children.add(child)
135+
136+
def _remove_child(self, child: Symbol) -> None:
137+
name = child.ident.name
138+
self._children_by_name.pop(name, None)
139+
self._children_by_docname.get(child.docname, {}).pop(name, None)
140+
if child.ident.is_anonymous:
141+
self._anon_children.discard(child)
142+
120143
def _fill_empty(self, declaration: ASTDeclaration, docname: str, line: int) -> None:
121144
self._assert_invariants()
122145
assert self.declaration is None
@@ -157,25 +180,28 @@ def _add_function_params(self) -> None:
157180
Symbol.debug_indent -= 1
158181

159182
def remove(self) -> None:
160-
if self.parent is None:
161-
return
162-
assert self in self.parent._children
163-
self.parent._children.remove(self)
164-
self.parent = None
183+
if self.parent:
184+
self.parent._remove_child(self)
185+
self.parent = None
165186

166187
def clear_doc(self, docname: str) -> None:
167-
for sChild in self._children:
168-
sChild.clear_doc(docname)
169-
if sChild.declaration and sChild.docname == docname:
170-
sChild.declaration = None
171-
sChild.docname = None
172-
sChild.line = None
173-
if sChild.siblingAbove is not None:
174-
sChild.siblingAbove.siblingBelow = sChild.siblingBelow
175-
if sChild.siblingBelow is not None:
176-
sChild.siblingBelow.siblingAbove = sChild.siblingAbove
177-
sChild.siblingAbove = None
178-
sChild.siblingBelow = None
188+
if docname not in self._children_by_docname:
189+
for child in self._children:
190+
child.clear_doc(docname)
191+
return
192+
193+
children: dict[str, Symbol] = self._children_by_docname.pop(docname)
194+
for child in children.values():
195+
child.declaration = None
196+
child.docname = None
197+
child.line = None
198+
if child.siblingAbove is not None:
199+
child.siblingAbove.siblingBelow = child.siblingBelow
200+
if child.siblingBelow is not None:
201+
child.siblingBelow.siblingAbove = child.siblingAbove
202+
child.siblingAbove = None
203+
child.siblingBelow = None
204+
self._remove_child(child)
179205

180206
def get_all_symbols(self) -> Iterator[Symbol]:
181207
yield self
@@ -186,14 +212,6 @@ def get_all_symbols(self) -> Iterator[Symbol]:
186212
def children(self) -> Iterator[Symbol]:
187213
yield from self._children
188214

189-
@property
190-
def children_recurse_anon(self) -> Iterator[Symbol]:
191-
for c in self._children:
192-
yield c
193-
if not c.ident.is_anon():
194-
continue
195-
yield from c.children_recurse_anon
196-
197215
def get_lookup_key(self) -> LookupKey:
198216
# The pickle files for the environment and for each document are distinct.
199217
# The environment has all the symbols, but the documents has xrefs that
@@ -224,68 +242,6 @@ def get_full_nested_name(self) -> ASTNestedName:
224242
names = [s.ident for s in symbols]
225243
return ASTNestedName(names, rooted=False)
226244

227-
def _find_first_named_symbol(self, ident: ASTIdentifier,
228-
matchSelf: bool, recurseInAnon: bool) -> Symbol | None:
229-
# TODO: further simplification from C++ to C
230-
if Symbol.debug_lookup:
231-
Symbol.debug_print("_find_first_named_symbol ->")
232-
res = self._find_named_symbols(ident, matchSelf, recurseInAnon,
233-
searchInSiblings=False)
234-
try:
235-
return next(res)
236-
except StopIteration:
237-
return None
238-
239-
def _find_named_symbols(self, ident: ASTIdentifier,
240-
matchSelf: bool, recurseInAnon: bool,
241-
searchInSiblings: bool) -> Iterator[Symbol]:
242-
# TODO: further simplification from C++ to C
243-
if Symbol.debug_lookup:
244-
Symbol.debug_indent += 1
245-
Symbol.debug_print("_find_named_symbols:")
246-
Symbol.debug_indent += 1
247-
Symbol.debug_print("self:")
248-
logger.debug(self.to_string(Symbol.debug_indent + 1, addEndNewline=False))
249-
Symbol.debug_print("ident: ", ident)
250-
Symbol.debug_print("matchSelf: ", matchSelf)
251-
Symbol.debug_print("recurseInAnon: ", recurseInAnon)
252-
Symbol.debug_print("searchInSiblings: ", searchInSiblings)
253-
254-
def candidates() -> Iterator[Symbol]:
255-
s = self
256-
if Symbol.debug_lookup:
257-
Symbol.debug_print("searching in self:")
258-
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
259-
while True:
260-
if matchSelf:
261-
yield s
262-
if recurseInAnon:
263-
yield from s.children_recurse_anon
264-
else:
265-
yield from s._children
266-
267-
if s.siblingAbove is None:
268-
break
269-
s = s.siblingAbove
270-
if Symbol.debug_lookup:
271-
Symbol.debug_print("searching in sibling:")
272-
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
273-
274-
for s in candidates():
275-
if Symbol.debug_lookup:
276-
Symbol.debug_print("candidate:")
277-
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
278-
if s.ident == ident:
279-
if Symbol.debug_lookup:
280-
Symbol.debug_indent += 1
281-
Symbol.debug_print("matches")
282-
Symbol.debug_indent -= 3
283-
yield s
284-
if Symbol.debug_lookup:
285-
Symbol.debug_indent += 2
286-
if Symbol.debug_lookup:
287-
Symbol.debug_indent -= 2
288-
289245
def _symbol_lookup(
290246
self,
291247
nestedName: ASTNestedName,
@@ -314,16 +270,14 @@ def _symbol_lookup(
314270
# find the right starting point for lookup
315271
parentSymbol = self
316272
if nestedName.rooted:
317-
while parentSymbol.parent:
273+
while parentSymbol.parent is not None:
318274
parentSymbol = parentSymbol.parent
275+
319276
if ancestorLookupType is not None:
320277
# walk up until we find the first identifier
321278
firstName = names[0]
322279
while parentSymbol.parent:
323-
if parentSymbol.find_identifier(firstName,
324-
matchSelf=matchSelf,
325-
recurseInAnon=recurseInAnon,
326-
searchInSiblings=searchInSiblings):
280+
if firstName.name in parentSymbol._children_by_name:
327281
break
328282
parentSymbol = parentSymbol.parent
329283

@@ -333,18 +287,15 @@ def _symbol_lookup(
333287

334288
# and now the actual lookup
335289
for ident in names[:-1]:
336-
symbol = parentSymbol._find_first_named_symbol(
337-
ident, matchSelf=matchSelf, recurseInAnon=recurseInAnon)
338-
if symbol is None:
290+
name = ident.name
291+
if name in parentSymbol._children_by_name:
292+
symbol = parentSymbol._children_by_name[name]
293+
else:
339294
symbol = onMissingQualifiedSymbol(parentSymbol, ident)
340295
if symbol is None:
341296
if Symbol.debug_lookup:
342297
Symbol.debug_indent -= 2
343298
return None
344-
# We have now matched part of a nested name, and need to match more
345-
# so even if we should matchSelf before, we definitely shouldn't
346-
# even more. (see also issue #2666)
347-
matchSelf = False
348299
parentSymbol = symbol
349300

350301
if Symbol.debug_lookup:
@@ -353,15 +304,19 @@ def _symbol_lookup(
353304

354305
# handle the last name
355306
ident = names[-1]
307+
name = ident.name
308+
symbol = parentSymbol._children_by_name.get(name)
309+
if not symbol and recurseInAnon:
310+
for child in parentSymbol._anon_children:
311+
if name in child._children_by_name:
312+
symbol = child._children_by_name[name]
313+
break
356314

357-
symbols = parentSymbol._find_named_symbols(
358-
ident, matchSelf=matchSelf,
359-
recurseInAnon=recurseInAnon,
360-
searchInSiblings=searchInSiblings)
361315
if Symbol.debug_lookup:
362-
symbols = list(symbols) # type: ignore[assignment]
363316
Symbol.debug_indent -= 2
364-
return SymbolLookupResult(symbols, parentSymbol, ident)
317+
318+
result = [symbol] if symbol else []
319+
return SymbolLookupResult(result, parentSymbol, ident)
365320

366321
def _add_symbols(
367322
self,
@@ -535,17 +490,17 @@ def merge_with(self, other: Symbol, docnames: list[str],
535490
if Symbol.debug_lookup:
536491
Symbol.debug_indent += 1
537492
Symbol.debug_print("merge_with:")
493+
538494
assert other is not None
539495
for otherChild in other._children:
540-
ourChild = self._find_first_named_symbol(
541-
ident=otherChild.ident, matchSelf=False,
542-
recurseInAnon=False)
543-
if ourChild is None:
496+
otherName = otherChild.ident.name
497+
if otherName not in self._children_by_name:
544498
# TODO: hmm, should we prune by docnames?
545-
self._children.append(otherChild)
546499
otherChild.parent = self
500+
self._add_child(otherChild)
547501
otherChild._assert_invariants()
548502
continue
503+
ourChild = self._children_by_name[otherName]
549504
if otherChild.declaration and otherChild.docname in docnames:
550505
if not ourChild.declaration:
551506
ourChild._fill_empty(otherChild.declaration,
@@ -563,6 +518,7 @@ def merge_with(self, other: Symbol, docnames: list[str],
563518
# just ignore it, right?
564519
pass
565520
ourChild.merge_with(otherChild, docnames, env)
521+
566522
if Symbol.debug_lookup:
567523
Symbol.debug_indent -= 1
568524

@@ -611,10 +567,13 @@ def find_identifier(self, ident: ASTIdentifier,
611567
Symbol.debug_indent -= 2
612568
if matchSelf and current.ident == ident:
613569
return current
614-
children = current.children_recurse_anon if recurseInAnon else current._children
615-
for s in children:
616-
if s.ident == ident:
617-
return s
570+
name = ident.name
571+
if name in current._children_by_name:
572+
return current._children_by_name[name]
573+
if recurseInAnon:
574+
for child in current._anon_children:
575+
if name in child._children_by_name:
576+
return child._children_by_name[name]
618577
if not searchInSiblings:
619578
break
620579
current = current.siblingAbove
@@ -626,24 +585,17 @@ def direct_lookup(self, key: LookupKey) -> Symbol | None:
626585
Symbol.debug_print("direct_lookup:")
627586
Symbol.debug_indent += 1
628587
s = self
629-
for name, id_ in key.data:
630-
res = None
631-
for cand in s._children:
632-
if cand.ident == name:
633-
res = cand
634-
break
635-
s = res
588+
for ident, id_ in key.data:
589+
s = s._children_by_name.get(ident.name)
636590
if Symbol.debug_lookup:
637-
Symbol.debug_print("name: ", name)
591+
Symbol.debug_print("name: ", ident.name)
638592
Symbol.debug_print("id: ", id_)
639593
if s is not None:
640594
logger.debug(s.to_string(Symbol.debug_indent + 1, addEndNewline=False))
641595
else:
642596
Symbol.debug_print("not found")
643597
if s is None:
644-
if Symbol.debug_lookup:
645-
Symbol.debug_indent -= 2
646-
return None
598+
break
647599
if Symbol.debug_lookup:
648600
Symbol.debug_indent -= 2
649601
return s
@@ -683,7 +635,7 @@ def to_string(self, indent: int, *, addEndNewline: bool = True) -> str:
683635
res.append('::')
684636
else:
685637
if self.ident:
686-
res.append(str(self.ident))
638+
res.append(self.ident.name)
687639
else:
688640
res.append(str(self.declaration))
689641
if self.declaration:

0 commit comments

Comments
 (0)