Skip to content

Commit 0104685

Browse files
authored
Improve the lookup speed for Keywords by pre-computing hash (#592)
This PR includes a change to pre-compute the hash of a Keyword when a new instance is created. Keyword hashes are used as the lookup key for Keywords inside the intern cache. Keywords are also commonly used as map keys and in other collections requiring hashability, so having a pre-computed hash should help improve performance by not requiring repeated computations of the otherwise static hash. The compiler now also uses the pre-computed Keyword hash to look up Keywords directly from the intern cache, which should reduce the cost of repeated lookups (particularly if a function makes heavy use of keywords). This change appears to have improved the lookup speed by around 9%.

Before these changes:

```
basilisp.user=> (import timeit)
<module 'timeit' from '/Users/christopher/.pyenv/versions/3.6.9/lib/python3.6/timeit.py'>
basilisp.user=> (timeit/timeit (fn [] :abc))
0.7336159450060222
```

Afterwards:

```
basilisp.user=> (import timeit)
<module 'timeit' from '/Users/christopher/.pyenv/versions/3.6.9/lib/python3.6/timeit.py'>
basilisp.user=> (timeit/timeit (fn [] :abc))
0.668459995999001
```
1 parent 217782e commit 0104685

File tree

4 files changed

+50
-15
lines changed

4 files changed

+50
-15
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1616

1717
### Changed
1818
* Moved `basilisp.lang.runtime.to_seq` to `basilisp.lang.seq` so it can be used within that module and by `basilisp.lang.runtime` without circular import (#588)
19+
* Keyword hashes are now pre-computed when they are created, so they do not need to be recomputed to be fetched from the intern cache (#592)
20+
* The compiler now uses the pre-computed hash to look up keywords directly, which should improve lookup time for repeated invocations (#592)
21+
* Symbol hashes are now pre-computed when they are created (#592)
1922

2023
### Fixed
2124
* Fixed a bug where `def` forms did not permit recursive references to the `def`'ed Vars (#578)

src/basilisp/lang/compiler/generator.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@
132132
_TRY_PREFIX = "lisp_try"
133133
_NS_VAR = "_NS"
134134

135+
# Keyword constants used in generating code
136+
_INTERFACE_KW = kw.keyword("interface")
137+
_REST_KW = kw.keyword("rest")
138+
135139

136140
GeneratorException = partial(CompilerException, phase=CompilerPhase.CODE_GENERATION)
137141

@@ -629,7 +633,7 @@ def _var_ns_as_python_sym(name: str) -> str:
629633
_NEW_DECIMAL_FN_NAME = _load_attr(f"{_UTIL_ALIAS}.decimal_from_str")
630634
_NEW_FRACTION_FN_NAME = _load_attr(f"{_UTIL_ALIAS}.fraction")
631635
_NEW_INST_FN_NAME = _load_attr(f"{_UTIL_ALIAS}.inst_from_str")
632-
_NEW_KW_FN_NAME = _load_attr(f"{_KW_ALIAS}.keyword")
636+
_NEW_KW_FN_NAME = _load_attr(f"{_KW_ALIAS}.keyword_from_hash")
633637
_NEW_LIST_FN_NAME = _load_attr(f"{_LIST_ALIAS}.list")
634638
_EMPTY_LIST_FN_NAME = _load_attr(f"{_LIST_ALIAS}.List.empty")
635639
_NEW_MAP_FN_NAME = _load_attr(f"{_MAP_ALIAS}.map")
@@ -1302,7 +1306,10 @@ def __deftype_or_reify_bases_to_py_ast(
13021306
args=[
13031307
ast.Call(
13041308
func=_NEW_KW_FN_NAME,
1305-
args=[ast.Constant("interface")],
1309+
args=[
1310+
ast.Constant(hash(_INTERFACE_KW)),
1311+
ast.Constant("interface"),
1312+
],
13061313
keywords=[],
13071314
)
13081315
],
@@ -1530,7 +1537,10 @@ def __fn_decorator(arities: Iterable[int], has_rest_arg: bool = False,) -> ast.C
15301537
[
15311538
ast.Call(
15321539
func=_NEW_KW_FN_NAME,
1533-
args=[ast.Constant("rest")],
1540+
args=[
1541+
ast.Constant(hash(_REST_KW)),
1542+
ast.Constant("rest"),
1543+
],
15341544
keywords=[],
15351545
)
15361546
]
@@ -3160,7 +3170,9 @@ def _kw_to_py_ast(_: GeneratorContext, form: kw.Keyword) -> ast.AST:
31603170
.or_else(list)
31613171
)
31623172
return ast.Call(
3163-
func=_NEW_KW_FN_NAME, args=[ast.Constant(form.name)], keywords=kwarg
3173+
func=_NEW_KW_FN_NAME,
3174+
args=[ast.Constant(hash(form)), ast.Constant(form.name)],
3175+
keywords=kwarg,
31643176
)
31653177

31663178

src/basilisp/lang/keyword.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99

1010

1111
class Keyword(ILispObject):
12-
__slots__ = ("_name", "_ns")
12+
__slots__ = ("_name", "_ns", "_hash")
1313

1414
def __init__(self, name: str, ns: Optional[str] = None) -> None:
1515
self._name = name
1616
self._ns = ns
17+
self._hash = hash_kw(name, ns)
1718

1819
@property
1920
def name(self) -> str:
@@ -35,7 +36,7 @@ def __eq__(self, other):
3536
)
3637

3738
def __hash__(self):
38-
return hash((self._name, self._ns))
39+
return self._hash
3940

4041
def __call__(self, m: IAssociative, default=None):
4142
try:
@@ -44,7 +45,7 @@ def __call__(self, m: IAssociative, default=None):
4445
return None
4546

4647
def __reduce__(self):
47-
return keyword, (self._name, self._ns)
48+
return keyword_from_hash, (self._hash, self._name, self._ns)
4849

4950

5051
def complete(
@@ -74,20 +75,38 @@ def complete(
7475
return map(str, results)
7576

7677

77-
def keyword(name: str, ns: Optional[str] = None) -> Keyword:
78-
"""Create a new keyword with name and optional namespace.
78+
def hash_kw(name: str, ns: Optional[str] = None) -> int:
79+
"""Return the hash of a potential Keyword instance by its name and namespace."""
80+
return hash((name, ns))
81+
82+
83+
def keyword_from_hash(kw_hash: int, name: str, ns: Optional[str] = None) -> Keyword:
84+
"""Return the interned keyword with the hash `kw_hash` or create and intern a new
85+
keyword with name `name` and optional namespace `ns`.
7986
8087
Keywords are stored in a global cache by their hash. If a keyword with the same
8188
hash already exists in the cache, that keyword will be returned. If no keyword
8289
exists in the global cache, one will be created, entered into the cache, and then
83-
returned."""
90+
returned.
91+
92+
This function is an optimization primarily meant for the compiler. Keyword hashes
93+
are pre-computed during compilation so repeated lookups for the same keyword do not
94+
require recomputing the hash. In some brief testing, this yielded significant
95+
performance improvements when creating the same keyword repeatedly."""
8496
global _INTERN
8597

86-
h = hash((name, ns))
8798
with _LOCK:
88-
found = _INTERN.val_at(h)
99+
found = _INTERN.val_at(kw_hash)
89100
if found:
90101
return found
91102
kw = Keyword(name, ns=ns)
92-
_INTERN = _INTERN.assoc(h, kw)
103+
_INTERN = _INTERN.assoc(kw_hash, kw)
93104
return kw
105+
106+
107+
def keyword(name: str, ns: Optional[str] = None) -> Keyword:
108+
"""Return a keyword with name `name` and optional namespace `ns`.
109+
110+
Keyword instances are interned, so an existing object may be returned if one
111+
with the same name and namespace is already interned."""
112+
return keyword_from_hash(hash_kw(name, ns), name, ns=ns)

src/basilisp/lang/symbol.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,15 @@
66

77

88
class Symbol(ILispObject, IWithMeta):
9-
__slots__ = ("_name", "_ns", "_meta")
9+
__slots__ = ("_name", "_ns", "_meta", "_hash")
1010

1111
def __init__(
1212
self, name: str, ns: Optional[str] = None, meta: Optional[IPersistentMap] = None
1313
) -> None:
1414
self._name = name
1515
self._ns = ns
1616
self._meta = meta
17+
self._hash = hash((ns, name))
1718

1819
def _lrepr(self, **kwargs) -> str:
1920
print_meta = kwargs["print_meta"]
@@ -53,7 +54,7 @@ def __eq__(self, other):
5354
return self._ns == other._ns and self._name == other._name
5455

5556
def __hash__(self):
56-
return hash((self._ns, self._name))
57+
return self._hash
5758

5859

5960
def symbol(name: str, ns: Optional[str] = None, meta=None) -> Symbol:

0 commit comments

Comments
 (0)