Skip to content

Commit 82463fd

Browse files
committed
Optimize Jmespath with key lookup
1 parent 53efa1e commit 82463fd

File tree

2 files changed

+60
-16
lines changed

2 files changed

+60
-16
lines changed

nodestream/pipeline/value_providers/jmespath_value_provider.py

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from abc import ABC, abstractmethod
12
from typing import Any, Iterable, Type
23

34
import jmespath
@@ -8,6 +9,51 @@
89
from .value_provider import ValueProvider, ValueProviderException
910

1011

12+
# `QueryStrategy` is here to provide the seam for different optimizations
13+
# for executing jmespath queries. We can either execute a "fully fledged"
14+
# jmespath query or we can implement some simple access patterns that
15+
# are faster to execute. For example, if the expression is a simple key
16+
# lookup, we can just use the key directly instead of compiling the
17+
# jmespath expression and then executing it with all the weight and
18+
# overhead that comes with it.
19+
20+
21+
class QueryStrategy(ABC):
    """Strategy for evaluating a JMESPath-style expression against a document.

    Concrete strategies trade generality for speed: a bare key can be read
    straight from the document, while anything more complex is handed to a
    compiled JMESPath query.
    """

    @classmethod
    def from_string_expression(cls, expression: str):
        """Return the cheapest strategy able to evaluate ``expression``.

        An expression consisting of a single unquoted JMESPath identifier
        (ASCII letters, digits and underscores, not starting with a digit)
        is a plain field access and is served by ``KeyLookup``. Every other
        expression is compiled with ``jmespath.compile`` and wrapped in
        ``ExecuteJmespath``; compilation errors propagate to the caller.
        """
        # `str.isalpha` would be the wrong test here: it accepts non-ASCII
        # letters (which are not valid unquoted JMESPath identifiers and must
        # still be rejected by the compiler) and it refuses common keys that
        # contain digits or underscores such as `first_name`.
        # For ASCII text, `str.isidentifier` matches [A-Za-z_][A-Za-z0-9_]*,
        # which is exactly the shape of an unquoted JMESPath identifier.
        if expression.isascii() and expression.isidentifier():
            return KeyLookup(expression)

        compiled_query = jmespath.compile(expression)
        return ExecuteJmespath(compiled_query)

    @abstractmethod
    def search(self, context: ProviderContext):
        """Evaluate this strategy's query using the given provider context."""
33+
34+
35+
class ExecuteJmespath(QueryStrategy):
    """Query strategy that delegates to an already-compiled JMESPath query."""

    def __init__(self, compiled_query: ParsedResult) -> None:
        # The query arrives pre-compiled; it is stored and reused on each search.
        self.compiled_query = compiled_query

    def search(self, context: ProviderContext):
        """Run the compiled query against ``context.document``."""
        document = context.document
        return self.compiled_query.search(document)

    def __str__(self) -> str:
        """Return the compiled query's expression as a string."""
        expression = self.compiled_query.expression
        return str(expression)
44+
45+
46+
class KeyLookup(QueryStrategy):
    """Query strategy that reads a single top-level key from the document.

    This is the fast path for expressions that are just a bare key: it skips
    the JMESPath interpreter and asks the document for the key directly.
    """

    def __init__(self, key: str) -> None:
        self.key = key

    def search(self, context: ProviderContext):
        """Return the value stored under ``self.key`` in ``context.document``.

        Returns ``None`` when the key is absent. Also returns ``None`` when
        the document has no ``get`` method (for example a list, a string or
        ``None``) instead of raising ``AttributeError``, so this fast path
        yields the same result a JMESPath field access would for such input.
        """
        document = context.document
        try:
            return document.get(self.key)
        except AttributeError:
            # Not a mapping: a field access on such a value evaluates to null.
            return None

    def __str__(self) -> str:
        """Return the key, which is also the original expression text."""
        return self.key
55+
56+
1157
class JmespathValueProvider(ValueProvider):
1258
"""A `ValueProvider` that uses JMESPath to extract values from a document."""
1359

@@ -24,13 +70,13 @@ def install_yaml_tag(cls, loader: Type[SafeLoader]):
2470

2571
@classmethod
2672
def from_string_expression(cls, expression: str):
27-
return cls(jmespath.compile(expression))
73+
return cls(QueryStrategy.from_string_expression(expression))
2874

29-
def __init__(self, compiled_query: ParsedResult) -> None:
30-
self.compiled_query = compiled_query
75+
def __init__(self, strategy: QueryStrategy) -> None:
76+
self.strategy = strategy
3177

3278
def search(self, context: ProviderContext):
33-
raw_search = self.compiled_query.search(context.document)
79+
raw_search = self.strategy.search(context)
3480
if raw_search is None:
3581
return
3682
if isinstance(raw_search, list):
@@ -51,14 +97,12 @@ def many_values(self, context: ProviderContext) -> Iterable[Any]:
5197
raise ValueProviderException(str(context.document), self) from e
5298

5399
def __str__(self) -> str:
54-
return (
55-
f"JmespathValueProvider: { {'expression': self.compiled_query.expression} }"
56-
)
100+
return f"JmespathValueProvider: { {'expression': str(self.strategy)} }"
57101

58102

59103
SafeDumper.add_representer(
60104
JmespathValueProvider,
61105
lambda dumper, jmespath: dumper.represent_scalar(
62-
"!jmespath", jmespath.compiled_query.expression
106+
"!jmespath", str(jmespath.strategy)
63107
),
64108
)

tests/unit/pipeline/value_providers/test_jmespath_value_provider.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,37 +7,37 @@
77

88

99
def test_single_value_present(blank_context_with_document):
10-
subject = JmespathValueProvider(jmespath.compile("team.name"))
10+
subject = JmespathValueProvider.from_string_expression("team.name")
1111
assert_that(
1212
subject.single_value(blank_context_with_document), equal_to("nodestream")
1313
)
1414

1515

1616
def test_single_value_missing(blank_context_with_document):
17-
subject = JmespathValueProvider(jmespath.compile("team.description"))
17+
subject = JmespathValueProvider.from_string_expression("team.description")
1818
assert_that(subject.single_value(blank_context_with_document), none())
1919

2020

2121
def test_single_value_is_list(blank_context_with_document):
22-
subject = JmespathValueProvider(jmespath.compile("project.tags"))
22+
subject = JmespathValueProvider.from_string_expression("project.tags")
2323
result = subject.single_value(blank_context_with_document)
2424
assert_that(result, equal_to("graphdb"))
2525

2626

2727
def test_multiple_values_missing(blank_context_with_document):
28-
subject = JmespathValueProvider(jmespath.compile("project.labels"))
28+
subject = JmespathValueProvider.from_string_expression("team.description")
2929
assert_that(list(subject.many_values(blank_context_with_document)), has_length(0))
3030

3131

3232
def test_multiple_values_returns_one_value(blank_context_with_document):
33-
subject = JmespathValueProvider(jmespath.compile("team.name"))
33+
subject = JmespathValueProvider.from_string_expression("team.name")
3434
result = list(subject.many_values(blank_context_with_document))
3535
assert_that(result, has_length(1))
3636
assert_that(result[0], equal_to("nodestream"))
3737

3838

3939
def test_multiple_values_hit(blank_context_with_document):
40-
subject = JmespathValueProvider(jmespath.compile("project.tags"))
40+
subject = JmespathValueProvider.from_string_expression("project.tags")
4141
result = subject.many_values(blank_context_with_document)
4242
assert_that(list(result), equal_to(["graphdb", "python"]))
4343

@@ -46,7 +46,7 @@ def test_single_value_error(blank_context_with_document):
4646
some_text_from_document = blank_context_with_document.document["team"]["name"]
4747
# this will error because team2 does not exist causing the join to throw an error
4848
expression_with_error = "join('/', [team.name || '', team2.name])"
49-
subject = JmespathValueProvider(jmespath.compile(expression_with_error))
49+
subject = JmespathValueProvider.from_string_expression(expression_with_error)
5050

5151
with pytest.raises(ValueProviderException) as e_info:
5252
subject.single_value(blank_context_with_document)
@@ -59,7 +59,7 @@ def test_single_value_error(blank_context_with_document):
5959
def test_multiple_values_error(blank_context_with_document):
6060
# this will error because team2 does not exist causing the join to throw an error
6161
expression_with_error = "join('/', [team.name || '', team2.name])"
62-
subject = JmespathValueProvider(jmespath.compile(expression_with_error))
62+
subject = JmespathValueProvider.from_string_expression(expression_with_error)
6363

6464
with pytest.raises(Exception) as e_info:
6565
generator = subject.many_values(blank_context_with_document)

0 commit comments

Comments
 (0)