Skip to content

Commit 9884b7c

Browse files
authored
Merge pull request #404 from nodestream-proj/optimization/jmespath-query
Optimize Jmespath with key lookup
2 parents 53efa1e + 86d4341 commit 9884b7c

File tree

2 files changed

+59
-17
lines changed

2 files changed

+59
-17
lines changed

nodestream/pipeline/value_providers/jmespath_value_provider.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from abc import ABC, abstractmethod
12
from typing import Any, Iterable, Type
23

34
import jmespath
@@ -7,6 +8,50 @@
78
from .context import ProviderContext
89
from .value_provider import ValueProvider, ValueProviderException
910

11+
# `QueryStrategy` is here to provide the seam for different optimizations
12+
# for executing jmespath queries. We can either execute a "fully fledged"
13+
# jmespath query or we can implement some simple access patterns that
14+
# are faster to execute. For example, if the expression is a simple key
15+
# lookup, we can just use the key directly instead of compiling the
16+
# jmespath expression and then executing it with all the weight and
17+
# overhead that comes with it.
18+
19+
20+
class QueryStrategy(ABC):
    """Strategy for evaluating an expression against a context's document.

    Subclasses implement `search`. `from_string_expression` selects which
    subclass is used for a given expression string.
    """

    @classmethod
    def from_string_expression(cls, expression: str):
        """Build the strategy used to evaluate `expression`.

        Args:
            expression: The JMESPath expression text.

        Returns:
            A `KeyLookup` when the whole expression is a single bare ASCII
            identifier; otherwise an `ExecuteJmespath` wrapping the compiled
            expression.

        Raises:
            Whatever `jmespath.compile` raises for an expression it rejects;
            nothing is caught here.
        """
        # A JMESPath unquoted identifier is ASCII-only:
        # [A-Za-z_][A-Za-z0-9_]*. `str.isalpha()` on its own also accepts
        # non-ASCII letters (which JMESPath rejects unquoted) and sends plain
        # keys such as `team_name` or `field1` down the slow path. For ASCII
        # input, `str.isidentifier()` matches the JMESPath grammar exactly.
        if expression.isascii() and expression.isidentifier():
            return KeyLookup(expression)

        compiled_query = jmespath.compile(expression)
        return ExecuteJmespath(compiled_query)

    @abstractmethod
    def search(self, context: ProviderContext):
        """Return the value this strategy finds for `context`."""
        pass
32+
33+
34+
class ExecuteJmespath(QueryStrategy):
    """Strategy that runs a pre-compiled JMESPath query on the document."""

    def __init__(self, compiled_query: ParsedResult) -> None:
        # Compiled once up front so every `search` call reuses it.
        self.compiled_query = compiled_query

    def __str__(self) -> str:
        """Return the compiled query's `expression` attribute as a string."""
        return str(self.compiled_query.expression)

    def search(self, context: ProviderContext):
        """Run the compiled query against `context.document`."""
        document = context.document
        return self.compiled_query.search(document)
43+
44+
45+
class KeyLookup(QueryStrategy):
    """Strategy that reads a single key straight from the document.

    Used in place of a compiled JMESPath query when the expression is just
    one bare identifier.
    """

    def __init__(self, key: str) -> None:
        self.key = key

    def search(self, context: ProviderContext):
        """Return `context.document[self.key]`, or `None` when unavailable.

        `None` is returned both when the key is absent and when the document
        has no `get` method (for example a list, a string or `None`). The
        latter mirrors JMESPath, where an identifier applied to a non-object
        evaluates to null instead of raising.
        """
        getter = getattr(context.document, "get", None)
        if getter is None:
            return None
        return getter(self.key)

    def __str__(self) -> str:
        """Return the key, which is also the original expression text."""
        return self.key
54+
1055

1156
class JmespathValueProvider(ValueProvider):
1257
"""A `ValueProvider` that uses JMESPath to extract values from a document."""
@@ -24,13 +69,13 @@ def install_yaml_tag(cls, loader: Type[SafeLoader]):
2469

2570
@classmethod
2671
def from_string_expression(cls, expression: str):
27-
return cls(jmespath.compile(expression))
72+
return cls(QueryStrategy.from_string_expression(expression))
2873

29-
def __init__(self, compiled_query: ParsedResult) -> None:
30-
self.compiled_query = compiled_query
74+
def __init__(self, strategy: QueryStrategy) -> None:
75+
self.strategy = strategy
3176

3277
def search(self, context: ProviderContext):
33-
raw_search = self.compiled_query.search(context.document)
78+
raw_search = self.strategy.search(context)
3479
if raw_search is None:
3580
return
3681
if isinstance(raw_search, list):
@@ -51,14 +96,12 @@ def many_values(self, context: ProviderContext) -> Iterable[Any]:
5196
raise ValueProviderException(str(context.document), self) from e
5297

5398
def __str__(self) -> str:
54-
return (
55-
f"JmespathValueProvider: { {'expression': self.compiled_query.expression} }"
56-
)
99+
return f"JmespathValueProvider: { {'expression': str(self.strategy)} }"
57100

58101

59102
# Dump a `JmespathValueProvider` as a `!jmespath` scalar holding the string
# form of its strategy.
SafeDumper.add_representer(
    JmespathValueProvider,
    lambda dumper, provider: dumper.represent_scalar(
        "!jmespath",
        str(provider.strategy),
    ),
)

tests/unit/pipeline/value_providers/test_jmespath_value_provider.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import jmespath
21
import pytest
32
from hamcrest import assert_that, equal_to, has_length, none
43

@@ -7,37 +6,37 @@
76

87

98
def test_single_value_present(blank_context_with_document):
    """`single_value` for `team.name` returns "nodestream"."""
    provider = JmespathValueProvider.from_string_expression("team.name")
    found = provider.single_value(blank_context_with_document)
    assert_that(found, equal_to("nodestream"))
1413

1514

1615
def test_single_value_missing(blank_context_with_document):
    """`single_value` is `None` for `team.description`.

    The fixture document is expected not to contain that path.
    """
    provider = JmespathValueProvider.from_string_expression("team.description")
    found = provider.single_value(blank_context_with_document)
    assert_that(found, none())
1918

2019

2120
def test_single_value_is_list(blank_context_with_document):
    """`single_value` for the list-valued `project.tags` returns "graphdb"."""
    provider = JmespathValueProvider.from_string_expression("project.tags")
    found = provider.single_value(blank_context_with_document)
    assert_that(found, equal_to("graphdb"))
2524

2625

2726
def test_multiple_values_missing(blank_context_with_document):
    """`many_values` yields nothing for `team.description`."""
    provider = JmespathValueProvider.from_string_expression("team.description")
    values = list(provider.many_values(blank_context_with_document))
    assert_that(values, has_length(0))
3029

3130

3231
def test_multiple_values_returns_one_value(blank_context_with_document):
    """`many_values` for the scalar `team.name` yields exactly that value."""
    provider = JmespathValueProvider.from_string_expression("team.name")
    values = list(provider.many_values(blank_context_with_document))
    assert_that(values, has_length(1))
    assert_that(values[0], equal_to("nodestream"))
3736

3837

3938
def test_multiple_values_hit(blank_context_with_document):
    """`many_values` for `project.tags` yields each tag in order."""
    provider = JmespathValueProvider.from_string_expression("project.tags")
    values = list(provider.many_values(blank_context_with_document))
    assert_that(values, equal_to(["graphdb", "python"]))
4342

@@ -46,7 +45,7 @@ def test_single_value_error(blank_context_with_document):
4645
some_text_from_document = blank_context_with_document.document["team"]["name"]
4746
# this will error because team2 does not exist causing the join to throw an error
4847
expression_with_error = "join('/', [team.name || '', team2.name])"
49-
subject = JmespathValueProvider(jmespath.compile(expression_with_error))
48+
subject = JmespathValueProvider.from_string_expression(expression_with_error)
5049

5150
with pytest.raises(ValueProviderException) as e_info:
5251
subject.single_value(blank_context_with_document)
@@ -59,7 +58,7 @@ def test_single_value_error(blank_context_with_document):
5958
def test_multiple_values_error(blank_context_with_document):
6059
# this will error because team2 does not exist causing the join to throw an error
6160
expression_with_error = "join('/', [team.name || '', team2.name])"
62-
subject = JmespathValueProvider(jmespath.compile(expression_with_error))
61+
subject = JmespathValueProvider.from_string_expression(expression_with_error)
6362

6463
with pytest.raises(Exception) as e_info:
6564
generator = subject.many_values(blank_context_with_document)

0 commit comments

Comments
 (0)