Skip to content

Commit 1c06da5

Browse files
committed
Refactor re caching into abstract base class
1 parent 4409905 commit 1c06da5

File tree

6 files changed

+162
-155
lines changed

6 files changed

+162
-155
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ These breaking changes apply to Python JSONPath in its default configuration. We
1515

1616
- Added the `startswith(value, prefix)` function extension. `startswith` returns `True` if both arguments are strings and the second argument is a prefix of the first argument. See the [filter functions](https://jg-rp.github.io/python-jsonpath/functions/#startswith) documentation.
1717
- The non-standard `keys()` function extension has been reimplemented. It used to be a simple Python function, `jsonpath.function_extensions.keys`. Now it is a "well-typed" class, `jsonpath.function_extensions.Keys`. See the [filter functions](https://jg-rp.github.io/python-jsonpath/functions/#keys) documentation.
18+
- Added `cache_capacity`, `debug` and `thread_safe` arguments to `jsonpath.function_extensions.Match` and `jsonpath.function_extensions.Search` constructors.
1819

1920
**JSONPath features**
2021

jsonpath/function_extensions/_pattern.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,88 @@
11
from typing import List
2+
from typing import Optional
3+
4+
try:
5+
import regex as re
6+
7+
REGEX_AVAILABLE = True
8+
except ImportError:
9+
import re # type: ignore
10+
11+
REGEX_AVAILABLE = False
12+
13+
try:
14+
from iregexp_check import check
15+
16+
IREGEXP_AVAILABLE = True
17+
except ImportError:
18+
IREGEXP_AVAILABLE = False
19+
20+
from jsonpath.exceptions import JSONPathError
21+
from jsonpath.function_extensions import ExpressionType
22+
from jsonpath.function_extensions import FilterFunction
23+
from jsonpath.lru_cache import LRUCache
24+
from jsonpath.lru_cache import ThreadSafeLRUCache
25+
26+
27+
class AbstractRegexFilterFunction(FilterFunction):
    """Base class for filter functions that accept regular expression arguments.

    Compiled patterns - and `None` for patterns that were rejected - are kept
    in an LRU cache keyed by the pattern string exactly as it was passed in.

    Arguments:
        cache_capacity: The size of the regular expression cache.
        debug: When `True`, raise an exception when regex pattern compilation
            fails. The default - as required by RFC 9535 - is `False`, which
            silently ignores bad patterns.
        thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an
            instance of `LRUCache`.
    """

    arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
    return_type = ExpressionType.LOGICAL

    def __init__(
        self,
        *,
        cache_capacity: int = 300,
        debug: bool = False,
        thread_safe: bool = False,
    ):
        self.cache: LRUCache[str, Optional[re.Pattern[str]]] = (
            ThreadSafeLRUCache(capacity=cache_capacity)
            if thread_safe
            else LRUCache(capacity=cache_capacity)
        )

        self.debug = debug

    def check_cache(self, pattern: str) -> Optional[re.Pattern[str]]:
        """Return a compiled re pattern if `pattern` is valid, or `None` otherwise.

        The result is looked up in, and stored to, `self.cache` under the
        original `pattern` string, so repeated calls with the same argument
        hit the cache even when the pattern is rewritten by `map_re` before
        compilation.

        Arguments:
            pattern: The regular expression source, as given to the filter
                function.

        Raises:
            JSONPathError: If `debug` is `True`, `iregexp_check` is available
                and `pattern` is not a valid I-Regexp.
            re.error: If `debug` is `True` and `pattern` fails to compile.
        """
        try:
            return self.cache[pattern]
        except KeyError:
            pass

        compiled: Optional[re.Pattern[str]] = None

        if IREGEXP_AVAILABLE and not check(pattern):
            if self.debug:
                raise JSONPathError(
                    "search pattern is not a valid I-Regexp", token=None
                ) from None
        else:
            # Keep the caller's string untouched: it is the cache key. Only
            # the text handed to the regex engine is mapped.
            source = map_re(pattern) if REGEX_AVAILABLE else pattern

            try:
                compiled = re.compile(source)
            except re.error:
                if self.debug:
                    raise

        # Rejected patterns are cached as `None` so they are not re-checked.
        self.cache[pattern] = compiled
        return compiled
282

383

484
def map_re(pattern: str) -> str:
85+
"""Convert an I-Regexp pattern into a Python re pattern."""
586
escaped = False
687
char_class = False
788
parts: List[str] = []

jsonpath/function_extensions/match.py

Lines changed: 4 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,17 @@
11
"""The standard `match` function extension."""
22

3-
from typing import Optional
3+
from ._pattern import AbstractRegexFilterFunction
44

5-
try:
6-
import regex as re
75

8-
REGEX_AVAILABLE = True
9-
except ImportError:
10-
import re # type: ignore
11-
12-
REGEX_AVAILABLE = False
13-
14-
try:
15-
from iregexp_check import check
16-
17-
IREGEXP_AVAILABLE = True
18-
except ImportError:
19-
IREGEXP_AVAILABLE = False
20-
21-
from jsonpath.exceptions import JSONPathError
22-
from jsonpath.function_extensions import ExpressionType
23-
from jsonpath.function_extensions import FilterFunction
24-
from jsonpath.lru_cache import LRUCache
25-
from jsonpath.lru_cache import ThreadSafeLRUCache
26-
27-
from ._pattern import map_re
28-
29-
30-
class Match(FilterFunction):
31-
"""The standard `match` function.
32-
33-
Arguments:
34-
cache_capacity: The size of the regular expression cache.
35-
debug: When `True`, raise an exception when regex pattern compilation
36-
fails. The default - as required by RFC 9535 - is `False`, which
37-
silently ignores bad patterns.
38-
thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an
39-
instance of `LRUCache`.
40-
"""
41-
42-
arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
43-
return_type = ExpressionType.LOGICAL
44-
45-
def __init__(
46-
self,
47-
*,
48-
cache_capacity: int = 300,
49-
debug: bool = False,
50-
thread_safe: bool = False,
51-
):
52-
self._cache: LRUCache[str, Optional[re.Pattern[str]]] = (
53-
ThreadSafeLRUCache(capacity=cache_capacity)
54-
if thread_safe
55-
else LRUCache(capacity=cache_capacity)
56-
)
57-
58-
self.debug = debug
6+
class Match(AbstractRegexFilterFunction):
7+
"""The standard `match` function."""
598

609
def __call__(self, value: object, pattern: object) -> bool:
6110
"""Return `True` if _value_ matches _pattern_, or `False` otherwise."""
6211
if not isinstance(value, str) or not isinstance(pattern, str):
6312
return False
6413

65-
try:
66-
_pattern = self._cache[pattern]
67-
except KeyError:
68-
if IREGEXP_AVAILABLE and not check(pattern):
69-
if self.debug:
70-
raise JSONPathError(
71-
"search pattern is not a valid I-Regexp", token=None
72-
) from None
73-
_pattern = None
74-
else:
75-
if REGEX_AVAILABLE:
76-
pattern = map_re(pattern)
77-
78-
try:
79-
_pattern = re.compile(pattern)
80-
except re.error:
81-
if self.debug:
82-
raise
83-
_pattern = None
84-
85-
self._cache[pattern] = _pattern
14+
_pattern = self.check_cache(pattern)
8615

8716
if _pattern is None:
8817
return False

jsonpath/function_extensions/search.py

Lines changed: 6 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,17 @@
11
"""The standard `search` function extension."""
22

3-
from typing import Optional
3+
from ._pattern import AbstractRegexFilterFunction
44

5-
try:
6-
import regex as re
75

8-
REGEX_AVAILABLE = True
9-
except ImportError: # pragma: no cover
10-
import re # type: ignore
6+
class Search(AbstractRegexFilterFunction):
7+
"""The standard `search` function."""
118

12-
REGEX_AVAILABLE = False
13-
14-
try:
15-
from iregexp_check import check
16-
17-
IREGEXP_AVAILABLE = True
18-
except ImportError: # pragma: no cover
19-
IREGEXP_AVAILABLE = False
20-
21-
from jsonpath.exceptions import JSONPathError
22-
from jsonpath.function_extensions import ExpressionType
23-
from jsonpath.function_extensions import FilterFunction
24-
from jsonpath.lru_cache import LRUCache
25-
from jsonpath.lru_cache import ThreadSafeLRUCache
26-
27-
from ._pattern import map_re
28-
29-
30-
class Search(FilterFunction):
31-
"""The standard `search` function.
32-
33-
Arguments:
34-
cache_capacity: The size of the regular expression cache.
35-
debug: When `True`, raise an exception when regex pattern compilation
36-
fails. The default - as required by RFC 9535 - is `False`, which
37-
silently ignores bad patterns.
38-
thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an
39-
instance of `LRUCache`.
40-
"""
41-
42-
arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
43-
return_type = ExpressionType.LOGICAL
44-
45-
def __init__(
46-
self,
47-
*,
48-
cache_capacity: int = 300,
49-
debug: bool = False,
50-
thread_safe: bool = False,
51-
):
52-
self._cache: LRUCache[str, Optional[re.Pattern[str]]] = (
53-
ThreadSafeLRUCache(capacity=cache_capacity)
54-
if thread_safe
55-
else LRUCache(capacity=cache_capacity)
56-
)
57-
58-
self.debug = debug
59-
60-
def __call__(self, value: str, pattern: object) -> bool:
61-
"""Return `True` if _value_ contains _pattern_, or `False` otherwise."""
9+
def __call__(self, value: object, pattern: object) -> bool:
10+
"""Return `True` if _value_ contains _pattern_, or `False` otherwise."""
6211
if not isinstance(value, str) or not isinstance(pattern, str):
6312
return False
6413

65-
try:
66-
_pattern = self._cache[pattern]
67-
except KeyError:
68-
if IREGEXP_AVAILABLE and not check(pattern):
69-
if self.debug:
70-
raise JSONPathError(
71-
"search pattern is not a valid I-Regexp", token=None
72-
) from None
73-
_pattern = None
74-
else:
75-
if REGEX_AVAILABLE:
76-
pattern = map_re(pattern)
77-
78-
try:
79-
_pattern = re.compile(pattern)
80-
except re.error:
81-
if self.debug:
82-
raise
83-
_pattern = None
84-
85-
self._cache[pattern] = _pattern
14+
_pattern = self.check_cache(pattern)
8615

8716
if _pattern is None:
8817
return False

pyproject.toml

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,14 @@ dependencies = [
5959
]
6060

6161
[tool.hatch.envs.default.scripts]
62-
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}"
63-
cov-html = "pytest --cov-report=html --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}"
62+
cov = [
63+
"hatch run no-regex:cov",
64+
"pytest --cov-append --cov-report=term-missing --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}"
65+
]
66+
cov-html = [
67+
"hatch run no-regex:cov",
68+
"pytest --cov-append --cov-report=html --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}",
69+
]
6470
no-cov = "cov --no-cov {args}"
6571
test = "pytest {args}"
6672
lint = "ruff check ."
@@ -80,7 +86,11 @@ build = "mkdocs build --clean --strict"
8086
serve = "mkdocs serve --dev-addr localhost:8000"
8187

8288
[tool.hatch.envs.no-regex]
83-
dependencies = ["pytest"]
89+
dependencies = ["pytest", "pytest-cov"]
90+
91+
[tool.hatch.envs.no-regex.scripts]
92+
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=jsonpath --cov=tests tests/test_compliance.py {args}"
93+
8494

8595
[tool.coverage.run]
8696
branch = true

tests/test_regex_cache.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
try:
2+
import iregexp_check # noqa: F401
3+
4+
IREGEXP_AVAILABLE = True
5+
except ImportError:
6+
IREGEXP_AVAILABLE = False
7+
8+
import pytest
9+
10+
from jsonpath import JSONPathError
11+
from jsonpath.function_extensions import Search
12+
13+
14+
def test_patterns_are_cached() -> None:
    """Calling `Search` with a new pattern adds one entry to its cache."""
    search = Search(cache_capacity=2)
    cache = search.cache

    assert len(cache) == 0
    assert search("abcdef", "bc.")
    assert len(cache) == 1
19+
20+
21+
def test_malformed_patterns_are_cached() -> None:
    """A pattern that cannot be compiled is cached with a value of `None`."""
    bad_pattern = "bc["
    search = Search(cache_capacity=2)
    cache = search.cache

    assert len(cache) == 0
    assert search("abcdef", bad_pattern) is False
    assert len(cache) == 1
    assert cache[bad_pattern] is None
27+
28+
29+
@pytest.mark.skipif(not IREGEXP_AVAILABLE, reason="requires iregexp_check")
def test_invalid_iregexp_patterns_are_cached() -> None:
    """A pattern rejected by the I-Regexp check is cached as `None`."""
    not_iregexp = "\\d+"
    search = Search(cache_capacity=2)
    cache = search.cache

    assert len(cache) == 0
    assert search("ab123cdef", not_iregexp) is False
    assert len(cache) == 1
    assert cache[not_iregexp] is None
36+
37+
38+
def test_debug_regex_patterns() -> None:
    """With `debug=True`, a bad pattern raises instead of returning `False`."""
    search = Search(cache_capacity=2, debug=True)
    assert len(search.cache) == 0

    with pytest.raises(JSONPathError):
        search("abcdef", "bc[")
44+
45+
46+
def test_cache_capacity() -> None:
    """The cache never exceeds its capacity; the oldest pattern is evicted."""
    search = Search(cache_capacity=2)
    cache = search.cache
    assert len(cache) == 0

    # (value, pattern, expected cache size after the call)
    steps = (
        ("1abcdef", "ab[a-z]", 1),
        ("2abcdef", "bc[a-z]", 2),
        ("3abcdef", "cd[a-z]", 2),
    )

    for value, pattern, expected_size in steps:
        assert search(value, pattern)
        assert len(cache) == expected_size

    assert "cd[a-z]" in cache
    assert "bc[a-z]" in cache
    assert "ab[a-z]" not in cache

0 commit comments

Comments
 (0)