Skip to content

Commit 18059d7

Browse files
committed
fix: fix path mode & add unit tests
1 parent 3e2bcda commit 18059d7

File tree

3 files changed

+183
-111
lines changed

3 files changed

+183
-111
lines changed

jsonpath/__init__.py

Lines changed: 113 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -2,60 +2,70 @@
22
Author : zhangxianbing1
33
Date : 2020-12-27 09:22:14
44
LastEditors : zhangxianbing1
5-
LastEditTime : 2021-01-04 12:40:59
5+
LastEditTime : 2021-01-04 14:10:00
66
Description : JSONPath
77
"""
88
__version__ = "0.0.3"
99
__author__ = "zhangxianbing"
1010

1111
import json
12+
import os
1213
import re
14+
import logging
1315
from collections import defaultdict
1416
from typing import Union
1517

16-
RESULT_TYPE = {
17-
"VALUE": "A list of specific values.",
18-
"FIELD": "A dict with specific fields.",
19-
"PATH": "All path of specific values.",
20-
}
18+
# pylint: disable=invalid-name,missing-function-docstring,missing-class-docstring,eval-used,logging-fstring-interpolation
2119

2220

23-
SEP = ";"
24-
# regex patterns
25-
REP_PICKUP_QUOTE = re.compile(r"['](.*?)[']")
26-
REP_PICKUP_BRACKET = re.compile(r"[\[](.*?)[\]]")
27-
REP_PUTBACK_QUOTE = re.compile(r"#Q(\d+)")
28-
REP_PUTBACK_BRACKET = re.compile(r"#B(\d+)")
29-
REP_DOUBLEDOT = re.compile(r"\.\.")
30-
REP_DOT = re.compile(r"(?<!\.)\.(?!\.)")
21+
def create_logger(name: Union[str, None] = None, level: Union[int, str] = logging.INFO):
    """Get or create a logger used for local debug.

    The logger named ``name`` is fetched with ``logging.getLogger`` and given
    a single ``StreamHandler`` that formats records as
    ``<time>-<level>-[<name>] <message>``.

    Calling this function again with the same ``name`` reuses the handler it
    installed earlier instead of stacking another one, so each record is
    emitted only once; the level of both logger and handler is refreshed.

    Args:
        name: Logger name; ``None`` selects the root logger.
        level: Numeric level or level name (e.g. ``"DEBUG"``).

    Returns:
        The configured ``logging.Logger``.
    """
    formatter = logging.Formatter(
        f"%(asctime)s-%(levelname)s-[{name}] %(message)s", datefmt="[%Y-%m-%d %H:%M:%S]"
    )

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Tag our handler with a recognisable name so a later call can find it
    # again without touching handlers that other code attached.
    handler_name = f"create_logger:{name}"
    handler = next(
        (h for h in logger.handlers if h.get_name() == handler_name), None
    )
    if handler is None:
        handler = logging.StreamHandler()
        handler.set_name(handler_name)
        logger.addHandler(handler)

    handler.setLevel(level)
    handler.setFormatter(formatter)

    return logger
4137

42-
def _getattr(obj: dict, path: str):
43-
r = obj
44-
for k in path.split("."):
45-
try:
46-
r = r.get(k)
47-
except (AttributeError, KeyError) as err:
48-
print(err)
49-
return None
5038

51-
return r
39+
LOG = create_logger("jsonpath", os.getenv("PYLOGLEVEL", "INFO"))
5240

5341

5442
class ExprSyntaxError(Exception):
5543
pass
5644

5745

5846
class JSONPath:
47+
RESULT_TYPE = {
48+
"VALUE": "A list of specific values.",
49+
"FIELD": "A dict with specific fields.",
50+
"PATH": "All path of specific values.",
51+
}
52+
53+
SEP = ";"
54+
# regex patterns
55+
REP_PICKUP_QUOTE = re.compile(r"['](.*?)[']")
56+
REP_PICKUP_BRACKET = re.compile(r"[\[](.*?)[\]]")
57+
REP_PUTBACK_QUOTE = re.compile(r"#Q(\d+)")
58+
REP_PUTBACK_BRACKET = re.compile(r"#B(\d+)")
59+
REP_DOUBLEDOT = re.compile(r"\.\.")
60+
REP_DOT = re.compile(r"(?<!\.)\.(?!\.)")
61+
62+
# operators
63+
REP_SLICE_CONTENT = re.compile(r"^(-?\d*)?:(-?\d*)?(:-?\d*)?$")
64+
REP_SELECT_CONTENT = re.compile(r"^([\w.]+)(,[\w.]+)+$")
65+
REP_FILTER_CONTENT = re.compile(
66+
r"@\.(.*?)(?=<=|>=|==|!=|>|<| in| not| is)|len\(@\.(.*?)\)"
67+
)
68+
5969
# annotations
6070
steps: list
6171
lpath: int
@@ -65,25 +75,40 @@ class JSONPath:
6575

6676
def __init__(self, expr: str):
6777
expr = self._parse_expr(expr)
68-
self.steps = expr.split(SEP)
78+
self.steps = expr.split(JSONPath.SEP)
6979
self.lpath = len(self.steps)
70-
print(f"steps : {self.steps}")
80+
LOG.debug(f"steps : {self.steps}")
81+
82+
def parse(self, obj, result_type="VALUE"):
    """Run the compiled path against ``obj`` and return the collected result.

    Args:
        obj: The document to search; must be a ``list`` or a ``dict``.
        result_type: One of the keys of ``JSONPath.RESULT_TYPE``.

    Returns:
        ``self.result`` after ``self._trace`` has walked ``obj``: a dict
        when ``result_type`` is ``"FIELD"``, otherwise a list.

    Raises:
        TypeError: ``obj`` is neither a list nor a dict.
        ValueError: ``result_type`` is not a known result type.
    """
    if not isinstance(obj, (list, dict)):
        raise TypeError("obj must be a list or a dict.")

    known_types = JSONPath.RESULT_TYPE
    if result_type not in known_types:
        raise ValueError(
            f"result_type must be one of {tuple(JSONPath.RESULT_TYPE.keys())}"
        )

    # The container is reset on every call so results never leak between runs.
    self.result_type = result_type
    self.result = {} if self.result_type == "FIELD" else []

    # Start at step 0 with the root path marker.
    self._trace(obj, 0, "$")
    return self.result
7198

7299
def _parse_expr(self, expr):
73-
if __debug__:
74-
print(f"before expr : {expr}")
75-
76-
expr = REP_PICKUP_QUOTE.sub(self._f_pickup_quote, expr)
77-
expr = REP_PICKUP_BRACKET.sub(self._f_pickup_bracket, expr)
78-
expr = REP_DOUBLEDOT.sub(f"{SEP}..{SEP}", expr)
79-
expr = REP_DOT.sub(SEP, expr)
80-
expr = REP_PUTBACK_BRACKET.sub(self._f_putback_bracket, expr)
81-
expr = REP_PUTBACK_QUOTE.sub(self._f_putback_quote, expr)
100+
LOG.debug(f"before expr : {expr}")
101+
102+
expr = JSONPath.REP_PICKUP_QUOTE.sub(self._f_pickup_quote, expr)
103+
expr = JSONPath.REP_PICKUP_BRACKET.sub(self._f_pickup_bracket, expr)
104+
expr = JSONPath.REP_DOUBLEDOT.sub(f"{JSONPath.SEP}..{JSONPath.SEP}", expr)
105+
expr = JSONPath.REP_DOT.sub(JSONPath.SEP, expr)
106+
expr = JSONPath.REP_PUTBACK_BRACKET.sub(self._f_putback_bracket, expr)
107+
expr = JSONPath.REP_PUTBACK_QUOTE.sub(self._f_putback_quote, expr)
82108
if expr.startswith("$;"):
83109
expr = expr[2:]
84110

85-
if __debug__:
86-
print(f"after expr : {expr}")
111+
LOG.debug(f"after expr : {expr}")
87112
return expr
88113

89114
def _f_pickup_quote(self, m):
@@ -109,36 +134,43 @@ def _f_brackets(m):
109134
ret += '["%s"]' % e
110135
return ret
111136

112-
def parse(self, obj, result_type="VALUE"):
113-
if not isinstance(obj, (list, dict)):
114-
raise TypeError("obj must be a list or a dict.")
115-
if result_type not in RESULT_TYPE:
116-
raise ValueError(f"result_type must be one of {tuple(RESULT_TYPE.keys())}")
117-
self.result_type = result_type
118-
if self.result_type == "FIELD":
119-
self.result = {}
120-
else:
121-
self.result = []
122-
123-
self._trace(obj, 0, "$")
124-
125-
return self.result
126-
127137
@staticmethod
128138
def _traverse(f, obj, i: int, path: str, *args):
129139
if isinstance(obj, list):
130140
for idx, v in enumerate(obj):
131-
f(v, i, f"{path}{SEP}{idx}", *args)
141+
f(v, i, f"{path}{JSONPath.SEP}{idx}", *args)
132142
elif isinstance(obj, dict):
133143
for k, v in obj.items():
134-
f(v, i, f"{path}{SEP}{k}", *args)
144+
f(v, i, f"{path}{JSONPath.SEP}{k}", *args)
145+
146+
@staticmethod
def _getattr(obj: dict, path: str):
    """Look up a dot-separated ``path`` inside nested mappings.

    Every segment of ``path`` is fetched with ``.get`` from the value reached
    so far. The value found for the last segment is returned (``None`` when
    ``.get`` finds nothing). If a lookup raises ``AttributeError`` or
    ``KeyError`` the error is logged through ``LOG.error`` and ``None`` is
    returned.
    """
    node = obj
    for segment in path.split("."):
        try:
            node = node.get(segment)
        except (AttributeError, KeyError) as err:
            LOG.error(err)
            break
    else:
        # Every segment resolved without raising.
        return node

    return None
157+
158+
@staticmethod
def _sorter(obj, sortbys):
    """Sort ``obj`` in place by one or more comma-separated fields.

    ``obj`` is a list of pairs; the sort key is read from the second element
    of each pair via ``JSONPath._getattr``. A field prefixed with ``~`` sorts
    in descending order.

    The fields are applied from last to first: because ``list.sort`` is
    stable, the first listed field ends up as the primary ordering.
    """
    for field in reversed(sortbys.split(",")):
        descending = field.startswith("~")
        # Strip the "~" marker to obtain the actual dotted field path.
        attr = field[1:] if descending else field
        obj.sort(
            key=lambda pair, k=attr: JSONPath._getattr(pair[1], k),
            reverse=descending,
        )
135167

136168
def _filter(self, obj, i: int, path: str, step: str):
137169
r = False
138170
try:
139171
r = eval(step, None, {"__obj": obj})
140172
except Exception as err:
141-
print(err)
173+
LOG.error(err)
142174
if r:
143175
self._trace(obj, i, path)
144176

@@ -158,7 +190,7 @@ def _trace(self, obj, i: int, path):
158190
self.result.append(path)
159191
elif self.result_type == "FIELD":
160192
pass
161-
print(obj)
193+
LOG.debug(f"path: {path} | value: {obj}")
162194
return
163195

164196
step = self.steps[i]
@@ -178,61 +210,55 @@ def _trace(self, obj, i: int, path):
178210
if isinstance(obj, list) and step.isdigit():
179211
ikey = int(step)
180212
if ikey < len(obj):
181-
self._trace(obj[ikey], i + 1, f"{path}{SEP}{step}")
213+
self._trace(obj[ikey], i + 1, f"{path}{JSONPath.SEP}{step}")
182214
return
183215

184216
# get value from dict
185217
if isinstance(obj, dict) and step in obj:
186-
self._trace(obj[step], i + 1, f"{path}{SEP}{step}")
218+
self._trace(obj[step], i + 1, f"{path}{JSONPath.SEP}{step}")
187219
return
188220

189221
# slice
190-
if isinstance(obj, list) and REP_SLICE_CONTENT.fullmatch(step):
222+
if isinstance(obj, list) and JSONPath.REP_SLICE_CONTENT.fullmatch(step):
223+
obj = [(idx, v) for idx, v in enumerate(obj)]
191224
vals = eval(f"obj[{step}]")
192-
for idx, v in enumerate(vals):
193-
self._trace(v, i + 1, f"{path}{SEP}{idx}")
225+
for idx, v in vals:
226+
self._trace(v, i + 1, f"{path}{JSONPath.SEP}{idx}")
194227
return
195228

196229
# select
197-
if isinstance(obj, dict) and REP_SELECT_CONTENT.fullmatch(step):
230+
if isinstance(obj, dict) and JSONPath.REP_SELECT_CONTENT.fullmatch(step):
198231
for k in step.split(","):
199232
if k in obj:
200-
self._trace(obj[k], i + 1, f"{path}{SEP}{k}")
233+
self._trace(obj[k], i + 1, f"{path}{JSONPath.SEP}{k}")
201234
return
202235

203236
# filter
204237
if step.startswith("?(") and step.endswith(")"):
205238
step = step[2:-1]
206-
step = REP_FILTER_CONTENT.sub(self._f_brackets, step)
239+
step = JSONPath.REP_FILTER_CONTENT.sub(self._f_brackets, step)
207240
self._traverse(self._filter, obj, i + 1, path, step)
208241
return
209242

210243
# sort
211244
if step.startswith("/(") and step.endswith(")"):
212245
if isinstance(obj, list):
213-
for sortby in step[2:-1].split(",")[::-1]:
214-
if sortby.startswith("~"):
215-
obj.sort(
216-
key=lambda t, k=sortby: _getattr(t, k[1:]), reverse=True
217-
)
218-
else:
219-
obj.sort(key=lambda t, k=sortby: _getattr(t, k))
246+
obj = [(idx, v) for idx, v in enumerate(obj)]
247+
self._sorter(obj, step[2:-1])
248+
for idx, v in obj:
249+
self._trace(v, i + 1, f"{path}{JSONPath.SEP}{idx}")
220250
elif isinstance(obj, dict):
221251
obj = [(k, v) for k, v in obj.items()]
222-
for sortby in step[2:-1].split(",")[::-1]:
223-
if sortby.startswith("~"):
224-
obj.sort(
225-
key=lambda t, k=sortby: _getattr(t[1], k[1:]), reverse=True
226-
)
227-
else:
228-
obj.sort(key=lambda t, k=sortby: _getattr(t[1], k))
229-
obj = {k: v for k, v in obj}
230-
self._traverse(self._trace, obj, i + 1, path)
252+
self._sorter(obj, step[2:-1])
253+
for k, v in obj:
254+
self._trace(v, i + 1, f"{path}{JSONPath.SEP}{k}")
255+
else:
256+
raise ExprSyntaxError("sort operate must acting on list or dict")
231257
return
232258

233259

234260
if __name__ == "__main__":
235261
with open("test/data/2.json", "rb") as f:
236262
d = json.load(f)
237-
D = JSONPath("$.scores[/(score)].score").parse(d, "PATH")
263+
D = JSONPath("$.book[/(price)].price").parse(d, "PATH")
238264
print(D)

0 commit comments

Comments
 (0)