Skip to content

Commit c3fc7b0

Browse files
committed
feat: add jsonpath
1 parent 6f05267 commit c3fc7b0

File tree

5 files changed

+2700
-1
lines changed

5 files changed

+2700
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,4 @@ dmypy.json
127127

128128
# Pyre type checker
129129
.pyre/
130+
.vscode

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11
# jsonpath-python
2-
A JSONPath implementations for python. (More powerful and modern!!!)
2+
3+
A more powerful JSONPath implementation in modern Python.
4+
5+
## Features
6+
7+
- [x] Light. (No need to install third-party dependencies.)
8+
- [ ] Support fields-extractor.
9+
- [ ] Support simple multi-selection and inverse-selection.

jsonpath/__init__.py

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
"""
2+
Author : zhangxianbing1
3+
Date : 2020-12-27 09:22:14
4+
LastEditors : zhangxianbing1
5+
LastEditTime : 2020-12-27 12:48:15
6+
Description :
7+
"""
8+
__version__ = "1.0.0"
9+
__author__ = "zhangxianbing"
10+
11+
12+
import json
13+
import re
14+
import sys
15+
16+
# XXX BUGS:
17+
# evalx is generally a crock:
18+
# handle !@.name.name???
19+
# there are probably myriad unexpected ways to get an exception:
20+
# wrap initial "trace" call in jsonpath body in a try/except??
21+
22+
# XXX TODO:
23+
# internally keep paths as lists to preserve integer types
24+
# (instead of as ';' delimited strings)
25+
26+
__all__ = ["jsonpath"]
27+
28+
29+
def normalize(x):
    """Normalize a JSONPath expression into the internal ';'-delimited form.

    Kept outside jsonpath() so it can be tested on its own.

    Index and filter expressions (``(...)`` / ``?(...)`` wrapped in brackets
    or quotes) are first swapped for ``#N`` placeholders so that the
    separator rewriting cannot touch their contents, and are put back at
    the end.

    Args:
        x: JSONPath expression, e.g. ``"$.store.book[*].author"``.

    Returns:
        The normalized expression, e.g. ``"$;store;book;*;author"``.
    """
    subx = []

    # replace index/filter expressions with placeholders
    def stash(m):
        subx.append(m.group(1))
        return "[#%d]" % (len(subx) - 1)

    x = re.sub(r"[\['](\??\(.*?\))[\]']", stash, x)

    # '.' and '[' become ';' -- the negative lookbehind leaves "@." alone
    x = re.sub(r"'?(?<!@)\.'?|\['?", ";", x)

    # runs of two or three separators become the ".." step
    x = re.sub(r";;;|;;", ";..;", x)

    # drop a trailing ';', closing brackets and a trailing quote
    x = re.sub(r";$|'?\]|'$", "", x)

    # put expressions back
    def restore(m):
        n = int(m.group(1))
        if n < len(subx):
            return subx[n]
        # Not one of our placeholders (a literal "#<digits>" in the
        # expression): leave the text untouched instead of raising IndexError.
        return m.group(0)

    x = re.sub(r"#([0-9]+)", restore, x)

    return x
61+
62+
63+
def jsonpath(obj, expr, result_type="VALUE", debug=0, use_eval=True):
    """Traverse a JSON object using a JSONPath expression.

    Args:
        obj: The decoded JSON document (dicts / lists / scalars).
        expr: The JSONPath expression, e.g. ``"$..id"``.
        result_type: ``"VALUE"`` collects matched values, ``"IPATH"``
            collects each match as a list of path components, and any other
            value collects paths in full bracket notation
            (e.g. ``$['store']['book'][0]``).
        debug: Truthy values print trace output; values above 1 print more.
        use_eval: When false, any index ``(...)`` or filter ``?(...)``
            expression raises ``Exception("eval disabled")``.

    Returns:
        A non-empty list of results, or ``False`` when nothing matched or
        when ``expr`` / ``obj`` is falsy.

    Raises:
        Exception: if an index/filter expression is met while ``use_eval``
            is false.

    NOTE(security): index and filter expressions are run through ``eval``
    with the caller's globals. Never pass expressions from untrusted
    sources with ``use_eval`` enabled.
    """

    def s(x, y):
        """concatenate path elements"""
        return str(x) + ";" + str(y)

    def isint(x):
        """check if argument represents a decimal integer"""
        # isdecimal() (unlike isdigit()) only accepts characters that int()
        # can parse, so e.g. a superscript "2" no longer leads to ValueError.
        return x.isdecimal()

    def as_path(path):
        """convert internal path representation to
        "full bracket notation" for PATH output"""
        p = "$"
        for piece in path.split(";")[1:]:
            # make a guess on how to index
            # XXX need to apply \ quoting on '!!
            if isint(piece):
                p += "[%s]" % piece
            else:
                p += "['%s']" % piece
        return p

    def store(path, value):
        """record one match in the form selected by result_type"""
        if result_type == "VALUE":
            result.append(value)
        elif result_type == "IPATH":  # Index format path (Python ext)
            # return list of list of indices -- can be used w/o "eval" or split
            result.append(path.split(";")[1:])
        else:  # PATH
            result.append(as_path(path))
        return path

    def trace(expr, obj, path):
        """consume the first step of expr against obj, recursing on the rest"""
        if debug:
            print("trace", expr, "/", path)
        if not expr:
            # expression exhausted: obj is a match
            store(path, obj)
            return

        x = expr.split(";")
        loc = x[0]
        x = ";".join(x[1:])
        if debug:
            print("\t", loc, type(obj))
        if loc == "*":

            def f03(key, loc, expr, obj, path):
                if debug > 1:
                    print("\tf03", key, loc, expr, path)
                trace(s(key, expr), obj, path)

            walk(loc, x, obj, path, f03)
        elif loc == "..":
            # try the remainder here first, then descend into every child
            trace(x, obj, path)

            def f04(key, loc, expr, obj, path):
                if debug > 1:
                    print("\tf04", key, loc, expr, path)
                if isinstance(obj, dict):
                    if key in obj:
                        trace(s("..", expr), obj[key], s(path, key))
                else:
                    if key < len(obj):
                        trace(s("..", expr), obj[key], s(path, key))

            walk(loc, x, obj, path, f04)
        elif loc == "!":
            # Perl jsonpath extension: return keys
            def f06(key, loc, expr, obj, path):
                if isinstance(obj, dict):
                    trace(expr, key, path)

            walk(loc, x, obj, path, f06)
        elif isinstance(obj, dict) and loc in obj:
            trace(x, obj[loc], s(path, loc))
        elif isinstance(obj, list) and isint(loc):
            iloc = int(loc)
            if debug:
                print("----->", iloc, len(obj))
            if len(obj) > iloc:
                trace(x, obj[iloc], s(path, loc))
        else:
            # [(index_expression)]
            if loc.startswith("(") and loc.endswith(")"):
                if debug > 1:
                    print("index", loc)
                e = evalx(loc, obj)
                trace(s(e, x), obj, path)
                return

            # ?(filter_expression)
            if loc.startswith("?(") and loc.endswith(")"):
                if debug > 1:
                    print("filter", loc)

                def f05(key, loc, expr, obj, path):
                    if debug > 1:
                        print("f05", key, loc, expr, path)
                    if isinstance(obj, dict):
                        eval_result = evalx(loc, obj[key])
                    else:
                        eval_result = evalx(loc, obj[int(key)])
                    if eval_result:
                        trace(s(key, expr), obj, path)

                loc = loc[2:-1]
                walk(loc, x, obj, path, f05)
                return

            # [start:end:step] slice
            m = re.match(r"(-?[0-9]*):(-?[0-9]*):?(-?[0-9]*)$", loc)
            if m:
                if isinstance(obj, (dict, list)):
                    objlen = len(obj)
                    s0 = m.group(1)
                    s1 = m.group(2)
                    s2 = m.group(3)

                    # XXX int("badstr") raises exception
                    start = int(s0) if s0 else 0
                    end = int(s1) if s1 else objlen
                    step = int(s2) if s2 else 1

                    # clamp both bounds into [0, objlen]; negative values
                    # count from the end
                    if start < 0:
                        start = max(0, start + objlen)
                    else:
                        start = min(objlen, start)
                    if end < 0:
                        end = max(0, end + objlen)
                    else:
                        end = min(objlen, end)

                    for i in range(start, end, step):
                        trace(s(i, x), obj, path)
                return

            # after (expr) & ?(expr)
            if "," in loc:
                # [index,index....]
                for piece in re.split(r"'?,'?", loc):
                    if debug > 1:
                        print("piece", piece)
                    trace(s(piece, x), obj, path)

    def walk(loc, expr, obj, path, funct):
        """call funct once per list index or dict key of obj"""
        if isinstance(obj, list):
            for i in range(len(obj)):
                funct(i, loc, expr, obj, path)
        elif isinstance(obj, dict):
            for key in obj:
                funct(key, loc, expr, obj, path)

    def evalx(loc, obj):
        """eval expression"""

        if debug:
            print("evalx", loc)

        # a nod to JavaScript. doesn't work for @.name.name.length
        # Write len(@.name.name) instead!!!
        loc = loc.replace("@.length", "len(__obj)")

        loc = loc.replace("&&", " and ").replace("||", " or ")

        # replace !@.name with 'name' not in __obj
        # XXX handle !@.name.name.name....
        def notvar(m):
            return "'%s' not in __obj" % m.group(1)

        # raw string: "\." in a plain literal is an invalid escape sequence
        loc = re.sub(r"!@\.([a-zA-Z@_0-9-]*)", notvar, loc)

        # replace @.name.... with __obj['name']....
        # handle @.name[.name...].length
        def varmatch(m):
            def brackets(elts):
                ret = "__obj"
                for e in elts:
                    if isint(e):
                        ret += "[%s]" % e  # ain't necessarily so
                    else:
                        ret += "['%s']" % e  # XXX beware quotes!!!!
                return ret

            g1 = m.group(1)
            elts = g1.split(".")
            if elts[-1] == "length":
                return "len(%s)" % brackets(elts[1:-1])
            return brackets(elts[1:])

        loc = re.sub(r"(?<!\\)(@\.[a-zA-Z@_.0-9]+)", varmatch, loc)

        # removed = -> == translation
        # causes problems if a string contains =

        # replace @ w/ "__obj", but \@ means a literal @
        loc = re.sub(r"(?<!\\)@", "__obj", loc).replace(r"\@", "@")
        if not use_eval:
            if debug:
                print("eval disabled")
            raise Exception("eval disabled")
        if debug:
            print("eval", loc)
        try:
            # eval w/ caller globals, w/ local "__obj"!
            # NOTE(security): this executes arbitrary Python taken from the
            # path expression; only safe for trusted expressions.
            v = eval(loc, caller_globals, {"__obj": obj})
        except Exception as e:
            # best effort: an expression that fails to evaluate is treated
            # as "no match"
            if debug:
                print(repr(e))
            return False

        if debug:
            print("->", v)
        return v

    # body of jsonpath()

    # Get caller globals so eval can pick up user functions!!!
    caller_globals = sys._getframe(1).f_globals
    result = []
    if expr and obj:
        cleaned_expr = normalize(expr)
        if cleaned_expr.startswith("$;"):
            cleaned_expr = cleaned_expr[2:]

        # XXX wrap this in a try??
        trace(cleaned_expr, obj, "$")

        if result:
            return result
    return False
304+
305+
306+
if __name__ == "__main__":
    # Ad-hoc manual run: load tests/data.json and print every match of the
    # expression below, one per line.
    with open("tests/data.json", "rb") as f:
        city_dict = json.load(f)
    D = jsonpath(
        city_dict,
        # expr="$.content.data.allCitySearchLabels[A,D][?(@.id>=700 and @.id<=800)]",
        expr="$..id",
    )
    # jsonpath() returns False (not an empty list) when nothing matches, so
    # fall back to an empty list instead of iterating over a bool.
    for i in D or []:
        print(i)

setup.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""
2+
Author : zhangxianbing1
3+
Date : 2020-12-27 11:32:20
4+
LastEditors : zhangxianbing1
5+
LastEditTime : 2020-12-27 12:42:18
6+
Description :
7+
"""
8+
from setuptools import setup, find_packages
9+
from jsonpath import __version__, __author__
10+
11+
# Package metadata; the version and author strings are taken from the
# jsonpath package itself (imported above) rather than being repeated here.
setup(
    name="jsonpath",
    version=__version__,
    author=__author__,
    packages=find_packages(),
    python_requires=">=3.6",
    zip_safe=False,
)

0 commit comments

Comments
 (0)