22Author : zhangxianbing1
33Date : 2020-12-27 09:22:14
44LastEditors : zhangxianbing1
5- LastEditTime : 2021-01-04 12:40:59
5+ LastEditTime : 2021-01-04 14:10:00
66Description : JSONPath
77"""
88__version__ = "0.0.3"
99__author__ = "zhangxianbing"
1010
1111import json
12+ import os
1213import re
14+ import logging
1315from collections import defaultdict
1416from typing import Union
1517
16- RESULT_TYPE = {
17- "VALUE" : "A list of specific values." ,
18- "FIELD" : "A dict with specific fields." ,
19- "PATH" : "All path of specific values." ,
20- }
18+ # pylint: disable=invalid-name,missing-function-docstring,missing-class-docstring,eval-used,logging-fstring-interpolation
2119
2220
23- SEP = ";"
24- # regex patterns
25- REP_PICKUP_QUOTE = re .compile (r"['](.*?)[']" )
26- REP_PICKUP_BRACKET = re .compile (r"[\[](.*?)[\]]" )
27- REP_PUTBACK_QUOTE = re .compile (r"#Q(\d+)" )
28- REP_PUTBACK_BRACKET = re .compile (r"#B(\d+)" )
29- REP_DOUBLEDOT = re .compile (r"\.\." )
30- REP_DOT = re .compile (r"(?<!\.)\.(?!\.)" )
def create_logger(name: Union[str, None] = None, level: Union[int, str] = logging.INFO):
    """Get or create a logger used for local debug.

    Args:
        name: Name handed to ``logging.getLogger`` and embedded in every
            formatted line. ``None`` selects the root logger.
        level: Threshold applied to both the logger and the stream handler
            managed here; either a numeric level or a level name such as
            ``"DEBUG"``.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    formatter = logging.Formatter(
        f"%(asctime)s-%(levelname)s-[{name}] %(message)s", datefmt="[%Y-%m-%d %H:%M:%S]"
    )

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # logging.getLogger returns the same object for the same name, so a second
    # call must reuse the handler attached earlier; adding another one would
    # emit every record twice.
    marker = "_create_logger_handler"
    handler = next((h for h in logger.handlers if getattr(h, marker, False)), None)
    if handler is None:
        handler = logging.StreamHandler()
        setattr(handler, marker, True)
        logger.addHandler(handler)

    # Refresh on every call so the most recent level/format wins.
    handler.setLevel(level)
    handler.setFormatter(formatter)

    return logger
4137
42- def _getattr (obj : dict , path : str ):
43- r = obj
44- for k in path .split ("." ):
45- try :
46- r = r .get (k )
47- except (AttributeError , KeyError ) as err :
48- print (err )
49- return None
5038
51- return r
# Module-wide logger named "jsonpath"; its level is read from the PYLOGLEVEL
# environment variable and falls back to "INFO" when that is unset.
LOG = create_logger("jsonpath", os.environ.get("PYLOGLEVEL", "INFO"))
5240
5341
class ExprSyntaxError(Exception):
    """Error type for JSONPath expressions that cannot be applied."""
5644
5745
5846class JSONPath :
47+ RESULT_TYPE = {
48+ "VALUE" : "A list of specific values." ,
49+ "FIELD" : "A dict with specific fields." ,
50+ "PATH" : "All path of specific values." ,
51+ }
52+
53+ SEP = ";"
54+ # regex patterns
55+ REP_PICKUP_QUOTE = re .compile (r"['](.*?)[']" )
56+ REP_PICKUP_BRACKET = re .compile (r"[\[](.*?)[\]]" )
57+ REP_PUTBACK_QUOTE = re .compile (r"#Q(\d+)" )
58+ REP_PUTBACK_BRACKET = re .compile (r"#B(\d+)" )
59+ REP_DOUBLEDOT = re .compile (r"\.\." )
60+ REP_DOT = re .compile (r"(?<!\.)\.(?!\.)" )
61+
62+ # operators
63+ REP_SLICE_CONTENT = re .compile (r"^(-?\d*)?:(-?\d*)?(:-?\d*)?$" )
64+ REP_SELECT_CONTENT = re .compile (r"^([\w.]+)(,[\w.]+)+$" )
65+ REP_FILTER_CONTENT = re .compile (
66+ r"@\.(.*?)(?=<=|>=|==|!=|>|<| in| not| is)|len\(@\.(.*?)\)"
67+ )
68+
5969 # annotations
6070 steps : list
6171 lpath : int
@@ -65,25 +75,40 @@ class JSONPath:
6575
def __init__(self, expr: str):
    """Normalise ``expr`` and store its steps.

    The expression is rewritten by ``_parse_expr`` and then split on
    ``JSONPath.SEP``; the resulting list and its length are kept on the
    instance as ``steps`` and ``lpath``.
    """
    normalized = self._parse_expr(expr)
    pieces = normalized.split(JSONPath.SEP)
    self.steps = pieces
    self.lpath = len(pieces)
    LOG.debug(f"steps : {self.steps}")
81+
def parse(self, obj, result_type="VALUE"):
    """Evaluate the compiled expression against ``obj``.

    Args:
        obj: The document to search; must be a ``list`` or a ``dict``.
        result_type: One of the keys of ``JSONPath.RESULT_TYPE``.

    Returns:
        ``self.result``: a dict when ``result_type`` is ``"FIELD"``,
        otherwise a list, as filled in by ``_trace``.

    Raises:
        TypeError: ``obj`` is neither a list nor a dict.
        ValueError: ``result_type`` is not a known result type.
    """
    if not isinstance(obj, (list, dict)):
        raise TypeError("obj must be a list or a dict.")
    if result_type not in JSONPath.RESULT_TYPE:
        raise ValueError(
            f"result_type must be one of {tuple(JSONPath.RESULT_TYPE.keys())}"
        )

    self.result_type = result_type
    # FIELD results are collected into a mapping, everything else into a list.
    self.result = {} if result_type == "FIELD" else []

    # Start the walk at step 0 with the root path marker.
    self._trace(obj, 0, "$")
    return self.result
7198
def _parse_expr(self, expr):
    """Rewrite a raw expression into ``JSONPath.SEP``-delimited steps.

    The substitutions run in a fixed order: quoted and bracketed segments
    are handed to the pickup callbacks first (presumably to shield them as
    placeholders -- the callbacks are outside this view), then ``..`` and
    single ``.`` are turned into separators, and finally the putback
    callbacks run in the opposite order. A leading ``"$;"`` is dropped.
    """
    LOG.debug(f"before expr : {expr}")

    sep = JSONPath.SEP
    # (pattern, replacement) pairs; the order is significant.
    pipeline = (
        (JSONPath.REP_PICKUP_QUOTE, self._f_pickup_quote),
        (JSONPath.REP_PICKUP_BRACKET, self._f_pickup_bracket),
        (JSONPath.REP_DOUBLEDOT, f"{sep}..{sep}"),
        (JSONPath.REP_DOT, sep),
        (JSONPath.REP_PUTBACK_BRACKET, self._f_putback_bracket),
        (JSONPath.REP_PUTBACK_QUOTE, self._f_putback_quote),
    )
    for pattern, replacement in pipeline:
        expr = pattern.sub(replacement, expr)

    if expr.startswith("$;"):
        expr = expr[2:]

    LOG.debug(f"after expr : {expr}")
    return expr
88113
89114 def _f_pickup_quote (self , m ):
@@ -109,36 +134,43 @@ def _f_brackets(m):
109134 ret += '["%s"]' % e
110135 return ret
111136
112- def parse (self , obj , result_type = "VALUE" ):
113- if not isinstance (obj , (list , dict )):
114- raise TypeError ("obj must be a list or a dict." )
115- if result_type not in RESULT_TYPE :
116- raise ValueError (f"result_type must be one of { tuple (RESULT_TYPE .keys ())} " )
117- self .result_type = result_type
118- if self .result_type == "FIELD" :
119- self .result = {}
120- else :
121- self .result = []
122-
123- self ._trace (obj , 0 , "$" )
124-
125- return self .result
126-
127137 @staticmethod
128138 def _traverse (f , obj , i : int , path : str , * args ):
129139 if isinstance (obj , list ):
130140 for idx , v in enumerate (obj ):
131- f (v , i , f"{ path } { SEP } { idx } " , * args )
141+ f (v , i , f"{ path } { JSONPath . SEP } { idx } " , * args )
132142 elif isinstance (obj , dict ):
133143 for k , v in obj .items ():
134- f (v , i , f"{ path } { SEP } { k } " , * args )
144+ f (v , i , f"{ path } { JSONPath .SEP } { k } " , * args )
145+
146+ @staticmethod
147+ def _getattr (obj : dict , path : str ):
148+ r = obj
149+ for k in path .split ("." ):
150+ try :
151+ r = r .get (k )
152+ except (AttributeError , KeyError ) as err :
153+ LOG .error (err )
154+ return None
155+
156+ return r
157+
158+ @staticmethod
159+ def _sorter (obj , sortbys ):
160+ for sortby in sortbys .split ("," )[::- 1 ]:
161+ if sortby .startswith ("~" ):
162+ obj .sort (
163+ key = lambda t , k = sortby : JSONPath ._getattr (t [1 ], k [1 :]), reverse = True
164+ )
165+ else :
166+ obj .sort (key = lambda t , k = sortby : JSONPath ._getattr (t [1 ], k ))
135167
136168 def _filter (self , obj , i : int , path : str , step : str ):
137169 r = False
138170 try :
139171 r = eval (step , None , {"__obj" : obj })
140172 except Exception as err :
141- print (err )
173+ LOG . error (err )
142174 if r :
143175 self ._trace (obj , i , path )
144176
@@ -158,7 +190,7 @@ def _trace(self, obj, i: int, path):
158190 self .result .append (path )
159191 elif self .result_type == "FIELD" :
160192 pass
161- print ( obj )
193+ LOG . debug ( f"path: { path } | value: { obj } " )
162194 return
163195
164196 step = self .steps [i ]
@@ -178,61 +210,55 @@ def _trace(self, obj, i: int, path):
178210 if isinstance (obj , list ) and step .isdigit ():
179211 ikey = int (step )
180212 if ikey < len (obj ):
181- self ._trace (obj [ikey ], i + 1 , f"{ path } { SEP } { step } " )
213+ self ._trace (obj [ikey ], i + 1 , f"{ path } { JSONPath . SEP } { step } " )
182214 return
183215
184216 # get value from dict
185217 if isinstance (obj , dict ) and step in obj :
186- self ._trace (obj [step ], i + 1 , f"{ path } { SEP } { step } " )
218+ self ._trace (obj [step ], i + 1 , f"{ path } { JSONPath . SEP } { step } " )
187219 return
188220
189221 # slice
190- if isinstance (obj , list ) and REP_SLICE_CONTENT .fullmatch (step ):
222+ if isinstance (obj , list ) and JSONPath .REP_SLICE_CONTENT .fullmatch (step ):
223+ obj = [(idx , v ) for idx , v in enumerate (obj )]
191224 vals = eval (f"obj[{ step } ]" )
192- for idx , v in enumerate ( vals ) :
193- self ._trace (v , i + 1 , f"{ path } { SEP } { idx } " )
225+ for idx , v in vals :
226+ self ._trace (v , i + 1 , f"{ path } { JSONPath . SEP } { idx } " )
194227 return
195228
196229 # select
197- if isinstance (obj , dict ) and REP_SELECT_CONTENT .fullmatch (step ):
230+ if isinstance (obj , dict ) and JSONPath . REP_SELECT_CONTENT .fullmatch (step ):
198231 for k in step .split ("," ):
199232 if k in obj :
200- self ._trace (obj [k ], i + 1 , f"{ path } { SEP } { k } " )
233+ self ._trace (obj [k ], i + 1 , f"{ path } { JSONPath . SEP } { k } " )
201234 return
202235
203236 # filter
204237 if step .startswith ("?(" ) and step .endswith (")" ):
205238 step = step [2 :- 1 ]
206- step = REP_FILTER_CONTENT .sub (self ._f_brackets , step )
239+ step = JSONPath . REP_FILTER_CONTENT .sub (self ._f_brackets , step )
207240 self ._traverse (self ._filter , obj , i + 1 , path , step )
208241 return
209242
210243 # sort
211244 if step .startswith ("/(" ) and step .endswith (")" ):
212245 if isinstance (obj , list ):
213- for sortby in step [2 :- 1 ].split ("," )[::- 1 ]:
214- if sortby .startswith ("~" ):
215- obj .sort (
216- key = lambda t , k = sortby : _getattr (t , k [1 :]), reverse = True
217- )
218- else :
219- obj .sort (key = lambda t , k = sortby : _getattr (t , k ))
246+ obj = [(idx , v ) for idx , v in enumerate (obj )]
247+ self ._sorter (obj , step [2 :- 1 ])
248+ for idx , v in obj :
249+ self ._trace (v , i + 1 , f"{ path } { JSONPath .SEP } { idx } " )
220250 elif isinstance (obj , dict ):
221251 obj = [(k , v ) for k , v in obj .items ()]
222- for sortby in step [2 :- 1 ].split ("," )[::- 1 ]:
223- if sortby .startswith ("~" ):
224- obj .sort (
225- key = lambda t , k = sortby : _getattr (t [1 ], k [1 :]), reverse = True
226- )
227- else :
228- obj .sort (key = lambda t , k = sortby : _getattr (t [1 ], k ))
229- obj = {k : v for k , v in obj }
230- self ._traverse (self ._trace , obj , i + 1 , path )
252+ self ._sorter (obj , step [2 :- 1 ])
253+ for k , v in obj :
254+ self ._trace (v , i + 1 , f"{ path } { JSONPath .SEP } { k } " )
255+ else :
256+ raise ExprSyntaxError ("sort operate must acting on list or dict" )
231257 return
232258
233259
if __name__ == "__main__":
    # Ad-hoc manual run: load the sample document and print the paths of all
    # book prices, visited in price order.
    with open("test/data/2.json", "rb") as fp:
        document = json.load(fp)
    paths = JSONPath("$.book[/(price)].price").parse(document, "PATH")
    print(paths)
0 commit comments