feat: add jsonpath

sean2077 · sean2077 · commit c3fc7b0c4620 · 2020-12-27T12:49:14.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+.vscode
diff --git a/README.md b/README.md
@@ -1,2 +1,9 @@
 # jsonpath-python
-A JSONPath implementations for python. (More powerful and modern!!!)
+
+A more powerful JSONPath implementation in modern Python.
+
+## Features
+
+- [x] Light. (No need to install third-party dependencies.)
+- [ ] Support fields-extractor.
+- [ ] Support simple multi-selection and inverse-selection.
diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py
@@ -0,0 +1,315 @@
+"""
+Author       : zhangxianbing1
+Date         : 2020-12-27 09:22:14
+LastEditors  : zhangxianbing1
+LastEditTime : 2020-12-27 12:48:15
+Description  : 
+"""
+__version__ = "1.0.0"  # package version; also imported by setup.py
+__author__ = "zhangxianbing"  # package author; also imported by setup.py
+
+
+import json
+import re
+import sys
+
+# XXX BUGS:
+# evalx is generally a crock:
+#       handle !@.name.name???
+# there are probably myriad unexpected ways to get an exception:
+#       wrap initial "trace" call in jsonpath body in a try/except??
+
+# XXX TODO:
+# internally keep paths as lists to preserve integer types
+#       (instead of as ';' delimited strings)
+
+__all__ = ["jsonpath"]  # only jsonpath() is exported by a star-import; normalize() must be imported by name
+
+
+def normalize(x):
+    """normalize the path expression; outside jsonpath to allow testing"""
+    subx = []  # saved "(...)" / "?(...)" expressions, indexed by placeholder number
+    # The result is a ';'-delimited string, e.g. "$..book[0]" -> "$;..;book;0".
+    # replace index/filter expressions with placeholders
+    # (a named helper is used instead of a lambda because it is easier to debug)
+    def f1(m):
+        n = len(subx)  # before append
+        g1 = m.group(1)  # expression text without the enclosing bracket/quote characters
+        subx.append(g1)
+        ret = "[#%d]" % n
+        # the brackets are kept so the passes below treat the placeholder like an index
+        return ret
+    # pass 1: hide each bracketed/quoted "(...)" or "?(...)" behind a "[#n]" placeholder
+    x = re.sub(r"[\['](\??\(.*?\))[\]']", f1, x)
+    # pass 2: '.' (optionally quoted, not directly after '@') and '[' become ';'
+    # added the negative lookbehind -krhodes
+    x = re.sub(r"'?(?<!@)\.'?|\['?", ";", x)
+    # pass 3: an empty segment (two or three ';' in a row) becomes the ".." marker
+    x = re.sub(r";;;|;;", ";..;", x)
+    # pass 4: drop a trailing ';', every ']' (with its preceding quote) and a trailing quote
+    x = re.sub(r";$|'?\]|'$", "", x)
+
+    # put expressions back
+    def f2(m):
+        g1 = m.group(1)  # the placeholder number as a string
+        # look the saved expression up by its placeholder number
+        return subx[int(g1)]
+    # NOTE(review): every "#<digits>" in x is treated as a placeholder; an unknown number raises IndexError
+    x = re.sub(r"#([0-9]+)", f2, x)
+
+    return x
+
+
+def jsonpath(obj, expr, result_type="VALUE", debug=0, use_eval=True):
+    """traverse JSON object using jsonpath expr, returning values or paths"""
+    # Returns the list of matches, or False when nothing matched or obj/expr is falsy.
+    def s(x, y):
+        """concatenate path elements"""
+        return str(x) + ";" + str(y)
+
+    def isint(x):
+        """check if argument represents a decimal integer"""
+        return x.isdigit()  # x must be a str; a signed value such as "-1" is not accepted
+
+    def as_path(path):
+        """convert internal path representation to
+        "full bracket notation" for PATH output"""
+        p = "$"
+        for piece in path.split(";")[1:]:  # [1:] skips the leading "$" element
+            # make a guess on how to index
+            # XXX need to apply \ quoting on '!!
+            if isint(piece):
+                p += "[%s]" % piece
+            else:
+                p += "['%s']" % piece
+        return p
+
+    def store(path, object):  # record one match in the format selected by result_type
+        if result_type == "VALUE":
+            result.append(object)
+        elif result_type == "IPATH":  # Index format path (Python ext)
+            # return list of list of indices -- can be used w/o "eval" or split
+            result.append(path.split(";")[1:])
+        else:  # PATH
+            result.append(as_path(path))
+        return path
+    # trace: match the first ';' segment of expr against obj; store obj once expr is exhausted
+    def trace(expr, obj, path):
+        if debug:  # debug > 1 additionally prints per-key output in the helpers below
+            print("trace", expr, "/", path)
+        if expr:
+            x = expr.split(";")
+            loc = x[0]  # segment handled by this call
+            x = ";".join(x[1:])  # remaining segments, handed to the recursive calls
+            if debug:
+                print("\t", loc, type(obj))
+            if loc == "*":  # wildcard: re-trace with every key/index substituted for "*"
+
+                def f03(key, loc, expr, obj, path):
+                    if debug > 1:
+                        print("\tf03", key, loc, expr, path)
+                    trace(s(key, expr), obj, path)
+
+                walk(loc, x, obj, path, f03)
+            elif loc == "..":  # recursive descent: try the rest here, then under every child
+                trace(x, obj, path)
+
+                def f04(key, loc, expr, obj, path):
+                    if debug > 1:
+                        print("\tf04", key, loc, expr, path)
+                    if isinstance(obj, dict):
+                        if key in obj:
+                            trace(s("..", expr), obj[key], s(path, key))
+                    else:
+                        if key < len(obj):
+                            trace(s("..", expr), obj[key], s(path, key))
+
+                walk(loc, x, obj, path, f04)
+            elif loc == "!":
+                # Perl jsonpath extension: return keys
+                def f06(key, loc, expr, obj, path):
+                    if isinstance(obj, dict):  # list indices are ignored by this extension
+                        trace(expr, key, path)  # the key itself becomes the object being traced
+
+                walk(loc, x, obj, path, f06)
+            elif isinstance(obj, dict) and loc in obj:  # plain member name
+                trace(x, obj[loc], s(path, loc))
+            elif isinstance(obj, list) and isint(loc):  # plain non-negative list index
+                iloc = int(loc)
+                if debug:
+                    print("----->", iloc, len(obj))
+                if len(obj) > iloc:  # an out-of-range index yields no match
+                    trace(x, obj[iloc], s(path, loc))
+            else:
+                # [(index_expression)]
+                if loc.startswith("(") and loc.endswith(")"):
+                    if debug > 1:
+                        print("index", loc)
+                    e = evalx(loc, obj)  # computed index: the evaluated value becomes the next segment
+                    trace(s(e, x), obj, path)
+                    return
+
+                # ?(filter_expression)
+                if loc.startswith("?(") and loc.endswith(")"):
+                    if debug > 1:
+                        print("filter", loc)
+
+                    def f05(key, loc, expr, obj, path):
+                        if debug > 1:
+                            print("f05", key, loc, expr, path)
+                        if isinstance(obj, dict):
+                            eval_result = evalx(loc, obj[key])
+                        else:
+                            eval_result = evalx(loc, obj[int(key)])
+                        if eval_result:  # keep only children for which the filter is truthy
+                            trace(s(key, expr), obj, path)
+
+                    loc = loc[2:-1]  # strip "?(" and ")"; walk passes the bare expression to f05
+                    walk(loc, x, obj, path, f05)
+                    return
+                # start:end[:step] slice; each part is optional and may be negative
+                m = re.match(r"(-?[0-9]*):(-?[0-9]*):?(-?[0-9]*)$", loc)
+                if m:
+                    if isinstance(obj, (dict, list)):
+
+                        def max(x, y):  # shadows the builtin max inside this branch
+                            if x > y:
+                                return x
+                            return y
+
+                        def min(x, y):  # shadows the builtin min inside this branch
+                            if x < y:
+                                return x
+                            return y
+
+                        objlen = len(obj)
+                        s0 = m.group(1)
+                        s1 = m.group(2)
+                        s2 = m.group(3)
+
+                        # XXX int("badstr") raises exception
+                        start = int(s0) if s0 else 0
+                        end = int(s1) if s1 else objlen
+                        step = int(s2) if s2 else 1  # NOTE(review): a step of 0 makes range() below raise ValueError
+                        # negative bounds count from the end; both bounds are clamped to 0..objlen
+                        if start < 0:
+                            start = max(0, start + objlen)
+                        else:
+                            start = min(objlen, start)
+                        if end < 0:
+                            end = max(0, end + objlen)
+                        else:
+                            end = min(objlen, end)
+
+                        for i in range(start, end, step):
+                            trace(s(i, x), obj, path)  # each selected index is re-traced as a plain segment
+                    return
+
+                # after (expr) & ?(expr)
+                if loc.find(",") >= 0:
+                    # [index,index....]
+                    for piece in re.split(r"'?,'?", loc):
+                        if debug > 1:
+                            print("piece", piece)
+                        trace(s(piece, x), obj, path)
+        else:  # expr exhausted: obj is a match
+            store(path, obj)
+    # walk: call funct once per list index or dict key; any other type is skipped
+    def walk(loc, expr, obj, path, funct):
+        if isinstance(obj, list):
+            for i in range(0, len(obj)):
+                funct(i, loc, expr, obj, path)
+        elif isinstance(obj, dict):
+            for key in obj:
+                funct(key, loc, expr, obj, path)
+
+    def evalx(loc, obj):
+        """eval expression"""
+        # loc is rewritten into Python source in which __obj stands for the current node
+        if debug:
+            print("evalx", loc)
+
+        # a nod to JavaScript. doesn't work for @.name.name.length
+        # Write len(@.name.name) instead!!!
+        loc = loc.replace("@.length", "len(__obj)")
+
+        loc = loc.replace("&&", " and ").replace("||", " or ")
+
+        # replace !@.name with 'name' not in obj
+        # XXX handle !@.name.name.name....
+        def notvar(m):
+            return "'%s' not in __obj" % m.group(1)
+
+        loc = re.sub("!@\.([a-zA-Z@_0-9-]*)", notvar, loc)  # NOTE(review): pattern is not a raw string ("\." escape)
+
+        # replace @.name.... with __obj['name']....
+        # handle @.name[.name...].length
+        def varmatch(m):
+            def brackets(elts):
+                ret = "__obj"
+                for e in elts:
+                    if isint(e):
+                        ret += "[%s]" % e  # ain't necessarily so
+                    else:
+                        ret += "['%s']" % e  # XXX beware quotes!!!!
+                return ret
+
+            g1 = m.group(1)
+            elts = g1.split(".")  # elts[0] is "@" because the match starts with "@."
+            if elts[-1] == "length":
+                return "len(%s)" % brackets(elts[1:-1])
+            return brackets(elts[1:])
+
+        loc = re.sub(r"(?<!\\)(@\.[a-zA-Z@_.0-9]+)", varmatch, loc)
+
+        # removed = -> == translation
+        # causes problems if a string contains =
+
+        # replace @  w/ "__obj", but \@ means a literal @
+        loc = re.sub(r"(?<!\\)@", "__obj", loc).replace(r"\@", "@")
+        if not use_eval:
+            if debug:
+                print("eval disabled")
+            raise Exception("eval disabled")  # raised outside the try below, so it propagates to the caller
+        if debug:
+            print("eval", loc)
+        try:
+            # eval w/ caller globals, w/ local "__obj"!
+            v = eval(loc, caller_globals, {"__obj": obj})  # NOTE(security): runs text taken from expr; never pass untrusted paths
+        except Exception as e:
+            if debug:
+                print(repr(e))
+            return False  # any evaluation error is reported as a falsy result
+
+        if debug:
+            print("->", v)
+        return v
+
+    # body of jsonpath()
+
+    # Get caller globals so eval can pick up user functions!!!
+    caller_globals = sys._getframe(1).f_globals  # frame 1 is the code that called jsonpath()
+    result = []  # filled by store() while trace() runs
+    if expr and obj:  # an empty expr or a falsy obj (e.g. {} or []) goes straight to "return False"
+        cleaned_expr = normalize(expr)
+        if cleaned_expr.startswith("$;"):
+            cleaned_expr = cleaned_expr[2:]  # drop the root marker; trace() is started with path "$"
+
+        # XXX wrap this in a try??
+        trace(cleaned_expr, obj, "$")
+
+        if len(result) > 0:
+            return result
+    return False  # no match is reported as False, not as an empty list
+
+
+if __name__ == "__main__":  # manual demo: print every match of "$..id" in the test data
+    with open("tests/data.json", "rb") as f:  # path is relative to the current working directory
+        city_dict = json.load(f)
+    D = jsonpath(
+        city_dict,
+        # expr="$.content.data.allCitySearchLabels[A,D][?(@.id>=700 and @.id<=800)]",
+        expr="$..id",
+    )
+    for i in D:  # NOTE(review): D is False when nothing matches, which would raise TypeError here
+        print(i)
diff --git a/setup.py b/setup.py
@@ -0,0 +1,18 @@
+"""
+Author       : zhangxianbing1
+Date         : 2020-12-27 11:32:20
+LastEditors  : zhangxianbing1
+LastEditTime : 2020-12-27 12:42:18
+Description  : 
+"""
+from setuptools import setup, find_packages
+from jsonpath import __version__, __author__
+
+setup(
+    name="jsonpath",
+    version=__version__,  # imported from the jsonpath package, so running setup.py imports that package
+    author=__author__,  # imported from the jsonpath package as well
+    packages=find_packages(),
+    python_requires=">=3.6",
+    zip_safe=False,
+)
diff --git a/tests/data.json b/tests/data.json

Original file line number	Diff line number	Diff line change
`@@ -127,3 +127,4 @@ dmypy.json`
`127`	`127`
`128`	`128`	`# Pyre type checker`
`129`	`129`	`.pyre/`
	`130`	`+.vscode`