Skip to content

Commit 93bbe2b

Browse files
committed
Support filters without parentheses.
1 parent 573523c commit 93bbe2b

File tree

8 files changed

+38
-33
lines changed

8 files changed

+38
-33
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- Added the built-in `length()` function.
99
- Added the built-in `count()` function. `count()` is an alias for `length()`
1010
- Added the built-in `keys()` function.
11+
- Support filters without parentheses.
1112

1213
## Version 0.2.0
1314

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,16 @@ $...title
227227

228228
### Filters (`[?(EXPRESSION)]`)
229229

230-
Filters allow you to remove nodes from a selection using a Boolean expression. Within a filter, `@` refers to the current node and `$` refers to the root node in the target document. `@` and `$` can be used to select nodes as part of the expression.
230+
Filters allow you to remove nodes from a selection using a Boolean expression. Within a filter, `@` refers to the current node and `$` refers to the root node in the target document. `@` and `$` can be used to select nodes as part of the expression. Since version 0.3.0, the parentheses are optional, as per the IETF JSONPath draft. These two examples are equivalent.
231231

232232
```text
233233
$..products.*[?(@.price < $.price_cap)]
234234
```
235235

236+
```text
237+
$..products.*[?@.price < $.price_cap]
238+
```
239+
236240
Comparison operators include `==`, `!=`, `<`, `>`, `<=` and `>=`. Plus `<>` as an alias for `!=`.
237241

238242
`in` and `contains` are membership operators. `left in right` is equivalent to `right contains left`.
@@ -286,8 +290,7 @@ And this is a list of areas where we deviate from the [IETF JSONPath draft](http
286290
- The root token (default `$`) is optional.
287291
- Paths starting with a dot (`.`) are OK. `.thing` is the same as `$.thing`, as is `thing`, `$[thing]` and `$["thing"]`.
288292
- Nested filters are not supported.
289-
- When a filter is applied to an object (mapping) value, we do not silently apply that filter to the object's values. See the "Existence of non-singular queries" example in the IETF JSONPath draft.
290-
- Parentheses are required when writing filter selectors, as is common in existing JSONPath implementations. `$.some[?(@.thing)]` is OK, `$.some[?@.thing]` is not.
293+
- We don't treat filter expressions without a comparison as an existence test, but as an "is truthy" test. See the "Existence of non-singular queries" example in the IETF JSONPath draft.
291294

292295
And this is a list of features that are uncommon or unique to Python JSONPath.
293296

jsonpath/lex.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from .token import TOKEN_INTERSECTION
3434
from .token import TOKEN_LE
3535
from .token import TOKEN_LG
36-
from .token import TOKEN_LIST_END
3736
from .token import TOKEN_LIST_SLICE
3837
from .token import TOKEN_LIST_START
3938
from .token import TOKEN_LPAREN
@@ -46,6 +45,7 @@
4645
from .token import TOKEN_NULL
4746
from .token import TOKEN_OR
4847
from .token import TOKEN_PROPERTY
48+
from .token import TOKEN_RBRACKET
4949
from .token import TOKEN_RE
5050
from .token import TOKEN_RE_FLAGS
5151
from .token import TOKEN_RE_PATTERN
@@ -137,7 +137,7 @@ def compile_rules(self) -> Pattern[str]:
137137
(TOKEN_SLICE, self.slice_pattern),
138138
(TOKEN_WILD, self.wild_pattern),
139139
(TOKEN_LIST_SLICE, self.slice_list_pattern),
140-
(TOKEN_FILTER_START, r"\[\s*\?\s*\("),
140+
(TOKEN_FILTER_START, r"\[\s*\?\s*\(?"),
141141
(TOKEN_FILTER_END, r"\)\s*]"),
142142
(TOKEN_FUNCTION, self.function_pattern),
143143
(TOKEN_BRACKET_PROPERTY, self.bracketed_property_pattern),
@@ -162,7 +162,7 @@ def compile_rules(self) -> Pattern[str]:
162162
(TOKEN_UNDEFINED, r"undefined"),
163163
(TOKEN_MISSING, r"missing"),
164164
(TOKEN_LIST_START, r"\["),
165-
(TOKEN_LIST_END, r"]"),
165+
(TOKEN_RBRACKET, r"]"),
166166
(TOKEN_COMMA, r","),
167167
(TOKEN_EQ, r"=="),
168168
(TOKEN_NE, r"!="),

jsonpath/parse.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
from .token import TOKEN_INTERSECTION
5959
from .token import TOKEN_LE
6060
from .token import TOKEN_LG
61-
from .token import TOKEN_LIST_END
6261
from .token import TOKEN_LIST_START
6362
from .token import TOKEN_LPAREN
6463
from .token import TOKEN_LT
@@ -70,6 +69,7 @@
7069
from .token import TOKEN_NULL
7170
from .token import TOKEN_OR
7271
from .token import TOKEN_PROPERTY
72+
from .token import TOKEN_RBRACKET
7373
from .token import TOKEN_RE
7474
from .token import TOKEN_RE_FLAGS
7575
from .token import TOKEN_RE_PATTERN
@@ -288,7 +288,7 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:
288288
tok = stream.next_token()
289289
list_items: List[Union[IndexSelector, PropertySelector, SliceSelector]] = []
290290

291-
while stream.current.kind != TOKEN_LIST_END:
291+
while stream.current.kind != TOKEN_RBRACKET:
292292
if stream.current.kind == TOKEN_INT:
293293
list_items.append(
294294
IndexSelector(
@@ -313,7 +313,7 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:
313313
token=stream.current,
314314
)
315315

316-
if stream.peek.kind != TOKEN_LIST_END:
316+
if stream.peek.kind != TOKEN_RBRACKET:
317317
stream.next_token()
318318

319319
stream.next_token()
@@ -329,7 +329,7 @@ def parse_filter(self, stream: TokenStream) -> Filter:
329329
raise JSONPathSyntaxError("unbalanced ')'", token=stream.current)
330330

331331
stream.next_token()
332-
stream.expect(TOKEN_FILTER_END)
332+
stream.expect(TOKEN_FILTER_END, TOKEN_RBRACKET)
333333
return Filter(env=self.env, token=tok, expression=expr)
334334

335335
def parse_boolean(self, stream: TokenStream) -> FilterExpression:
@@ -379,13 +379,18 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
379379
stream.next_token()
380380

381381
while stream.current.kind != TOKEN_RPAREN:
382-
if stream.current.kind in (TOKEN_EOF, TOKEN_FILTER_END):
382+
if stream.current.kind == TOKEN_EOF:
383383
raise JSONPathSyntaxError(
384384
"unbalanced parentheses", token=stream.current
385385
)
386+
if stream.current.kind == TOKEN_FILTER_END:
387+
# In some cases, an RPAREN followed by an RBRACKET can
388+
# look like the long form "end of filter" token.
389+
stream.push(stream.current)
390+
break
386391
expr = self.parse_infix_expression(stream, expr)
387392

388-
stream.expect(TOKEN_RPAREN)
393+
stream.expect(TOKEN_RPAREN, TOKEN_FILTER_END)
389394
return expr
390395

391396
def parse_root_path(self, stream: TokenStream) -> FilterExpression:
@@ -419,7 +424,7 @@ def parse_list_literal(self, stream: TokenStream) -> FilterExpression:
419424
stream.next_token()
420425
list_items: List[FilterExpression] = []
421426

422-
while stream.current.kind != TOKEN_LIST_END:
427+
while stream.current.kind != TOKEN_RBRACKET:
423428
try:
424429
list_items.append(self.list_item_map[stream.current.kind](stream))
425430
except KeyError as err:
@@ -428,7 +433,7 @@ def parse_list_literal(self, stream: TokenStream) -> FilterExpression:
428433
token=stream.current,
429434
) from err
430435

431-
if stream.peek.kind != TOKEN_LIST_END:
436+
if stream.peek.kind != TOKEN_RBRACKET:
432437
stream.expect_peek(TOKEN_COMMA)
433438
stream.next_token()
434439

@@ -466,7 +471,7 @@ def parse_filter_selector(
466471
try:
467472
left = self.token_map[stream.current.kind](stream)
468473
except KeyError as err:
469-
if stream.current.kind in (TOKEN_EOF, TOKEN_FILTER_END):
474+
if stream.current.kind in (TOKEN_EOF, TOKEN_FILTER_END, TOKEN_RBRACKET):
470475
msg = "end of expression"
471476
else:
472477
msg = repr(stream.current.value)
@@ -477,7 +482,7 @@ def parse_filter_selector(
477482
while True:
478483
peek_kind = stream.peek.kind
479484
if (
480-
peek_kind in (TOKEN_EOF, TOKEN_FILTER_END)
485+
peek_kind in (TOKEN_EOF, TOKEN_FILTER_END, TOKEN_RBRACKET)
481486
or self.PRECEDENCES.get(peek_kind, self.PRECEDENCE_LOWEST) < precedence
482487
):
483488
break

jsonpath/stream.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,17 @@ def close(self) -> None:
7474
"""Close the stream."""
7575
self.current = Token(TOKEN_EOF, "", -1, "")
7676

77-
def expect(self, typ: str) -> None:
77+
def expect(self, *typ: str) -> None:
7878
""""""
79-
if self.current.kind != typ:
79+
if self.current.kind not in typ:
8080
raise JSONPathSyntaxError(
8181
f"expected {typ!r}, found {self.current.kind!r}",
8282
token=self.current,
8383
)
8484

85-
def expect_peek(self, typ: str) -> None:
85+
def expect_peek(self, *typ: str) -> None:
8686
""""""
87-
if self.peek.kind != typ:
87+
if self.peek.kind not in typ:
8888
raise JSONPathSyntaxError(
8989
f"expected {typ!r}, found {self.peek.kind!r}",
9090
token=self.peek,

jsonpath/token.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
TOKEN_FILTER_START = sys.intern("FILTER_START")
2020
TOKEN_IDENT = sys.intern("IDENT")
2121
TOKEN_INDEX = sys.intern("IDX")
22-
TOKEN_LIST_END = sys.intern("RBRACKET")
22+
TOKEN_RBRACKET = sys.intern("RBRACKET")
2323
TOKEN_BARE_PROPERTY = sys.intern("BARE_PROPERTY")
2424
TOKEN_LIST_SLICE = sys.intern("LSLICE")
2525
TOKEN_LIST_START = sys.intern("LBRACKET")

tests/compliance.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,6 @@ def cases() -> List[Case]:
4949

5050
def valid_cases() -> List[Case]:
5151
def mangle_filter(case: Case) -> Case:
52-
# XXX: Insert parentheses around filter expression :(
53-
if case.name.startswith("filter") and case.selector.count("]") == 1:
54-
case.selector = case.selector.replace("[?", "[?(").replace("]", ")]")
55-
5652
# XXX: Insert wildcard in front of root :(
5753
if (
5854
case.name.startswith("filter")

tests/test_lex.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@
2121
from jsonpath.token import TOKEN_INDEX
2222
from jsonpath.token import TOKEN_INT
2323
from jsonpath.token import TOKEN_INTERSECTION
24-
from jsonpath.token import TOKEN_LIST_END
2524
from jsonpath.token import TOKEN_LIST_START
2625
from jsonpath.token import TOKEN_LT
2726
from jsonpath.token import TOKEN_NIL
2827
from jsonpath.token import TOKEN_NOT
2928
from jsonpath.token import TOKEN_OR
3029
from jsonpath.token import TOKEN_PROPERTY
30+
from jsonpath.token import TOKEN_RBRACKET
3131
from jsonpath.token import TOKEN_RE
3232
from jsonpath.token import TOKEN_RE_FLAGS
3333
from jsonpath.token import TOKEN_RE_PATTERN
@@ -84,7 +84,7 @@ class Case:
8484
Token(kind=TOKEN_ROOT, value="$", index=0, path='$["some"]'),
8585
Token(kind=TOKEN_LIST_START, value="[", index=1, path='$["some"]'),
8686
Token(kind=TOKEN_STRING, value="some", index=3, path='$["some"]'),
87-
Token(kind=TOKEN_LIST_END, value="]", index=8, path='$["some"]'),
87+
Token(kind=TOKEN_RBRACKET, value="]", index=8, path='$["some"]'),
8888
],
8989
),
9090
Case(
@@ -94,7 +94,7 @@ class Case:
9494
Token(kind=TOKEN_ROOT, value="$", index=0, path="$['some']"),
9595
Token(kind=TOKEN_LIST_START, value="[", index=1, path="$['some']"),
9696
Token(kind=TOKEN_STRING, value="some", index=3, path="$['some']"),
97-
Token(kind=TOKEN_LIST_END, value="]", index=8, path="$['some']"),
97+
Token(kind=TOKEN_RBRACKET, value="]", index=8, path="$['some']"),
9898
],
9999
),
100100
Case(
@@ -245,7 +245,7 @@ class Case:
245245
Token(kind=TOKEN_INT, value="4", index=4, path="$[1,4,5]"),
246246
Token(kind=TOKEN_COMMA, value=",", index=5, path="$[1,4,5]"),
247247
Token(kind=TOKEN_INT, value="5", index=6, path="$[1,4,5]"),
248-
Token(kind=TOKEN_LIST_END, value="]", index=7, path="$[1,4,5]"),
248+
Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$[1,4,5]"),
249249
],
250250
),
251251
Case(
@@ -259,7 +259,7 @@ class Case:
259259
Token(kind=TOKEN_SLICE_START, value="4", index=4, path="$[1,4:9]"),
260260
Token(kind=TOKEN_SLICE_STOP, value="9", index=6, path="$[1,4:9]"),
261261
Token(kind=TOKEN_SLICE_STEP, value="", index=-1, path="$[1,4:9]"),
262-
Token(kind=TOKEN_LIST_END, value="]", index=7, path="$[1,4:9]"),
262+
Token(kind=TOKEN_RBRACKET, value="]", index=7, path="$[1,4:9]"),
263263
],
264264
),
265265
Case(
@@ -275,7 +275,7 @@ class Case:
275275
Token(
276276
kind=TOKEN_BARE_PROPERTY, value="thing", index=7, path="$[some,thing]"
277277
),
278-
Token(kind=TOKEN_LIST_END, value="]", index=12, path="$[some,thing]"),
278+
Token(kind=TOKEN_RBRACKET, value="]", index=12, path="$[some,thing]"),
279279
],
280280
),
281281
Case(
@@ -781,7 +781,7 @@ class Case:
781781
kind=TOKEN_STRING, value="1", index=19, path="[?(@.thing in [1, '1'])]"
782782
),
783783
Token(
784-
kind=TOKEN_LIST_END,
784+
kind=TOKEN_RBRACKET,
785785
value="]",
786786
index=21,
787787
path="[?(@.thing in [1, '1'])]",
@@ -1039,7 +1039,7 @@ class Case:
10391039
Token(
10401040
kind=TOKEN_STRING, value="thing", index=11, path="$['some', 'thing']"
10411041
),
1042-
Token(kind=TOKEN_LIST_END, value="]", index=17, path="$['some', 'thing']"),
1042+
Token(kind=TOKEN_RBRACKET, value="]", index=17, path="$['some', 'thing']"),
10431043
],
10441044
),
10451045
Case(

0 commit comments

Comments
 (0)