Skip to content

Commit 14cdfb8

Browse files
committed
Merge branch 'sync-compliance' into develop
* sync-compliance: Refactor parse errors into common functionality; Fix bugs from latest compliance tests; Add latest compliance tests
2 parents 48269d2 + 248eceb commit 14cdfb8

File tree

5 files changed

+275
-86
lines changed

5 files changed

+275
-86
lines changed

jmespath/parser.py

Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -221,17 +221,16 @@ def _parse_slice_expression(self):
221221
while not current_token == 'rbracket' and index < 3:
222222
if current_token == 'colon':
223223
index += 1
224+
if index == 3:
225+
self._raise_parse_error_for_token(
226+
self._lookahead_token(0), 'syntax error')
224227
self._advance()
225228
elif current_token == 'number':
226229
parts[index] = self._lookahead_token(0)['value']
227230
self._advance()
228231
else:
229-
t = self._lookahead_token(0)
230-
lex_position = t['start']
231-
actual_value = t['value']
232-
actual_type = t['type']
233-
raise exceptions.ParseError(lex_position, actual_value,
234-
actual_type, 'syntax error')
232+
self._raise_parse_error_for_token(
233+
self._lookahead_token(0), 'syntax error')
235234
current_token = self._current_token()
236235
self._match('rbracket')
237236
return ast.slice(*parts)
@@ -271,6 +270,14 @@ def _token_led_and(self, left):
271270
return ast.and_expression(left, right)
272271

273272
def _token_led_lparen(self, left):
273+
if left['type'] != 'field':
274+
# 0 - first func arg or closing paren.
275+
# -1 - '(' token
276+
# -2 - invalid function "name".
277+
prev_t = self._lookahead_token(-2)
278+
raise exceptions.ParseError(
279+
prev_t['start'], prev_t['value'], prev_t['type'],
280+
"Invalid function name '%s'" % prev_t['value'])
274281
name = left['value']
275282
args = []
276283
while not self._current_token() == 'rparen':
@@ -393,12 +400,8 @@ def _parse_projection_rhs(self, binding_power):
393400
self._match('dot')
394401
right = self._parse_dot_rhs(binding_power)
395402
else:
396-
t = self._lookahead_token(0)
397-
lex_position = t['start']
398-
actual_value = t['value']
399-
actual_type = t['type']
400-
raise exceptions.ParseError(lex_position, actual_value,
401-
actual_type, 'syntax error')
403+
self._raise_parse_error_for_token(self._lookahead_token(0),
404+
'syntax error')
402405
return right
403406

404407
def _parse_dot_rhs(self, binding_power):
@@ -424,58 +427,33 @@ def _parse_dot_rhs(self, binding_power):
424427
t = self._lookahead_token(0)
425428
allowed = ['quoted_identifier', 'unquoted_identifier',
426429
'lbracket', 'lbrace']
427-
lex_position = t['start']
428-
actual_value = t['value']
429-
actual_type = t['type']
430-
raise exceptions.ParseError(
431-
lex_position, actual_value, actual_type,
432-
"Expecting: %s, got: %s" % (allowed,
433-
actual_type))
430+
msg = (
431+
"Expecting: %s, got: %s" % (allowed, t['type'])
432+
)
433+
self._raise_parse_error_for_token(t, msg)
434434

435435
def _error_nud_token(self, token):
436436
if token['type'] == 'eof':
437437
raise exceptions.IncompleteExpressionError(
438438
token['start'], token['value'], token['type'])
439-
raise exceptions.ParseError(token['start'], token['value'],
440-
token['type'], 'Invalid token.')
439+
self._raise_parse_error_for_token(token, 'invalid token')
441440

442441
def _error_led_token(self, token):
443-
raise exceptions.ParseError(token['start'], token['value'],
444-
token['type'], 'Invalid token')
442+
self._raise_parse_error_for_token(token, 'invalid token')
445443

446444
def _match(self, token_type=None):
447445
# inline'd self._current_token()
448446
if self._current_token() == token_type:
449447
# inline'd self._advance()
450448
self._advance()
451449
else:
452-
t = self._lookahead_token(0)
453-
lex_position = t['start']
454-
actual_value = t['value']
455-
actual_type = t['type']
456-
if actual_type == 'eof':
457-
raise exceptions.IncompleteExpressionError(
458-
lex_position, actual_value, actual_type)
459-
else:
460-
message = 'Expecting: %s, got: %s' % (token_type,
461-
actual_type)
462-
raise exceptions.ParseError(
463-
lex_position, actual_value, actual_type, message)
450+
self._raise_parse_error_maybe_eof(
451+
token_type, self._lookahead_token(0))
464452

465453
def _match_multiple_tokens(self, token_types):
466454
if self._current_token() not in token_types:
467-
t = self._lookahead_token(0)
468-
lex_position = t['start']
469-
actual_value = t['value']
470-
actual_type = t['type']
471-
if actual_type == 'eof':
472-
raise exceptions.IncompleteExpressionError(
473-
lex_position, actual_value, actual_type)
474-
else:
475-
message = 'Expecting: %s, got: %s' % (token_types,
476-
actual_type)
477-
raise exceptions.ParseError(
478-
lex_position, actual_value, actual_type, message)
455+
self._raise_parse_error_maybe_eof(
456+
token_types, self._lookahead_token(0))
479457
self._advance()
480458

481459
def _advance(self):
@@ -490,6 +468,25 @@ def _lookahead(self, number):
490468
def _lookahead_token(self, number):
491469
return self._tokens[self._index + number]
492470

471+
def _raise_parse_error_for_token(self, token, reason):
472+
lex_position = token['start']
473+
actual_value = token['value']
474+
actual_type = token['type']
475+
raise exceptions.ParseError(lex_position, actual_value,
476+
actual_type, reason)
477+
478+
def _raise_parse_error_maybe_eof(self, expected_type, token):
479+
lex_position = token['start']
480+
actual_value = token['value']
481+
actual_type = token['type']
482+
if actual_type == 'eof':
483+
raise exceptions.IncompleteExpressionError(
484+
lex_position, actual_value, actual_type)
485+
message = 'Expecting: %s, got: %s' % (expected_type,
486+
actual_type)
487+
raise exceptions.ParseError(
488+
lex_position, actual_value, actual_type, message)
489+
493490
def _free_cache_entries(self):
494491
for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)):
495492
del self._CACHE[key]

tests/compliance/benchmarks.json

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
[
2+
{
3+
"given": {
4+
"long_name_for_a_field": true,
5+
"a": {
6+
"b": {
7+
"c": {
8+
"d": {
9+
"e": {
10+
"f": {
11+
"g": {
12+
"h": {
13+
"i": {
14+
"j": {
15+
"k": {
16+
"l": {
17+
"m": {
18+
"n": {
19+
"o": {
20+
"p": true
21+
}
22+
}
23+
}
24+
}
25+
}
26+
}
27+
}
28+
}
29+
}
30+
}
31+
}
32+
}
33+
}
34+
}
35+
}
36+
},
37+
"cases": [
38+
{
39+
"comment": "simple field",
40+
"expression": "a",
41+
"bench": "full"
42+
},
43+
{
44+
"comment": "simple subexpression",
45+
"expression": "a.b",
46+
"bench": "full"
47+
},
48+
{
49+
"comment": "deep field selection",
50+
"expression": "a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s",
51+
"bench": "full"
52+
},
53+
{
54+
"comment": "simple or",
55+
"expression": "not_there || a",
56+
"bench": "full"
57+
}
58+
]
59+
},
60+
{
61+
"given": {
62+
"a":0,"b":1,"c":2,"d":3,"e":4,"f":5,"g":6,"h":7,"i":8,"j":9,"k":10,
63+
"l":11,"m":12,"n":13,"o":14,"p":15,"q":16,"r":17,"s":18,"t":19,"u":20,
64+
"v":21,"w":22,"x":23,"y":24,"z":25
65+
},
66+
"cases": [
67+
{
68+
"comment": "deep ands",
69+
"expression": "a && b && c && d && e && f && g && h && i && j && k && l && m && n && o && p && q && r && s && t && u && v && w && x && y && z",
70+
"bench": "full"
71+
},
72+
{
73+
"comment": "deep ors",
74+
"expression": "z || y || x || w || v || u || t || s || r || q || p || o || n || m || l || k || j || i || h || g || f || e || d || c || b || a",
75+
"bench": "full"
76+
},
77+
{
78+
"comment": "lots of summing",
79+
"expression": "sum(z, y, x, w, v, u, t, s, r, q, p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a)",
80+
"bench": "full"
81+
},
82+
{
83+
"comment": "lots of multi list",
84+
"expression": "[z, y, x, w, v, u, t, s, r, q, p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a]",
85+
"bench": "full"
86+
}
87+
]
88+
},
89+
{
90+
"given": {},
91+
"cases": [
92+
{
93+
"comment": "field 50",
94+
"expression": "j49.j48.j47.j46.j45.j44.j43.j42.j41.j40.j39.j38.j37.j36.j35.j34.j33.j32.j31.j30.j29.j28.j27.j26.j25.j24.j23.j22.j21.j20.j19.j18.j17.j16.j15.j14.j13.j12.j11.j10.j9.j8.j7.j6.j5.j4.j3.j2.j1.j0",
95+
"bench": "parse"
96+
},
97+
{
98+
"comment": "pipe 50",
99+
"expression": "j49|j48|j47|j46|j45|j44|j43|j42|j41|j40|j39|j38|j37|j36|j35|j34|j33|j32|j31|j30|j29|j28|j27|j26|j25|j24|j23|j22|j21|j20|j19|j18|j17|j16|j15|j14|j13|j12|j11|j10|j9|j8|j7|j6|j5|j4|j3|j2|j1|j0",
100+
"bench": "parse"
101+
},
102+
{
103+
"comment": "index 50",
104+
"expression": "[49][48][47][46][45][44][43][42][41][40][39][38][37][36][35][34][33][32][31][30][29][28][27][26][25][24][23][22][21][20][19][18][17][16][15][14][13][12][11][10][9][8][7][6][5][4][3][2][1][0]",
105+
"bench": "parse"
106+
},
107+
{
108+
"comment": "long raw string literal",
109+
"expression": "'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz'",
110+
"bench": "parse"
111+
},
112+
{
113+
"comment": "deep projection 104",
114+
"expression": "a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*].a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*].a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*].a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*]",
115+
"bench": "parse"
116+
},
117+
{
118+
"comment": "filter projection",
119+
"expression": "foo[bar > baz][qux > baz]",
120+
"bench": "parse"
121+
}
122+
]
123+
}
124+
]

tests/compliance/multiselect.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,11 @@
387387
"comment": "Nested multiselect",
388388
"expression": "[[*]]",
389389
"result": [[]]
390+
},
391+
{
392+
"comment": "Select on null",
393+
"expression": "missing.{foo: bar}",
394+
"result": null
390395
}
391396
]
392397
}

0 commit comments

Comments (0)