Skip to content

Commit 14cdfb8

Browse files
committed
Merge branch 'sync-compliance' into develop
* sync-compliance: Refactor parse errors into common functionality; Fix bugs from latest compliance tests; Add latest compliance tests
2 parents 48269d2 + 248eceb commit 14cdfb8

File tree

5 files changed

+275
-86
lines changed

5 files changed

+275
-86
lines changed

jmespath/parser.py

Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -221,17 +221,16 @@ def _parse_slice_expression(self):
221221
while not current_token == 'rbracket' and index < 3:
222222
if current_token == 'colon':
223223
index += 1
224+
if index == 3:
225+
self._raise_parse_error_for_token(
226+
self._lookahead_token(0), 'syntax error')
224227
self._advance()
225228
elif current_token == 'number':
226229
parts[index] = self._lookahead_token(0)['value']
227230
self._advance()
228231
else:
229-
t = self._lookahead_token(0)
230-
lex_position = t['start']
231-
actual_value = t['value']
232-
actual_type = t['type']
233-
raise exceptions.ParseError(lex_position, actual_value,
234-
actual_type, 'syntax error')
232+
self._raise_parse_error_for_token(
233+
self._lookahead_token(0), 'syntax error')
235234
current_token = self._current_token()
236235
self._match('rbracket')
237236
return ast.slice(*parts)
@@ -271,6 +270,14 @@ def _token_led_and(self, left):
271270
return ast.and_expression(left, right)
272271

273272
def _token_led_lparen(self, left):
273+
if left['type'] != 'field':
274+
# 0 - first func arg or closing paren.
275+
# -1 - '(' token
276+
# -2 - invalid function "name".
277+
prev_t = self._lookahead_token(-2)
278+
raise exceptions.ParseError(
279+
prev_t['start'], prev_t['value'], prev_t['type'],
280+
"Invalid function name '%s'" % prev_t['value'])
274281
name = left['value']
275282
args = []
276283
while not self._current_token() == 'rparen':
@@ -393,12 +400,8 @@ def _parse_projection_rhs(self, binding_power):
393400
self._match('dot')
394401
right = self._parse_dot_rhs(binding_power)
395402
else:
396-
t = self._lookahead_token(0)
397-
lex_position = t['start']
398-
actual_value = t['value']
399-
actual_type = t['type']
400-
raise exceptions.ParseError(lex_position, actual_value,
401-
actual_type, 'syntax error')
403+
self._raise_parse_error_for_token(self._lookahead_token(0),
404+
'syntax error')
402405
return right
403406

404407
def _parse_dot_rhs(self, binding_power):
@@ -424,58 +427,33 @@ def _parse_dot_rhs(self, binding_power):
424427
t = self._lookahead_token(0)
425428
allowed = ['quoted_identifier', 'unquoted_identifier',
426429
'lbracket', 'lbrace']
427-
lex_position = t['start']
428-
actual_value = t['value']
429-
actual_type = t['type']
430-
raise exceptions.ParseError(
431-
lex_position, actual_value, actual_type,
432-
"Expecting: %s, got: %s" % (allowed,
433-
actual_type))
430+
msg = (
431+
"Expecting: %s, got: %s" % (allowed, t['type'])
432+
)
433+
self._raise_parse_error_for_token(t, msg)
434434

435435
def _error_nud_token(self, token):
436436
if token['type'] == 'eof':
437437
raise exceptions.IncompleteExpressionError(
438438
token['start'], token['value'], token['type'])
439-
raise exceptions.ParseError(token['start'], token['value'],
440-
token['type'], 'Invalid token.')
439+
self._raise_parse_error_for_token(token, 'invalid token')
441440

442441
def _error_led_token(self, token):
443-
raise exceptions.ParseError(token['start'], token['value'],
444-
token['type'], 'Invalid token')
442+
self._raise_parse_error_for_token(token, 'invalid token')
445443

446444
def _match(self, token_type=None):
447445
# inline'd self._current_token()
448446
if self._current_token() == token_type:
449447
# inline'd self._advance()
450448
self._advance()
451449
else:
452-
t = self._lookahead_token(0)
453-
lex_position = t['start']
454-
actual_value = t['value']
455-
actual_type = t['type']
456-
if actual_type == 'eof':
457-
raise exceptions.IncompleteExpressionError(
458-
lex_position, actual_value, actual_type)
459-
else:
460-
message = 'Expecting: %s, got: %s' % (token_type,
461-
actual_type)
462-
raise exceptions.ParseError(
463-
lex_position, actual_value, actual_type, message)
450+
self._raise_parse_error_maybe_eof(
451+
token_type, self._lookahead_token(0))
464452

465453
def _match_multiple_tokens(self, token_types):
466454
if self._current_token() not in token_types:
467-
t = self._lookahead_token(0)
468-
lex_position = t['start']
469-
actual_value = t['value']
470-
actual_type = t['type']
471-
if actual_type == 'eof':
472-
raise exceptions.IncompleteExpressionError(
473-
lex_position, actual_value, actual_type)
474-
else:
475-
message = 'Expecting: %s, got: %s' % (token_types,
476-
actual_type)
477-
raise exceptions.ParseError(
478-
lex_position, actual_value, actual_type, message)
455+
self._raise_parse_error_maybe_eof(
456+
token_types, self._lookahead_token(0))
479457
self._advance()
480458

481459
def _advance(self):
@@ -490,6 +468,25 @@ def _lookahead(self, number):
490468
def _lookahead_token(self, number):
491469
return self._tokens[self._index + number]
492470

471+
def _raise_parse_error_for_token(self, token, reason):
472+
lex_position = token['start']
473+
actual_value = token['value']
474+
actual_type = token['type']
475+
raise exceptions.ParseError(lex_position, actual_value,
476+
actual_type, reason)
477+
478+
def _raise_parse_error_maybe_eof(self, expected_type, token):
479+
lex_position = token['start']
480+
actual_value = token['value']
481+
actual_type = token['type']
482+
if actual_type == 'eof':
483+
raise exceptions.IncompleteExpressionError(
484+
lex_position, actual_value, actual_type)
485+
message = 'Expecting: %s, got: %s' % (expected_type,
486+
actual_type)
487+
raise exceptions.ParseError(
488+
lex_position, actual_value, actual_type, message)
489+
493490
def _free_cache_entries(self):
494491
for key in random.sample(self._CACHE.keys(), int(self._MAX_SIZE / 2)):
495492
del self._CACHE[key]

tests/compliance/benchmarks.json

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
[
2+
{
3+
"given": {
4+
"long_name_for_a_field": true,
5+
"a": {
6+
"b": {
7+
"c": {
8+
"d": {
9+
"e": {
10+
"f": {
11+
"g": {
12+
"h": {
13+
"i": {
14+
"j": {
15+
"k": {
16+
"l": {
17+
"m": {
18+
"n": {
19+
"o": {
20+
"p": true
21+
}
22+
}
23+
}
24+
}
25+
}
26+
}
27+
}
28+
}
29+
}
30+
}
31+
}
32+
}
33+
}
34+
}
35+
}
36+
},
37+
"cases": [
38+
{
39+
"comment": "simple field",
40+
"expression": "a",
41+
"bench": "full"
42+
},
43+
{
44+
"comment": "simple subexpression",
45+
"expression": "a.b",
46+
"bench": "full"
47+
},
48+
{
49+
"comment": "deep field selection",
50+
"expression": "a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s",
51+
"bench": "full"
52+
},
53+
{
54+
"comment": "simple or",
55+
"expression": "not_there || a",
56+
"bench": "full"
57+
}
58+
]
59+
},
60+
{
61+
"given": {
62+
"a":0,"b":1,"c":2,"d":3,"e":4,"f":5,"g":6,"h":7,"i":8,"j":9,"k":10,
63+
"l":11,"m":12,"n":13,"o":14,"p":15,"q":16,"r":17,"s":18,"t":19,"u":20,
64+
"v":21,"w":22,"x":23,"y":24,"z":25
65+
},
66+
"cases": [
67+
{
68+
"comment": "deep ands",
69+
"expression": "a && b && c && d && e && f && g && h && i && j && k && l && m && n && o && p && q && r && s && t && u && v && w && x && y && z",
70+
"bench": "full"
71+
},
72+
{
73+
"comment": "deep ors",
74+
"expression": "z || y || x || w || v || u || t || s || r || q || p || o || n || m || l || k || j || i || h || g || f || e || d || c || b || a",
75+
"bench": "full"
76+
},
77+
{
78+
"comment": "lots of summing",
79+
"expression": "sum(z, y, x, w, v, u, t, s, r, q, p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a)",
80+
"bench": "full"
81+
},
82+
{
83+
"comment": "lots of multi list",
84+
"expression": "[z, y, x, w, v, u, t, s, r, q, p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a]",
85+
"bench": "full"
86+
}
87+
]
88+
},
89+
{
90+
"given": {},
91+
"cases": [
92+
{
93+
"comment": "field 50",
94+
"expression": "j49.j48.j47.j46.j45.j44.j43.j42.j41.j40.j39.j38.j37.j36.j35.j34.j33.j32.j31.j30.j29.j28.j27.j26.j25.j24.j23.j22.j21.j20.j19.j18.j17.j16.j15.j14.j13.j12.j11.j10.j9.j8.j7.j6.j5.j4.j3.j2.j1.j0",
95+
"bench": "parse"
96+
},
97+
{
98+
"comment": "pipe 50",
99+
"expression": "j49|j48|j47|j46|j45|j44|j43|j42|j41|j40|j39|j38|j37|j36|j35|j34|j33|j32|j31|j30|j29|j28|j27|j26|j25|j24|j23|j22|j21|j20|j19|j18|j17|j16|j15|j14|j13|j12|j11|j10|j9|j8|j7|j6|j5|j4|j3|j2|j1|j0",
100+
"bench": "parse"
101+
},
102+
{
103+
"comment": "index 50",
104+
"expression": "[49][48][47][46][45][44][43][42][41][40][39][38][37][36][35][34][33][32][31][30][29][28][27][26][25][24][23][22][21][20][19][18][17][16][15][14][13][12][11][10][9][8][7][6][5][4][3][2][1][0]",
105+
"bench": "parse"
106+
},
107+
{
108+
"comment": "long raw string literal",
109+
"expression": "'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz'",
110+
"bench": "parse"
111+
},
112+
{
113+
"comment": "deep projection 104",
114+
"expression": "a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*].a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*].a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*].a[*].b[*].c[*].d[*].e[*].f[*].g[*].h[*].i[*].j[*].k[*].l[*].m[*].n[*].o[*].p[*].q[*].r[*].s[*].t[*].u[*].v[*].w[*].x[*].y[*].z[*]",
115+
"bench": "parse"
116+
},
117+
{
118+
"comment": "filter projection",
119+
"expression": "foo[bar > baz][qux > baz]",
120+
"bench": "parse"
121+
}
122+
]
123+
}
124+
]

tests/compliance/multiselect.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,11 @@
387387
"comment": "Nested multiselect",
388388
"expression": "[[*]]",
389389
"result": [[]]
390+
},
391+
{
392+
"comment": "Select on null",
393+
"expression": "missing.{foo: bar}",
394+
"result": null
390395
}
391396
]
392397
}

0 commit comments

Comments (0)