Skip to content

Commit fcc3513

Browse files
authored
Merge pull request #1 from jmespath-community/jep/string-functions
JEP-14 String Functions
2 parents eca3c16 + f5888c6 commit fcc3513

File tree

5 files changed

+408
-5
lines changed

5 files changed

+408
-5
lines changed

bin/jp.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def main():
4242
except exceptions.JMESPathTypeError as e:
4343
sys.stderr.write("invalid-type: %s\n" % e)
4444
return 1
45+
except exceptions.JMESPathValueError as e:
46+
sys.stderr.write("invalid-value: %s\n" % e)
47+
return 1
4548
except exceptions.UnknownFunctionError as e:
4649
sys.stderr.write("unknown-function: %s\n" % e)
4750
return 1

jmespath/exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,19 @@ def __str__(self):
112112
self.expected_types, self.actual_type))
113113

114114

115+
@with_str_method
class JMESPathValueError(JMESPathError):
    """Error for a function argument whose value is not acceptable.

    Stores the name of the function, the offending value and a
    description of what was expected; ``__str__`` renders the three
    into a single message.
    """

    def __init__(self, function_name, current_value, expected_types):
        self.function_name = function_name
        self.current_value = current_value
        self.expected_types = expected_types

    def __str__(self):
        # Exactly one placeholder per attribute: a surplus "%s" makes
        # the % operator raise TypeError instead of producing a message.
        return ('In function %s(), invalid value: "%s", '
                'expected: %s' % (
                    self.function_name, self.current_value,
                    self.expected_types))
127+
115128
class EmptyExpressionError(JMESPathError):
116129
def __init__(self):
117130
super(EmptyExpressionError, self).__init__(

jmespath/functions.py

Lines changed: 178 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,22 +81,28 @@ def call_function(self, function_name, resolved_args):
8181
return function(self, *resolved_args)
8282

8383
def _validate_arguments(self, args, signature, function_name):
    """Check the number of arguments, then delegate to the type check.

    Each entry of ``signature`` is a dict; an entry with a truthy
    'optional' key may be omitted by the caller, and a truthy 'variadic'
    key on the last entry allows any number of additional arguments.

    Raises ``exceptions.VariadictArityError`` when a variadic function
    receives fewer arguments than it has signature entries, and
    ``exceptions.ArityError`` when a non-variadic function receives fewer
    than the required or more than the declared number of arguments.
    Returns whatever ``self._type_check`` returns.
    """
    # A missing or empty signature has no last entry to inspect, so
    # normalise it before indexing with [-1].
    signature = signature or ()
    declared = len(signature)
    optional_count = sum(1 for param in signature if param.get('optional'))
    required_count = declared - optional_count
    has_variadic = bool(signature) and bool(signature[-1].get('variadic'))
    supplied = len(args)

    if has_variadic:
        if supplied < declared:
            raise exceptions.VariadictArityError(
                declared, supplied, function_name)
    elif supplied < required_count or supplied > declared:
        # With no optional parameters required_count == declared, so this
        # is the plain "exact count" check.
        raise exceptions.ArityError(
            declared, supplied, function_name)
    return self._type_check(args, signature, function_name)
9299

93100
def _type_check(self, actual, signature, function_name):
    """Type-check each supplied argument against its signature entry.

    Arguments and signature entries are paired positionally; pairing
    stops at the shorter of the two sequences. Entries that yield no
    allowed types are not checked.
    """
    for value, spec in zip(actual, signature):
        permitted = self._get_allowed_types_from_signature(spec)
        if not permitted:
            continue
        self._type_check_single(value, permitted, function_name)
99-
100106
def _type_check_single(self, current, types, function_name):
101107
# Type checking involves checking the top level type,
102108
# and in the case of arrays, potentially checking the types
@@ -120,6 +126,13 @@ def _type_check_single(self, current, types, function_name):
120126
self._subtype_check(current, allowed_subtypes,
121127
types, function_name)
122128

129+
## A signature entry names its accepted types either as a single type
## {'type': 'type-name'} or as a list {'types': ['type1-name', 'type2-name']}
def _get_allowed_types_from_signature(self, spec):
    """Return the list of type names accepted by the entry ``spec``.

    A truthy 'type' value is written back into ``spec`` as a one-element
    'types' list (replacing any existing 'types'), so ``spec`` is
    modified in place. Returns None when neither key is present.
    """
    single = spec.get('type')
    if single:
        spec['types'] = [single]
    return spec.get('types')
135+
123136
def _get_allowed_pytypes(self, types):
124137
allowed_types = []
125138
allowed_subtypes = []
@@ -164,6 +177,14 @@ def _subtype_check(self, current, allowed_subtypes, types, function_name):
164177
@signature({'types': ['number']})
def _func_abs(self, arg):
    """Return the absolute value of ``arg``."""
    magnitude = abs(arg)
    return magnitude
180+
181+
@signature({'types': ['string']})
def _func_lower(self, arg):
    """Return ``arg`` converted to lowercase."""
    lowered = arg.lower()
    return lowered
184+
185+
@signature({'types': ['string']})
def _func_upper(self, arg):
    """Return ``arg`` converted to uppercase."""
    uppered = arg.upper()
    return uppered
167188

168189
@signature({'types': ['array-number']})
169190
def _func_avg(self, arg):
@@ -287,6 +308,158 @@ def _func_keys(self, arg):
287308
# should we also return the indices of a list?
288309
return list(arg.keys())
289310

311+
@signature(
    {'type': 'string'},
    {'type': 'string'},
    {'type': 'number', 'optional': True},
    {'type': 'number', 'optional': True})
def _func_find_first(self, text, search, start=0, end=None):
    """Return the index of the first ``search`` in ``text[start:end]``.

    ``start`` and ``end`` must be integral; the lookup itself, including
    the None result when nothing is found, is done by ``_find_impl``.
    """
    for bound_name, bound in (('start', start), ('end', end)):
        self._ensure_integer('find_first', bound_name, bound)

    def leftmost(haystack, needle):
        return haystack.find(needle)

    return self._find_impl(text, search, leftmost, start, end)
326+
327+
@signature(
    {'type': 'string'},
    {'type': 'string'},
    {'type': 'number', 'optional': True},
    {'type': 'number', 'optional': True})
def _func_find_last(self, text, search, start=0, end=None):
    """Return the index of the last ``search`` in ``text[start:end]``.

    ``start`` and ``end`` must be integral; the lookup itself, including
    the None result when nothing is found, is done by ``_find_impl``.
    """
    for bound_name, bound in (('start', start), ('end', end)):
        self._ensure_integer('find_last', bound_name, bound)

    def rightmost(haystack, needle):
        return haystack.rfind(needle)

    return self._find_impl(text, search, rightmost, start, end)
342+
343+
def _find_impl(self, text, search, func, start, end):
    """Locate ``search`` inside ``text[start:end]`` using ``func``.

    ``func(window, search)`` must return an index relative to the sliced
    window, or -1 when there is no match (the contract of ``str.find``
    and ``str.rfind``).

    Returns the match position as an index into the whole of ``text``,
    or None when ``search`` is empty or not found. ``start`` may be
    negative (counted from the end) or out of range; it is clamped to a
    valid index before being added to the relative position.
    """
    if not search:
        return None

    # Integral floats such as 1.0 pass _ensure_integer but are rejected
    # as slice indices, so coerce both bounds to int here.
    start = 0 if start is None else int(start)
    end = len(text) if end is None else int(end)

    pos = func(text[start:end], search)
    if pos == -1:
        return None

    if start < 0:
        start += len(text)

    # restrict resulting range to valid indices
    start = min(max(start, 0), len(text))
    return start + pos
356+
357+
@signature(
    {'type': 'string'},
    {'type': 'number'},
    {'type': 'string', 'optional': True})
def _func_pad_left(self, text, width, padding=' '):
    """Right-justify ``text`` in ``width`` columns, filling on the left.

    ``width`` must be a non-negative integer and ``padding`` a single
    character; ``text`` is returned unchanged when it is already at
    least ``width`` long.
    """
    self._ensure_non_negative_integer('pad_left', 'width', width)
    # int(): an integral float such as 10.0 passes the check above, but
    # str.rjust() only accepts integers.
    return self._pad_impl(
        lambda: text.rjust(int(width), padding), padding, 'pad_left')

@signature(
    {'type': 'string'},
    {'type': 'number'},
    {'type': 'string', 'optional': True})
def _func_pad_right(self, text, width, padding=' '):
    """Left-justify ``text`` in ``width`` columns, filling on the right.

    ``width`` must be a non-negative integer and ``padding`` a single
    character; ``text`` is returned unchanged when it is already at
    least ``width`` long.
    """
    self._ensure_non_negative_integer('pad_right', 'width', width)
    # int(): see _func_pad_left; str.ljust() has the same restriction.
    return self._pad_impl(
        lambda: text.ljust(int(width), padding), padding, 'pad_right')

def _pad_impl(self, func, padding, func_name='pad_right'):
    """Validate ``padding`` and then return ``func()``.

    ``func_name`` is only used in the error message, so that it names
    the function that was actually called; it defaults to 'pad_right',
    which reproduces the previous message for callers that do not pass
    it.

    Raises ``exceptions.JMESPathError`` when ``padding`` is not exactly
    one character long.
    """
    if len(padding) != 1:
        raise exceptions.JMESPathError(
            'syntax-error: {}() expects $padding to have a '
            'single character, but received `{}` instead.'
            .format(func_name, padding))
    return func()
380+
381+
@signature(
    {'type': 'string'},
    {'type': 'string'},
    {'type': 'string'},
    {'type': 'number', 'optional': True})
def _func_replace(self, text, search, replacement, count=None):
    """Replace occurrences of ``search`` in ``text`` with ``replacement``.

    When ``count`` is given it must be a non-negative integer and limits
    the number of replacements; otherwise every occurrence is replaced.
    """
    self._ensure_non_negative_integer('replace', 'count', count)

    if count is None:
        return text.replace(search, replacement)
    return text.replace(search, replacement, int(count))
395+
396+
@signature(
    {'type': 'string'},
    {'type': 'string'},
    {'type': 'number', 'optional': True})
def _func_split(self, text, search, count=None):
    """Split ``text`` on ``search``, performing at most ``count`` splits.

    An empty ``search`` splits between every character. When ``count``
    is given it must be a non-negative integer; the unsplit remainder
    is returned as the last element.
    """
    self._ensure_non_negative_integer('split', 'count', count)

    if count is not None:
        # An integral float such as 3.0 passes the check above but is
        # rejected by slicing and by str.split(); _func_replace coerces
        # its count the same way.
        count = int(count)

    if not search:
        chars = list(text)
        if count is None:
            return chars
        # NOTE(review): when count >= len(text) the remainder is '' and
        # is still appended as a trailing element; kept as-is, confirm
        # against the function specification.
        return chars[:count] + [''.join(chars[count:])]

    if count is None:
        return text.split(search)
    return text.split(search, count)
418+
419+
def _ensure_integer(self, func_name, param_name, param_value):
    """Raise unless ``param_value`` is None or an integral number.

    ``param_name`` is accepted but not used. Raises
    ``exceptions.JMESPathValueError`` built from ``func_name``, the
    offending value and the text "integer".
    """
    if param_value is None:
        return
    if int(param_value) == param_value:
        return
    raise exceptions.JMESPathValueError(
        func_name,
        param_value,
        "integer")
431+
432+
def _ensure_non_negative_integer(self, func_name, param_name, param_value):
    """Raise unless ``param_value`` is None or an integral number >= 0.

    ``param_name`` is accepted for symmetry with the callers but is not
    part of the error: the second argument of
    ``exceptions.JMESPathValueError`` is the offending value, which is
    also what ``_ensure_integer`` passes.
    """
    if param_value is None:
        return
    if int(param_value) != param_value or int(param_value) < 0:
        raise exceptions.JMESPathValueError(
            func_name,
            param_value,
            "non-negative integer")
444+
445+
@signature({'type': 'string'}, {'type': 'string', 'optional': True})
def _func_trim(self, text, chars=None):
    """Strip ``chars`` from both ends of ``text``.

    When ``chars`` is omitted or empty, whitespace is stripped instead.
    """
    if chars:
        return text.strip(chars)
    return text.strip()
450+
451+
@signature({'type': 'string'}, {'type': 'string', 'optional': True})
def _func_trim_left(self, text, chars=None):
    """Strip ``chars`` from the start of ``text``.

    When ``chars`` is omitted or empty, leading whitespace is stripped
    instead.
    """
    if chars:
        return text.lstrip(chars)
    return text.lstrip()
456+
457+
@signature({'type': 'string'}, {'type': 'string', 'optional': True})
def _func_trim_right(self, text, chars=None):
    """Strip ``chars`` from the end of ``text``.

    When ``chars`` is omitted or empty, trailing whitespace is stripped
    instead.
    """
    if chars:
        return text.rstrip(chars)
    return text.rstrip()
462+
290463
@signature({"types": ['object']})
def _func_values(self, arg):
    """Return the values of the object ``arg`` as a list."""
    return [value for value in arg.values()]
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
[
2+
{
3+
"given": {
4+
"abab": "aabaaabaaaab",
5+
"string": "subject string",
6+
"split": "avg|-|min|-|max|-|mean|-|mode|-|median"
7+
},
8+
"cases": [
9+
{
10+
"expression": "find_first(string, 'string', `1`, `2`, `3`)",
11+
"error": "invalid-arity"
12+
},
13+
{
14+
"expression": "find_first(@, 'string', `1`, `2`)",
15+
"error": "invalid-type"
16+
},
17+
{
18+
"expression": "find_first(string, 'string', '1')",
19+
"error": "invalid-type"
20+
},
21+
{
22+
"expression": "find_first(string, 'string', `1`, '2')",
23+
"error": "invalid-type"
24+
},
25+
{
26+
"expression": "find_first(string, 'string', `1.3`, '2')",
27+
"error": "invalid-type"
28+
},
29+
{
30+
"expression": "find_first(string, 'string', `1`, '2.4')",
31+
"error": "invalid-value"
32+
},
33+
34+
{ "expression": "find_first(string, 'string')", "result": 8 },
35+
{ "expression": "find_first(string, 'string', `0`)", "result": 8 },
36+
{ "expression": "find_first(string, 'string', `0`, `14`)", "result": 8 },
37+
{ "expression": "find_first(string, 'string', `-6`)", "result": 8 },
38+
{ "expression": "find_first(string, 'string', `-99`, `100`)", "result": 8 },
39+
{ "expression": "find_first(string, 'string', `0`, `13`)", "result": null },
40+
{ "expression": "find_first(string, 'string', `8`)", "result": 8 },
41+
{ "expression": "find_first(string, 'string', `8`, `11`)", "result": null },
42+
{ "expression": "find_first(string, 'string', `9`)", "result": null },
43+
{ "expression": "find_first(string, 's')", "result": 0 },
44+
{ "expression": "find_first(string, 's', `1`)", "result": 8 },
45+
{ "expression": "find_first(string, '')", "result": null },
46+
{ "expression": "find_first('', '')", "result": null },
47+
48+
{ "expression": "find_last(string, 'string')", "result": 8 },
49+
{ "expression": "find_last(string, 'string', `8`)", "result": 8 },
50+
{ "expression": "find_last(string, 'string', `8`, `9`)", "result": null },
51+
{ "expression": "find_last(string, 'string', `9`)", "result": null },
52+
{ "expression": "find_last(string, 's', `1`)", "result": 8 },
53+
{ "expression": "find_last(string, 's', `-6`)", "result": 8 },
54+
{ "expression": "find_last(string, 's', `0`, `7`)", "result": 0 },
55+
{ "expression": "find_last(string, '')", "result": null },
56+
{ "expression": "find_last('', '')", "result": null },
57+
58+
{ "expression": "lower('STRING')", "result": "string" },
59+
{ "expression": "upper('string')", "result": "STRING" },
60+
61+
{
62+
"expression": "replace(abab, 'aa', '-', `0.333333`)",
63+
"error": "invalid-value"
64+
},
65+
66+
{
67+
"expression": "replace(abab, 'aa', '-', `0.001`)",
68+
"error": "invalid-value"
69+
},
70+
71+
{ "expression": "replace(abab, 'aa', '-', `0`)", "result": "aabaaabaaaab" },
72+
{ "expression": "replace(abab, 'aa', '-', `1`)", "result": "-baaabaaaab" },
73+
{ "expression": "replace(abab, 'aa', '-', `2`)", "result": "-b-abaaaab" },
74+
{ "expression": "replace(abab, 'aa', '-', `3`)", "result": "-b-ab-aab" },
75+
{ "expression": "replace(abab, 'aa', '-')", "result": "-b-ab--b" },
76+
77+
{ "expression": "trim(' subject string ')", "result": "subject string" },
78+
{ "expression": "trim(' subject string ', '')", "result": "subject string" },
79+
{ "expression": "trim(' subject string ', ' ')", "result": "subject string" },
80+
{ "expression": "trim(' subject string ', 's')", "result": " subject string " },
81+
{ "expression": "trim(' subject string ', 'su')", "result": " subject string " },
82+
{ "expression": "trim(' subject string ', 'su ')", "result": "bject string" },
83+
{ "expression": "trim(' subject string ', 'gsu ')", "result": "bject strin" },
84+
85+
{
86+
"expression": "trim('\u0009\u000A\u000B\u000C\u000D\u0020\u0085\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000')",
87+
"result": ""
88+
},
89+
90+
{ "expression": "trim_left(' subject string ')", "result": "subject string " },
91+
{ "expression": "trim_left(' subject string ', 's')", "result": " subject string " },
92+
{ "expression": "trim_left(' subject string ', 'su')", "result": " subject string " },
93+
{ "expression": "trim_left(' subject string ', 'su ')", "result": "bject string " },
94+
{ "expression": "trim_left(' subject string ', 'gsu ')", "result": "bject string " },
95+
96+
{ "expression": "trim_right(' subject string ')", "result": " subject string" },
97+
{ "expression": "trim_right(' subject string ', 's')", "result": " subject string " },
98+
{ "expression": "trim_right(' subject string ', 'su')", "result": " subject string " },
99+
{ "expression": "trim_right(' subject string ', 'su ')", "result": " subject string" },
100+
{ "expression": "trim_right(' subject string ', 'gsu ')", "result": " subject strin" },
101+
102+
{
103+
"expression": "pad_left('string', '1')",
104+
"error": "syntax"
105+
106+
},
107+
{
108+
"expression": "pad_left('string', `1`, '--')",
109+
"error": "syntax"
110+
111+
},
112+
{
113+
"expression": "pad_left('string', `1.4`)",
114+
"error": "invalid-value"
115+
116+
},
117+
118+
{ "expression": "pad_left('string', `0`)", "result": "string" },
119+
{ "expression": "pad_left('string', `5`)", "result": "string" },
120+
{ "expression": "pad_left('string', `10`)", "result": " string" },
121+
{ "expression": "pad_left('string', `10`, '-')", "result": "----string" },
122+
123+
{ "expression": "pad_right('string', `0`)", "result": "string" },
124+
{ "expression": "pad_right('string', `5`)", "result": "string" },
125+
{ "expression": "pad_right('string', `10`)", "result": "string " },
126+
{ "expression": "pad_right('string', `10`, '-')", "result": "string----" },
127+
128+
{
129+
"expression": "split('/', '/', `3.7`)",
130+
"error": "invalid-value"
131+
},
132+
133+
{ "expression": "split('/', '/')", "result": [ "", "" ] },
134+
{ "expression": "split('', '')", "result": [ ] },
135+
{ "expression": "split('all chars', '')", "result": [ "a", "l", "l", " ", "c", "h", "a", "r", "s" ] },
136+
{ "expression": "split('all chars', '', `3`)", "result": [ "a", "l", "l", " chars" ] },
137+
138+
{ "expression": "split(split, '|-|')", "result": [ "avg", "min", "max", "mean", "mode", "median" ] },
139+
{ "expression": "split(split, '|-|', `3`)", "result": [ "avg", "min", "max", "mean|-|mode|-|median" ] },
140+
{ "expression": "split(split, '|-|', `2`)", "result": [ "avg", "min", "max|-|mean|-|mode|-|median" ] },
141+
{ "expression": "split(split, '|-|', `1`)", "result": [ "avg", "min|-|max|-|mean|-|mode|-|median" ] },
142+
{ "expression": "split(split, '|-|', `0`)", "result": [ "avg|-|min|-|max|-|mean|-|mode|-|median" ] }
143+
]
144+
}
145+
]

0 commit comments

Comments
 (0)