@@ -8,8 +8,7 @@ class Lexer
88{
99 private $ regex , $ offsetToToken ;
1010
11- /** @var array Map of regular expressions to their token match value */
12- private $ tokenMap = [
11+ private $ tokens = [
1312 '[a-zA-Z_][a-zA-Z_0-9]* ' => 'identifier ' ,
1413 '\. ' => 'dot ' ,
1514 '\* ' => 'star ' ,
@@ -41,16 +40,13 @@ class Lexer
4140
/**
 * Prepares the lexer's lookup tables from the token map.
 *
 * The keys of $this->tokens are regular expression fragments and the
 * values are token type names. Each fragment is wrapped in its own
 * capture group and the groups are joined by alternation into a single
 * pattern; the type names are stored as a positional list.
 */
public function __construct()
{
    $patterns = array_keys($this->tokens);
    $this->offsetToToken = array_values($this->tokens);
    // The outer parentheses double as the PCRE delimiters.
    $this->regex = '((' . implode(')|(', $patterns) . '))';
}
4846
4947 /**
50- * Tokenize the JMESPath expression into an array of tokens.
51- *
52- * Each token array contains a type, value, and pos key along with any
53- * other keys that might be relevant to the particular token.
48+ * Tokenize the JMESPath expression into an array of token hashes, each
49+ * containing a 'type', 'value', and 'pos' key.
5450 *
5551 * @param string $input JMESPath input
5652 *
@@ -59,23 +55,16 @@ public function __construct()
5955 */
6056 public function tokenize ($ input )
6157 {
62- $ offset = 0 ;
63- $ tokens = [];
64-
6558 if (!preg_match_all ($ this ->regex , $ input , $ matches , PREG_SET_ORDER )) {
66- $ this ->throwSyntax ('Invalid expression ' , $ offset , $ input );
59+ throw $ this ->throwSyntax ('Invalid expression ' , 0 , $ input );
6760 }
6861
62+ $ offset = 0 ;
63+ $ tokens = [];
6964 foreach ($ matches as $ match ) {
7065 $ type = $ this ->offsetToToken [count ($ match ) - 2 ];
71-
7266 if ($ type !== 'skip ' ) {
73- $ token = [
74- 'type ' => $ type ,
75- 'value ' => $ match [0 ],
76- 'pos ' => $ offset
77- ];
78-
67+ $ token = ['type ' => $ type , 'value ' => $ match [0 ], 'pos ' => $ offset ];
7968 switch ($ token ['type ' ]) {
8069 case 'quoted_identifier ' :
8170 $ token ['value ' ] = $ this ->decodeJson (
@@ -101,10 +90,8 @@ public function tokenize($input)
10190 $ offset += strlen ($ match [0 ]);
10291 }
10392
104- // Always end the token stream with an EOF token
10593 $ tokens [] = ['type ' => 'eof ' , 'pos ' => $ offset , 'value ' => null ];
10694
107- // Ensure that the expression did not contain invalid characters
10895 if (strlen ($ input ) != $ offset ) {
10996 $ this ->invalidExpression ($ input );
11097 }
@@ -114,31 +101,16 @@ public function tokenize($input)
114101
/**
 * Converts the text of a literal token into a decoded PHP value.
 *
 * The first and last characters of $value (the literal delimiters) are
 * removed, leading whitespace is trimmed, and escaped backticks are
 * unescaped. The remainder is JSON decoded as-is when it is exactly one
 * of the primitives true/false/null, starts with a double quote, "[" or
 * "{", or looks like a number. Anything else is wrapped in double quotes
 * and decoded as a JSON string.
 *
 * @param string $value  Matched literal text, including its delimiters
 * @param int    $offset Position of the literal within the expression
 * @param string $input  Complete expression, used for error reporting
 *
 * @return mixed Decoded value
 * @throws SyntaxErrorException if the literal is empty; decoding
 *                              failures are reported by decodeJson()
 */
private function takeLiteral($value, $offset, $input)
{
    // The primitives are anchored on both sides so that bare words which
    // merely contain them (e.g. "nullable") are still treated as strings.
    // The exponent classes hold only the intended characters, no "|".
    static $valid = '/^(true|false|null)$|^[\["{]|^\-?[0-9]*(\.[0-9]+)?([eE][+\-][0-9]+)?$/';

    $value = str_replace('\\`', '`', ltrim(substr($value, 1, -1)));

    if ($value === '') {
        throw $this->throwSyntax('Empty JSON literal', $offset, $input);
    }

    if (!preg_match($valid, $value)) {
        // Not recognisable as raw JSON: decode it as a quoted string.
        $value = '"' . $value . '"';
    }

    return $this->decodeJson($value, $offset, $input);
}
143115
144116 private function decodeJson ($ json , $ offset , $ input )
@@ -155,7 +127,7 @@ private function decodeJson($json, $offset, $input)
155127
156128 if ($ error = json_last_error ()) {
157129 $ message = isset ($ errs [$ error ]) ? $ errs [$ error ] : 'Unknown error ' ;
158- $ this ->throwSyntax (
130+ throw $ this ->throwSyntax (
159131 "Error decoding JSON: ( {$ error }) {$ message }, given {$ json }" ,
160132 $ offset ,
161133 $ input
@@ -167,7 +139,7 @@ private function decodeJson($json, $offset, $input)
167139
168140 private function throwSyntax ($ message , $ offset , $ input )
169141 {
170- throw new SyntaxErrorException (
142+ return new SyntaxErrorException (
171143 $ message ,
172144 ['value ' => substr ($ input , $ offset , 1 ), 'pos ' => $ offset ],
173145 $ input
@@ -177,12 +149,10 @@ private function throwSyntax($message, $offset, $input)
/**
 * Reports the first position in the input that no token pattern matches.
 *
 * Tokens are matched one after another, each anchored at the current
 * offset, until matching fails; a syntax error is then raised for the
 * offset that was reached.
 *
 * @param string $input Expression that failed to tokenize completely
 *
 * @throws SyntaxErrorException always
 */
private function invalidExpression($input)
{
    // The trailing "A" modifier anchors each match at the given offset.
    $anchored = $this->regex . 'A';
    $matches = [];

    for ($offset = 0; preg_match($anchored, $input, $matches, 0, $offset) === 1;) {
        $offset += strlen($matches[0]);
    }

    throw $this->throwSyntax('Unexpected character', $offset, $input);
}
188158}
0 commit comments