Skip to content

Commit 422b3e3

Browse files
committed
Expression: Rewrote expression parsing.
1 parent 466b59c commit 422b3e3

File tree

1 file changed

+123
-111
lines changed

1 file changed

+123
-111
lines changed

src/Components/Expression.php

Lines changed: 123 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@
2828
class Expression extends Component
2929
{
3030

31+
/**
32+
* List of allowed reserved keywords in expressions.
33+
*
34+
* @var array
35+
*/
36+
private static $ALLOWED_KEYWORDS = array(
37+
'AS' => 1, 'DUAL' => 1, 'NULL' => 1, 'REGEXP' => 1
38+
);
39+
3140
/**
3241
* The name of this database.
3342
*
@@ -137,9 +146,9 @@ public static function parse(Parser $parser, TokensList $list, array $options =
137146
/**
138147
* Whether an alias is expected. Set to true once the `AS` keyword was found.
139148
*
140-
* @var int $alias
149+
* @var bool $alias
141150
*/
142-
$alias = 0;
151+
$alias = false;
143152

144153
/**
145154
* Counts brackets.
@@ -149,17 +158,14 @@ public static function parse(Parser $parser, TokensList $list, array $options =
149158
$brackets = 0;
150159

151160
/**
152-
* Keeps track of the previous token.
153-
* Possible values:
154-
* string, if function was previously found;
155-
* true, if opening bracket was previously found;
156-
* null, in any other case.
161+
* Keeps track of the previous two tokens.
157162
*
158-
* @var string|bool $prev
163+
* @var Token[] $prev
159164
*/
160-
$prev = null;
165+
$prev = array(null, null);
161166

162167
for (; $list->idx < $list->count; ++$list->idx) {
168+
163169
/**
164170
* Token parsed at this moment.
165171
*
@@ -173,59 +179,74 @@ public static function parse(Parser $parser, TokensList $list, array $options =
173179
}
174180

175181
// Skipping whitespaces and comments.
176-
if (($token->type === Token::TYPE_WHITESPACE) || ($token->type === Token::TYPE_COMMENT)) {
177-
if (($isExpr) && (!$alias)) {
182+
if (($token->type === Token::TYPE_WHITESPACE)
183+
|| ($token->type === Token::TYPE_COMMENT)
184+
) {
185+
if ($isExpr) {
178186
$ret->expr .= $token->token;
179187
}
180-
if (($alias === 0) && (empty($options['noAlias'])) && (!$isExpr) && (!$dot) && (!empty($ret->expr))) {
181-
$alias = 1;
182-
}
183188
continue;
184189
}
185190

186-
if (($token->type === Token::TYPE_KEYWORD)
187-
&& ($token->flags & Token::FLAG_KEYWORD_RESERVED)
188-
&& ($token->value !== 'DUAL')
189-
&& ($token->value !== 'NULL')
190-
) {
191-
// Keywords may be found only between brackets.
192-
if ($brackets === 0) {
193-
if ((empty($options['noAlias'])) && ($token->value === 'AS')) {
194-
$alias = 2;
195-
continue;
196-
}
197-
if (!($token->flags & Token::FLAG_KEYWORD_FUNCTION)) {
191+
if ($token->type === Token::TYPE_KEYWORD) {
192+
if (($brackets > 0) && (empty($ret->subquery))
193+
&& (!empty(Parser::$STATEMENT_PARSERS[$token->value]))
194+
) {
195+
// A `(` was previously found and this keyword is the
196+
// beginning of a statement, so this is a subquery.
197+
$ret->subquery = $token->value;
198+
} elseif ($token->flags & Token::FLAG_KEYWORD_FUNCTION) {
199+
$isExpr = true;
200+
} elseif (($token->flags & Token::FLAG_KEYWORD_RESERVED)
201+
&& ($brackets === 0)
202+
) {
203+
if (empty(self::$ALLOWED_KEYWORDS[$token->value])) {
204+
// A reserved keyword that is not allowed in the
205+
// expression was found so the expression must have
206+
// ended and a new clause is starting.
198207
break;
199208
}
200-
} elseif ($prev === true) {
201-
if ((empty($ret->subquery) && (!empty(Parser::$STATEMENT_PARSERS[$token->value])))) {
202-
// A `(` was previously found and this keyword is the
203-
// beginning of a statement, so this is a subquery.
204-
$ret->subquery = $token->value;
209+
if ($token->value === 'AS') {
210+
if (!empty($options['noAlias'])) {
211+
break;
212+
}
213+
if (!empty($ret->alias)) {
214+
$parser->error(
215+
__('An alias was previously found.'),
216+
$token
217+
);
218+
break;
219+
}
220+
$alias = true;
221+
continue;
205222
}
223+
$isExpr = true;
206224
}
207225
}
208226

209227
if ($token->type === Token::TYPE_OPERATOR) {
210228
if ((!empty($options['noBrackets']))
211229
&& (($token->value === '(') || ($token->value === ')'))
212230
) {
231+
// No brackets were expected.
213232
break;
214233
}
215234
if ($token->value === '(') {
216235
++$brackets;
217-
if ((empty($ret->function)) && ($prev !== null) && ($prev !== true)) {
218-
// A function name was previously found and now an open
219-
// bracket, so this is a function call.
220-
$ret->function = $prev;
236+
if ((empty($ret->function)) && ($prev[1] !== null)
237+
&& (($prev[1]->type === Token::TYPE_NONE)
238+
|| ($prev[1]->type === Token::TYPE_SYMBOL)
239+
|| (($prev[1]->type === Token::TYPE_KEYWORD)
240+
&& ($prev[1]->flags & Token::FLAG_KEYWORD_FUNCTION)))
241+
) {
242+
$ret->function = $prev[1]->value;
221243
}
222-
$isExpr = true;
223244
} elseif ($token->value === ')') {
224245
--$brackets;
225246
if ($brackets === 0) {
226247
if (!empty($options['bracketsDelimited'])) {
227-
// The current token is the last brackets, the next
228-
// one will be outside.
248+
// The current token is the last bracket, the next
249+
// one will be outside the expression.
229250
$ret->expr .= $token->token;
230251
++$list->idx;
231252
break;
@@ -236,109 +257,100 @@ public static function parse(Parser $parser, TokensList $list, array $options =
236257
break;
237258
}
238259
} elseif ($token->value === ',') {
260+
// Expressions are comma-delimited.
239261
if ($brackets === 0) {
240262
break;
241263
}
242264
}
243265
}
244266

245-
if (($token->type === Token::TYPE_NUMBER) || ($token->type === Token::TYPE_BOOL)
246-
|| (($token->type === Token::TYPE_SYMBOL) && ($token->flags & Token::FLAG_SYMBOL_VARIABLE))
247-
|| (($token->type === Token::TYPE_OPERATOR)) && ($token->value !== '.')
267+
if (($token->type === Token::TYPE_NUMBER)
268+
|| ($token->type === Token::TYPE_BOOL)
269+
|| (($token->type === Token::TYPE_SYMBOL)
270+
&& ($token->flags & Token::FLAG_SYMBOL_VARIABLE))
271+
|| (($token->type === Token::TYPE_OPERATOR)
272+
&& ($token->value !== '.'))
248273
) {
249-
// Numbers, booleans and operators are usually part of expressions.
274+
// Numbers, booleans and operators (except dot) are usually part
275+
// of expressions.
250276
$isExpr = true;
251277
}
252278

279+
// Saving the previous token.
280+
$prev[0] = $prev[1];
281+
$prev[1] = $token;
282+
253283
if ($alias) {
254284
// An alias is expected (the keyword `AS` was previously found).
255285
if (!empty($ret->alias)) {
256286
$parser->error(__('An alias was previously found.'), $token);
287+
break;
257288
}
258289
$ret->alias = $token->value;
259-
$alias = 0;
260-
} else {
261-
if (!$isExpr) {
262-
if (($token->type === Token::TYPE_OPERATOR) && ($token->value === '.')) {
263-
// Found a `.` which means we expect a column name and
264-
// the column name we parsed is actually the table name
265-
// and the table name is actually a database name.
266-
if ((!empty($ret->database)) || ($dot)) {
267-
$parser->error(__('Unexpected dot.'), $token);
268-
}
269-
$ret->database = $ret->table;
270-
$ret->table = $ret->column;
271-
$ret->column = null;
272-
$dot = true;
273-
} else {
274-
// We found the name of a column (or table if column
275-
// field should be skipped; used to parse table names).
276-
$field = (!empty($options['skipColumn'])) ? 'table' : 'column';
277-
if (!empty($ret->$field)) {
278-
// No alias is expected.
279-
if (!empty($options['noAlias'])) {
280-
break;
281-
}
282-
283-
// Parsing aliases without `AS` keyword and any
284-
// whitespace.
285-
// Example: SELECT 1`foo`
286-
if (($token->type === Token::TYPE_STRING)
287-
|| (($token->type === Token::TYPE_SYMBOL)
288-
&& ($token->flags & Token::FLAG_SYMBOL_BACKTICK))
289-
) {
290-
if (!empty($ret->alias)) {
291-
$parser->error(
292-
__('An alias was previously found.'),
293-
$token
294-
);
295-
}
296-
$ret->alias = $token->value;
297-
}
298-
} else {
299-
$ret->$field = $token->value;
300-
}
301-
$dot = false;
290+
$alias = false;
291+
} elseif ($isExpr) {
292+
// Handling aliases.
293+
if (/* (empty($ret->alias)) && */ ($brackets === 0)
294+
&& (($prev[0] === null)
295+
|| ((($prev[0]->type !== Token::TYPE_OPERATOR)
296+
|| ($prev[0]->token === ')'))
297+
&& (($prev[0]->type !== Token::TYPE_KEYWORD)
298+
|| (!($prev[0]->flags & Token::FLAG_KEYWORD_RESERVED)))))
299+
&& (($prev[1]->type === Token::TYPE_STRING)
300+
|| (($prev[1]->type === Token::TYPE_SYMBOL)
301+
&& (!($prev[1]->flags & Token::FLAG_SYMBOL_VARIABLE)))
302+
|| ($prev[1]->type === Token::TYPE_NONE))
303+
) {
304+
if (!empty($ret->alias)) {
305+
$parser->error(__('An alias was previously found.'), $token);
306+
break;
302307
}
308+
$ret->alias = $prev[1]->value;
303309
} else {
304-
// Parsing aliases without `AS` keyword.
305-
// Example: SELECT 'foo' `bar`
306-
if (($brackets === 0) && (empty($options['noAlias']))) {
307-
if (($token->type === Token::TYPE_NONE) || ($token->type === Token::TYPE_STRING)
308-
|| (($token->type === Token::TYPE_SYMBOL) && ($token->flags & Token::FLAG_SYMBOL_BACKTICK))
309-
) {
310-
if (!empty($ret->alias)) {
311-
$parser->error(
312-
__('An alias was previously found.'),
313-
$token
314-
);
315-
}
316-
$ret->alias = $token->value;
317-
continue;
310+
$ret->expr .= $token->token;
311+
}
312+
} elseif (!$isExpr) {
313+
if (($token->type === Token::TYPE_OPERATOR) && ($token->value === '.')) {
314+
// Found a `.` which means we expect a column name and
315+
// the column name we parsed is actually the table name
316+
// and the table name is actually a database name.
317+
if ((!empty($ret->database)) || ($dot)) {
318+
$parser->error(__('Unexpected dot.'), $token);
319+
}
320+
$ret->database = $ret->table;
321+
$ret->table = $ret->column;
322+
$ret->column = null;
323+
$dot = true;
324+
$ret->expr .= $token->token;
325+
} else {
326+
$field = (!empty($options['skipColumn'])) ? 'table' : 'column';
327+
if (empty($ret->$field)) {
328+
$ret->$field = $token->value;
329+
$ret->expr .= $token->token;
330+
$dot = false;
331+
} else {
332+
// No alias is expected.
333+
if (!empty($options['noAlias'])) {
334+
break;
335+
}
336+
if (!empty($ret->alias)) {
337+
$parser->error(__('An alias was previously found.'), $token);
338+
break;
318339
}
340+
$ret->alias = $token->value;
319341
}
320342
}
321-
322-
$ret->expr .= $token->token;
323-
}
324-
325-
if (($token->type === Token::TYPE_KEYWORD) && ($token->flags & Token::FLAG_KEYWORD_FUNCTION)) {
326-
$prev = strtoupper($token->value);
327-
} elseif (($token->type === Token::TYPE_OPERATOR) || ($token->value === '(')) {
328-
$prev = true;
329-
} else {
330-
$prev = null;
331343
}
332344
}
333345

334-
if ($alias === 2) {
346+
if ($alias) {
335347
$parser->error(
336348
__('An alias was expected.'),
337349
$list->tokens[$list->idx - 1]
338350
);
339351
}
340352

341-
// Whitespaces might be added at the end.
353+
// White-spaces might be added at the end.
342354
$ret->expr = trim($ret->expr);
343355

344356
if (empty($ret->expr)) {

0 commit comments

Comments
 (0)