|
19 | 19 | use function is_array; |
20 | 20 | use function is_string; |
21 | 21 | use function key; |
22 | | -use function preg_match_all; |
| 22 | +use function ctype_alpha; |
| 23 | +use function ctype_alnum; |
23 | 24 | use function sprintf; |
24 | 25 | use function strlen; |
| 26 | +use function strtoupper; |
25 | 27 | use function substr; |
26 | 28 |
|
27 | 29 | /** |
|
31 | 33 | */ |
32 | 34 | final readonly class JoinSpec |
33 | 35 | { |
34 | | - private const IDENTIFIER_PATTERN = '/\b(?!(?:AS|AND|OR|BETWEEN)\b)([a-zA-Z_]\w*+(?:\.[a-zA-Z_]\w*+)*)(?!\s*\()/i'; |
35 | | - |
36 | 36 | public string|TableIdentifier|Select|ExpressionInterface $table; |
37 | 37 | public JoinTableType $tableType; |
38 | 38 | public ?string $alias; |
@@ -87,35 +87,46 @@ public function __construct(array $join) |
87 | 87 | $this->onTokens = is_string($on) ? self::tokenizeOn($on) : null; |
88 | 88 | } |
89 | 89 |
|
90 | | - /** |
91 | | - * Tokenize a string ON clause into Identifier and Literal tokens. |
92 | | - * Identifiers are word-like tokens excluding SQL keywords and function calls. |
93 | | - * |
94 | | - * @return ArgumentInterface[] |
95 | | - */ |
| 90 | + /** @return ArgumentInterface[] */ |
96 | 91 | private static function tokenizeOn(string $on): array |
97 | 92 | { |
98 | | - preg_match_all(self::IDENTIFIER_PATTERN, $on, $matches, PREG_OFFSET_CAPTURE); |
99 | | - |
100 | | - if ($matches[0] === []) { |
101 | | - return [new Literal($on)]; |
102 | | - } |
103 | | - |
104 | | - $tokens = []; |
105 | | - $pos = 0; |
106 | | - |
107 | | - foreach ($matches[0] as [$match, $offset]) { |
108 | | - if ($offset > $pos) { |
109 | | - $tokens[] = new Literal(substr($on, $pos, $offset - $pos)); |
| 93 | + $tokens = []; |
| 94 | + $len = strlen($on); |
| 95 | + $pos = 0; |
| 96 | + $literalStart = 0; |
| 97 | + |
| 98 | + while ($pos < $len) { |
| 99 | + $ch = $on[$pos]; |
| 100 | + |
| 101 | + if ($ch === '_' || ctype_alpha($ch)) { |
| 102 | + $wordStart = $pos++; |
| 103 | + while ($pos < $len && ($on[$pos] === '_' || $on[$pos] === '.' || ctype_alnum($on[$pos]))) { |
| 104 | + $pos++; |
| 105 | + } |
| 106 | + |
| 107 | + $word = substr($on, $wordStart, $pos - $wordStart); |
| 108 | + $upper = strtoupper($word); |
| 109 | + |
| 110 | + if ($upper === 'AND' || $upper === 'OR' || $upper === 'AS' || $upper === 'BETWEEN' |
| 111 | + || ($pos < $len && $on[$pos] === '(') |
| 112 | + ) { |
| 113 | + continue; |
| 114 | + } |
| 115 | + |
| 116 | + if ($wordStart > $literalStart) { |
| 117 | + $tokens[] = new Literal(substr($on, $literalStart, $wordStart - $literalStart)); |
| 118 | + } |
| 119 | + $tokens[] = new Identifier($word); |
| 120 | + $literalStart = $pos; |
| 121 | + } else { |
| 122 | + $pos++; |
110 | 123 | } |
111 | | - $tokens[] = new Identifier($match); |
112 | | - $pos = $offset + strlen($match); |
113 | 124 | } |
114 | 125 |
|
115 | | - if ($pos < strlen($on)) { |
116 | | - $tokens[] = new Literal(substr($on, $pos)); |
| 126 | + if ($literalStart < $len) { |
| 127 | + $tokens[] = new Literal(substr($on, $literalStart)); |
117 | 128 | } |
118 | 129 |
|
119 | | - return $tokens; |
| 130 | + return $tokens ?: [new Literal($on)]; |
120 | 131 | } |
121 | 132 | } |
0 commit comments