@@ -82,6 +82,31 @@ class Tokenizer
8282 */
8383 protected $ nextToken = [];
8484
/**
 * Number of bytes of the input that still have to be tokenized.
 *
 * Initialised from strlen() of the input in processTokens() and reduced
 * by the length of every token consumed in processOneToken().
 *
 * @var int
 */
protected $currentStringLength = 0;

/**
 * Value of $currentStringLength before the latest token was consumed.
 *
 * processTokens() seeds it with the input length plus one and stops
 * looping as soon as it is no longer greater than $currentStringLength,
 * i.e. when the last pass consumed nothing.
 *
 * @var int
 */
protected $oldStringLength = 0;

/**
 * Token produced by the latest getToken() call, or '' before the first
 * token of a run.
 *
 * processOneToken() stores the whole token here (the same value it indexes
 * with self::TOKEN_VALUE), not just the token text, so this is not always
 * a string.
 *
 * @var array|string
 */
protected $previousToken = '';

/**
 * Length in bytes (strlen) of the TOKEN_VALUE entry of the latest token.
 *
 * @var int
 */
protected $tokenLength = 0;

/**
 * Tokens collected during the current processTokens() run; reset at the
 * start of each run.
 *
 * @var array
 */
protected $tokens = [];
109+
85110
86111 /**
87112 * Builds all the regular expressions needed to Tokenize the input.
@@ -119,30 +144,63 @@ protected function initRegex($variable)
119144 */
public function tokenize($string)
{
    // Nothing to scan: an empty input yields an empty token list.
    if (strlen($string) < 1) {
        return [];
    }

    // Non-empty input is handed to the token loop.
    return $this->processTokens($string);
}
123149
124- if (strlen ($ string ) > 0 ) {
125- $ token = null ;
126- $ currentStringLength = strlen ($ string );
127- $ oldStringLength = strlen ($ string ) + 1 ;
/**
 * Resets the per-run state and consumes the input one token at a time.
 *
 * The loop keeps calling processOneToken() while the remaining length is
 * not negative and the previous pass made progress (the length recorded
 * before that pass is greater than the current one). Because the length
 * check is ">= 0", one more pass still runs once the remaining length has
 * reached zero.
 *
 * @param string $string Input to split into tokens.
 *
 * @return array The tokens collected in $this->tokens.
 */
protected function processTokens($string)
{
    $inputLength = strlen($string);

    $this->tokens = [];
    $this->previousToken = '';
    $this->currentStringLength = $inputLength;
    // One more than the input length so the progress check passes on the first pass.
    $this->oldStringLength = $inputLength + 1;

    while ($this->currentStringLength >= 0 && $this->oldStringLength > $this->currentStringLength) {
        $string = $this->processOneToken($string);
    }

    return $this->tokens;
}
128170
129- while ($ currentStringLength >= 0 ) {
130- if ($ oldStringLength <= $ currentStringLength ) {
131- break ;
132- }
/**
 * Reads one token from the start of $string and updates the run state.
 *
 * The token is appended to $this->tokens and remembered as the previous
 * token; the length counters are moved forward by the byte length of the
 * token's TOKEN_VALUE entry.
 *
 * @param string $string Input that has not been tokenized yet.
 *
 * @return string $string with the consumed token cut off its front
 *                (the result of substr()).
 */
protected function processOneToken($string)
{
    $token = $this->getToken($string, $this->currentStringLength, $this->previousToken);

    // Record the token and keep it as context for the next getToken() call.
    $this->tokens[] = $token;
    $consumed = strlen($token[self::TOKEN_VALUE]);
    $this->tokenLength = $consumed;
    $this->previousToken = $token;

    // Remember the length before this token so the caller can detect a pass
    // that consumed nothing.
    $this->oldStringLength = $this->currentStringLength;
    $this->currentStringLength = $this->oldStringLength - $consumed;

    return substr($string, $consumed);
}
140188
141- $ string = substr ($ string , $ tokenLength );
142- }
/**
 * Returns the next token, taken from the token cache when possible.
 *
 * useTokenCache() supplies a cache key; if that key is non-empty and
 * $this->tokenCache has an entry for it, the token comes from
 * getNextTokenFromCache(). Otherwise it comes from getNextTokenFromString(),
 * which also receives the previous token and the cache key.
 *
 * @param string $string              Input that has not been tokenized yet.
 * @param int    $currentStringLength Remaining length passed to useTokenCache().
 * @param mixed  $previousToken       Token returned by the previous call, if any.
 *
 * @return array|mixed
 */
protected function getToken($string, $currentStringLength, $previousToken)
{
    $cacheKey = $this->useTokenCache($string, $currentStringLength);
    $isCached = !empty($cacheKey) && isset($this->tokenCache[$cacheKey]);

    return $isCached
        ? $this->getNextTokenFromCache($cacheKey)
        : $this->getNextTokenFromString($string, $previousToken, $cacheKey);
}
147205
148206 /**
@@ -289,22 +347,4 @@ protected function quoteRegex($string)
289347 {
290348 return preg_quote ($ string , '/ ' );
291349 }
292-
293- /**
294- * @param $string
295- * @param $currentStringLength
296- * @param $token
297- *
298- * @return array|mixed
299- */
300- protected function getToken ($ string , $ currentStringLength , $ token )
301- {
302- $ cacheKey = $ this ->useTokenCache ($ string , $ currentStringLength );
303- if (!empty ($ cacheKey ) && isset ($ this ->tokenCache [$ cacheKey ])) {
304- $ token = $ this ->getNextTokenFromCache ($ cacheKey );
305- } else {
306- $ token = $ this ->getNextTokenFromString ($ string , $ token , $ cacheKey );
307- }
308- return $ token ;
309- }
310350}
0 commit comments