|
36 | 36 | import re |
37 | 37 | import copy |
38 | 38 |
|
| 39 | + |
39 | 40 | class LexError(Exception): |
40 | 41 | ''' |
41 | 42 | Exception raised if an invalid character is encountered and no default |
@@ -326,7 +327,15 @@ def _build(cls): |
326 | 327 | # Form the master regular expression |
327 | 328 | #previous = ('|' + cls._master_re.pattern) if cls._master_re else '' |
328 | 329 | # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags) |
329 | | - cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags) |
| 330 | + filtered_parts = [] |
| 331 | + string_parts = [] |
| 332 | + for p in parts: |
| 333 | + if 'QUOTE_STRING' in p or 'DQUOTE_STRING' in p: |
| 334 | + string_parts.append(p) |
| 335 | + else: |
| 336 | + filtered_parts.append(p) |
| 337 | + cls._master_re = cls.regex_module.compile('|'.join(filtered_parts), cls.reflags) |
| 338 | + cls._strings_pattern_re = cls.regex_module.compile('|'.join(string_parts), cls.reflags) |
330 | 339 |
|
331 | 340 | # Verify that that ignore and literals specifiers match the input type |
332 | 341 | if not isinstance(cls.ignore, str): |
@@ -360,13 +369,14 @@ def pop_state(self): |
360 | 369 | self.begin(self.__state_stack.pop()) |
361 | 370 |
|
362 | 371 | def tokenize(self, text, lineno=1, index=0): |
363 | | - _ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None |
| 372 | + _ignored_tokens = _master_re =_strings_pattern_re = _ignore = _token_funcs = _literals = _remapping = None |
364 | 373 |
|
365 | 374 | # --- Support for state changes |
366 | 375 | def _set_state(cls): |
367 | | - nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping |
| 376 | + nonlocal _ignored_tokens, _master_re, _strings_pattern_re, _ignore, _token_funcs, _literals, _remapping |
368 | 377 | _ignored_tokens = cls._ignored_tokens |
369 | 378 | _master_re = cls._master_re |
| 379 | + _strings_pattern_re = cls._strings_pattern_re |
370 | 380 | _ignore = cls.ignore |
371 | 381 | _token_funcs = cls._token_funcs |
372 | 382 | _literals = cls.literals |
@@ -406,7 +416,11 @@ def _reject(): |
406 | 416 | tok = Token() |
407 | 417 | tok.lineno = lineno |
408 | 418 | tok.index = index |
409 | | - m = _master_re.match(text, index) |
| 419 | + |
| 420 | + m = _strings_pattern_re.match(text, index) |
| 421 | + if m is None: |
| 422 | + m = _master_re.match(text, index) |
| 423 | + |
410 | 424 | if m: |
411 | 425 | tok.end = index = m.end() |
412 | 426 | tok.value = m.group() |
|
0 commit comments