|
37 | 37 | import copy |
38 | 38 |
|
39 | 39 |
|
40 | | -_strings_pattern_re = re.compile('(?P<QUOTE_STRING>(\'(?:\\\\.|[^\'])*(?:\'\'(?:\\\\.|[^\'])*)*\'))|(?P<DQUOTE_STRING>("(?:\\\\.|[^"])*"))') |
41 | | - |
42 | | - |
43 | 40 | class LexError(Exception): |
44 | 41 | ''' |
45 | 42 | Exception raised if an invalid character is encountered and no default |
@@ -330,7 +327,15 @@ def _build(cls): |
330 | 327 | # Form the master regular expression |
331 | 328 | #previous = ('|' + cls._master_re.pattern) if cls._master_re else '' |
332 | 329 | # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags) |
333 | | - cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags) |
| 330 | + filtered_parts = [] |
| 331 | + string_parts = [] |
| 332 | + for p in parts: |
| 333 | + if 'QUOTE_STRING' in p or 'DQUOTE_STRING' in p: |
| 334 | + string_parts.append(p) |
| 335 | + else: |
| 336 | + filtered_parts.append(p) |
| 337 | + cls._master_re = cls.regex_module.compile('|'.join(filtered_parts), cls.reflags) |
| 338 | + cls._strings_pattern_re = cls.regex_module.compile('|'.join(string_parts), cls.reflags) |
334 | 339 |
|
335 | 340 | # Verify that the ignore and literals specifiers match the input type |
336 | 341 | if not isinstance(cls.ignore, str): |
@@ -364,13 +369,14 @@ def pop_state(self): |
364 | 369 | self.begin(self.__state_stack.pop()) |
365 | 370 |
|
366 | 371 | def tokenize(self, text, lineno=1, index=0): |
367 | | - _ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None |
| 372 | + _ignored_tokens = _master_re =_strings_pattern_re = _ignore = _token_funcs = _literals = _remapping = None |
368 | 373 |
|
369 | 374 | # --- Support for state changes |
370 | 375 | def _set_state(cls): |
371 | | - nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping |
| 376 | + nonlocal _ignored_tokens, _master_re, _strings_pattern_re, _ignore, _token_funcs, _literals, _remapping |
372 | 377 | _ignored_tokens = cls._ignored_tokens |
373 | 378 | _master_re = cls._master_re |
| 379 | + _strings_pattern_re = cls._strings_pattern_re |
374 | 380 | _ignore = cls.ignore |
375 | 381 | _token_funcs = cls._token_funcs |
376 | 382 | _literals = cls.literals |
@@ -411,9 +417,9 @@ def _reject(): |
411 | 417 | tok.lineno = lineno |
412 | 418 | tok.index = index |
413 | 419 |
|
414 | | - m = _master_re.match(text, index, index + 50) |
| 420 | + m = _strings_pattern_re.match(text, index) |
415 | 421 | if m is None: |
416 | | - m = _strings_pattern_re.match(text, index) |
| 422 | + m = _master_re.match(text, index) |
417 | 423 |
|
418 | 424 | if m: |
419 | 425 | tok.end = index = m.end() |
|
0 commit comments