Skip to content

Commit a188500

Browse files
authored
Merge pull request #58 from mindsdb/parse-big-strings
Workaround for big strings parsing
2 parents d12a372 + cdaf14a commit a188500

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

mindsdb_sql_parser/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
__title__ = 'mindsdb_sql_parser'
22
__package_name__ = 'mindsdb_sql_parser'
3-
__version__ = '0.11.1'
3+
__version__ = '0.11.2'
44
__description__ = "Mindsdb SQL parser"
55
__email__ = "[email protected]"
66
__author__ = 'MindsDB Inc'

sly/lex.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import re
3737
import copy
3838

39+
3940
class LexError(Exception):
4041
'''
4142
Exception raised if an invalid character is encountered and no default
@@ -326,7 +327,15 @@ def _build(cls):
326327
# Form the master regular expression
327328
#previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
328329
# cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags)
329-
cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags)
330+
filtered_parts = []
331+
string_parts = []
332+
for p in parts:
333+
if 'QUOTE_STRING' in p or 'DQUOTE_STRING' in p:
334+
string_parts.append(p)
335+
else:
336+
filtered_parts.append(p)
337+
cls._master_re = cls.regex_module.compile('|'.join(filtered_parts), cls.reflags)
338+
cls._strings_pattern_re = cls.regex_module.compile('|'.join(string_parts), cls.reflags)
330339

331340
# Verify that the ignore and literals specifiers match the input type
332341
if not isinstance(cls.ignore, str):
@@ -360,13 +369,14 @@ def pop_state(self):
360369
self.begin(self.__state_stack.pop())
361370

362371
def tokenize(self, text, lineno=1, index=0):
363-
_ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None
372+
_ignored_tokens = _master_re =_strings_pattern_re = _ignore = _token_funcs = _literals = _remapping = None
364373

365374
# --- Support for state changes
366375
def _set_state(cls):
367-
nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping
376+
nonlocal _ignored_tokens, _master_re, _strings_pattern_re, _ignore, _token_funcs, _literals, _remapping
368377
_ignored_tokens = cls._ignored_tokens
369378
_master_re = cls._master_re
379+
_strings_pattern_re = cls._strings_pattern_re
370380
_ignore = cls.ignore
371381
_token_funcs = cls._token_funcs
372382
_literals = cls.literals
@@ -406,7 +416,11 @@ def _reject():
406416
tok = Token()
407417
tok.lineno = lineno
408418
tok.index = index
409-
m = _master_re.match(text, index)
419+
420+
m = _strings_pattern_re.match(text, index)
421+
if m is None:
422+
m = _master_re.match(text, index)
423+
410424
if m:
411425
tok.end = index = m.end()
412426
tok.value = m.group()

0 commit comments

Comments
 (0)