Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Alphabetical list of contributors:
* Florian Bauer <[email protected]>
* Fredy Wijaya <[email protected]>
* Gavin Wahl <[email protected]>
* Guillaume Tassery <[email protected]>
* hurcy <[email protected]>
* Ian Robertson <[email protected]>
* JacekPliszka <[email protected]>
Expand Down
19 changes: 11 additions & 8 deletions sqlparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,31 @@
__all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli']


def parse(sql, encoding=None, lexer=None):
    """Parse sql and return a list of statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :param lexer: A custom SQL query lexer (optional); when ``None`` the
        default lexer instance is used.
    :returns: A tuple of :class:`~sqlparse.sql.Statement` instances.
    """
    # Materialize the parsestream() generator so callers get an indexable
    # tuple; keyword arguments are forwarded explicitly to stay robust
    # against future signature changes.
    return tuple(parsestream(sql, encoding=encoding, lexer=lexer))


def parsestream(stream, encoding=None, lexer=None):
    """Parses sql statements from file-like object.

    :param stream: A file-like object.
    :param encoding: The encoding of the stream contents (optional).
    :param lexer: A custom SQL query lexer (optional); forwarded to the
        filter stack as ``custom_lexer``.
    :returns: A generator of :class:`~sqlparse.sql.Statement` instances.
    """
    stack = engine.FilterStack()
    # Grouping turns the flat token stream into structured statements.
    stack.enable_grouping()
    return stack.run(stream, encoding=encoding, custom_lexer=lexer)


def format(sql, encoding=None, lexer=None, **options):
    """Format *sql* according to *options*.

    Available options are documented in :ref:`formatting`.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :param lexer: A custom SQL query lexer (optional).
    :returns: The formatted SQL statement as string.
    """
    stack = engine.FilterStack()
    # Validate before building the filter stack so bad options raise
    # early instead of failing mid-run.
    options = formatter.validate_options(options)
    stack = formatter.build_filter_stack(stack, options)
    stack.postprocess.append(filters.SerializerUnicode())
    return ''.join(stack.run(sql, encoding=encoding, custom_lexer=lexer))


def split(sql, encoding=None, lexer=None):
    """Split *sql* into single statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :param lexer: A custom SQL query lexer (optional).
    :returns: A list of strings.
    """
    # No grouping needed: each yielded Statement is simply rendered back
    # to text and stripped of surrounding whitespace.
    stack = engine.FilterStack()
    return [str(stmt).strip()
            for stmt in stack.run(sql, encoding=encoding, custom_lexer=lexer)]
4 changes: 2 additions & 2 deletions sqlparse/engine/filter_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def __init__(self):
def enable_grouping(self):
    # Request that the raw token stream be grouped into structured
    # statements when the stack runs; presumably read by run() — the
    # consuming code is not visible in this view.
    self._grouping = True

def run(self, sql, encoding=None):
stream = lexer.tokenize(sql, encoding)
def run(self, sql, encoding=None, custom_lexer=None):
stream = lexer.tokenize(sql, encoding=encoding, custom_lexer=custom_lexer)
# Process token stream
for filter_ in self.preprocess:
stream = filter_.process(stream)
Expand Down
8 changes: 6 additions & 2 deletions sqlparse/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class Lexer:

# Cache slot for the shared default lexer, presumably returned by
# get_default_instance() (referenced below but defined outside this view).
# NOTE(review): the name is misspelled ("intance") — confirm every other
# reference uses the same spelling before renaming.
_default_intance = None

def __init__(self):
    # A new lexer starts from the default configuration; clear() is
    # defined elsewhere in this class — TODO confirm it resets the
    # regex/keyword tables.
    self.clear()

# Development notes:
# - This class is prepared to be able to support additional SQL dialects
# in the future by adding additional functions that take the place of
Expand Down Expand Up @@ -146,10 +149,11 @@ def get_tokens(self, text, encoding=None):
yield tokens.Error, char


def tokenize(sql, encoding=None, custom_lexer=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :param custom_lexer: A :class:`Lexer` instance to use instead of the
        shared default instance (optional).
    """
    lexer = Lexer.get_default_instance() if custom_lexer is None else custom_lexer
    return lexer.get_tokens(sql, encoding)
32 changes: 32 additions & 0 deletions tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,3 +566,35 @@ def test_configurable_regex():
for t in tokens
if t.ttype not in sqlparse.tokens.Whitespace
)[4] == (sqlparse.tokens.Keyword, "zorder by")


def test_custom_lexer():
    """A user-supplied Lexer passed via ``lexer=`` is honored by
    sqlparse.parse() without affecting the default configuration."""
    custom = Lexer()
    zorder_rule = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword)

    # Splice the extra keyword rule into a copy of the default regex table.
    regexes = list(keywords.SQL_REGEX[:38])
    regexes.append(zorder_rule)
    regexes.extend(keywords.SQL_REGEX[38:])
    custom.set_SQL_REGEX(regexes)

    for keyword_set in (keywords.KEYWORDS_COMMON,
                        keywords.KEYWORDS_ORACLE,
                        keywords.KEYWORDS_PLPGSQL,
                        keywords.KEYWORDS_HQL,
                        keywords.KEYWORDS_MSACCESS,
                        keywords.KEYWORDS):
        custom.add_keywords(keyword_set)

    def significant(statement):
        # (ttype, value) pairs of all non-whitespace tokens.
        return [(t.ttype, t.value) for t in statement
                if t.ttype not in sqlparse.tokens.Whitespace]

    parsed = sqlparse.parse("select * from foo zorder by bar;", lexer=custom)[0]
    assert significant(parsed)[4] == (sqlparse.tokens.Keyword, "zorder by")

    # Should not impact parsing that uses the default configuration.
    parsed = sqlparse.parse("select * from foo forder by bar;")[0]
    assert significant(parsed)[4] != (sqlparse.tokens.Keyword, "zorder by")