
Commit 9ced4e8

feat!: add support for noop string escapes
This commit lets dialects (like MySQL) allow escaping quotes that are not the current string delimiter, effectively treating the escape character as a no-op with respect to the quote's literal value while still consuming the backslash. Previously, tokenize(r"'\"a'", "mysql") resulted in \"a; with this change, it correctly results in "a.

Example, in MySQL:

'\"'  -> "    (backslash is consumed)
'\\"' -> \"   (backslash is escaped, quote remains)
1 parent 9454a18 commit 9ced4e8
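
A quick sketch of the new behavior (assuming a sqlglot build that includes this commit):

import sqlglot

# MySQL treats \" inside a single-quoted string as a no-op escape: the
# backslash is consumed and only the quote reaches the token's text.
tokens = sqlglot.tokenize(r"'\"a'", read="mysql")
print(tokens[0].text)  # -> "a   (before this commit: \"a)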

File tree

3 files changed: +25 −1 lines


sqlglot/dialects/mysql.py (1 addition, 0 deletions)

@@ -202,6 +202,7 @@ class Tokenizer(tokens.Tokenizer):
         STRING_ESCAPES = ["'", '"', "\\"]
         BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
         HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
+        STRING_ESCAPES_NOOP = ['"', "'"]

         NESTED_COMMENTS = False

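A dialect opts into this behavior by declaring STRING_ESCAPES_NOOP on its tokenizer, as MySQL does above. A minimal sketch for a hypothetical dialect (CustomDialect is illustrative, not part of this commit):

from sqlglot import tokens
from sqlglot.dialects.dialect import Dialect

class CustomDialect(Dialect):  # hypothetical, for illustration only
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        STRING_ESCAPES = ["\\"]
        # Backslash-escaping a quote that is not the active delimiter now
        # drops the backslash and keeps the quote, instead of keeping both.
        STRING_ESCAPES_NOOP = ["'", '"']
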
sqlglot/tokens.py (11 additions, 1 deletion)

@@ -548,6 +548,7 @@ def _quotes_to_format(
         }

         klass._STRING_ESCAPES = set(klass.STRING_ESCAPES)
+        klass._STRING_ESCAPES_NOOP = set(klass.STRING_ESCAPES_NOOP)
         klass._IDENTIFIER_ESCAPES = set(klass.IDENTIFIER_ESCAPES)
         klass._COMMENTS = {
             **dict(
@@ -668,6 +669,7 @@ class Tokenizer(metaclass=_Tokenizer):
     QUOTES: t.List[t.Tuple[str, str] | str] = ["'"]
     STRING_ESCAPES = ["'"]
     VAR_SINGLE_TOKENS: t.Set[str] = set()
+    STRING_ESCAPES_NOOP: t.List[str] = []

     # The strings in this list can always be used as escapes, regardless of the surrounding
     # identifier delimiters. By default, the closing delimiter is assumed to also act as an
@@ -698,6 +700,7 @@ class Tokenizer(metaclass=_Tokenizer):
     _STRING_ESCAPES: t.Set[str] = set()
     _KEYWORD_TRIE: t.Dict = {}
     _RS_TOKENIZER: t.Optional[t.Any] = None
+    _STRING_ESCAPES_NOOP: t.Set[str] = set()

     KEYWORDS: t.Dict[str, TokenType] = {
         **{f"{{%{postfix}": TokenType.BLOCK_START for postfix in ("", "+", "-")},
@@ -1509,14 +1512,21 @@ def _extract_string(
                     self._advance(2)
                     text += unescaped_sequence
                     continue
+
             if (
                 (self.STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS or not raw_string)
                 and self._char in escapes
-                and (self._peek == delimiter or self._peek in escapes)
+                and (
+                    self._peek == delimiter
+                    or self._peek in escapes
+                    or self._peek in self.STRING_ESCAPES_NOOP
+                )
                 and (self._char not in self._QUOTES or self._char == self._peek)
             ):
                 if self._peek == delimiter:
                     text += self._peek
+                elif self._peek in self.STRING_ESCAPES_NOOP and self._char != self._peek:
+                    text += self._peek
                 else:
                     text += self._char + self._peek

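The change in _extract_string gives the peeked character a third way to qualify as an escape target: besides the closing delimiter and the escape characters themselves, it may be a quote listed in STRING_ESCAPES_NOOP, in which case only the quote is kept. A simplified standalone sketch of that decision (illustrative, not the library's actual code; the real condition also guards raw strings and quote characters used as escapes):

def resolve_escape(char, peek, delimiter, escapes, noop_escapes):
    """Return the text to emit for an escape pair, or None if it is not one."""
    if char not in escapes:
        return None
    if peek == delimiter:
        return peek               # escaped delimiter: keep just the quote
    if peek in noop_escapes and char != peek:
        return peek               # no-op escape: consume char, keep the quote
    if peek in escapes:
        return char + peek        # ordinary escape pair: keep both characters
    return None

# MySQL-style configuration: the \ before " is consumed.
assert resolve_escape("\\", '"', "'", {"'", '"', "\\"}, {"'", '"'}) == '"'
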
tests/dialects/test_mysql.py (13 additions, 0 deletions)

@@ -432,6 +432,19 @@ def test_escape(self):
             },
         )

+        self.validate_all(
+            r"'\"'",
+            write={
+                "mysql": """\'"\'""",
+            },
+        )
+        self.validate_all(
+            "'\\\\\"a'",
+            write={
+                "mysql": "'\\\\\"a'",
+            },
+        )
+
     def test_introducers(self):
         self.validate_all(
             "_utf8mb4 'hola'",

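Read together, the two tests pin down the round-trip: a no-op backslash disappears on output, while an escaped backslash survives. An equivalent at the API level (a hedged sketch; the expected outputs are inferred from the test values above):

import sqlglot

# The no-op backslash in '\"' is consumed, so MySQL re-renders the literal
# as '"'; the escaped backslash in '\\"a' survives the round trip.
print(sqlglot.transpile("SELECT '\\\"'", read="mysql", write="mysql")[0])
# expected, per the first test: SELECT '"'
print(sqlglot.transpile("SELECT '\\\\\"a'", read="mysql", write="mysql")[0])
# expected, per the second test: SELECT '\\"a'
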