Skip to content

Commit 209b4d3

Browse files
Martin Zoellner
authored and committed
added tests and fixed issues with existing tests
1 parent 0978145 commit 209b4d3

File tree

5 files changed

+424
-312
lines changed

5 files changed

+424
-312
lines changed

simple_ddl_parser/dialects/sql.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -971,7 +971,9 @@ def p_expression_comment_on(self, p: List):
971971
p[0] = {"comment_on": comment_on}
972972
p_list = list(p)
973973
obj_type = p_list[3]
974-
comment_on["comment"] = p_list[-1]
974+
975+
# Cleanse comment quotes and handle escaped quotes
976+
comment_on["comment"] = p_list[-1][1:-1].replace("''", "'")
975977
comment_on["object_type"] = obj_type
976978

977979
if obj_type == "COLUMN":
@@ -1055,7 +1057,7 @@ def p_id_equals(self, p: List) -> None:
10551057
"""
10561058
p_list = list(p)
10571059

1058-
if not p_list[-1] in [")", "]"]:
1060+
if p_list[-1] not in [")", "]"]:
10591061
p[0] = {p[1]: p_list[-1]}
10601062
else:
10611063
if len(p_list) > 6 and isinstance(p_list[5], list):

simple_ddl_parser/output/base_data.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -285,13 +285,13 @@ def set_object_comment(self, statement: Dict) -> None:
285285

286286
def set_table_comment(self, statement: Dict) -> None:
287287
comment = statement["comment_on"]
288-
self.comment = comment["comment"][1:-1]
288+
self.comment = comment["comment"]
289289

290290
def set_column_comments(self, statement: Dict) -> None:
291291
comment = statement["comment_on"]
292292
for column in self.columns:
293293
if column["name"] == comment["column_name"]:
294-
column["comment"] = comment["comment"][1:-1]
294+
column["comment"] = comment["comment"]
295295
break
296296

297297
def set_default_columns_from_alter(self, statement: Dict) -> None:

simple_ddl_parser/parser.py

Lines changed: 32 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import logging
33
import os
44
import re
5-
from typing import Dict, List, Optional, Tuple, Union
5+
from typing import Dict, List, Optional, Tuple, Union, cast
66

77
from ply import lex, yacc
88

@@ -19,6 +19,8 @@
1919
IN_COM = "--"
2020
MYSQL_COM = "#"
2121

22+
LF_IN_QUOTE = r"\N"
23+
2224

2325
def set_logging_config(
2426
log_level: Union[str, int], log_file: Optional[str] = None
@@ -173,7 +175,7 @@ def process_regex_input(self, data):
173175
return data
174176

175177
def pre_process_data(self, data):
176-
data = data.decode("utf-8")
178+
data = cast(str, data.decode("utf-8"))
177179
# todo: not sure how to workaround ',' normal way
178180
if "input.regex" in data:
179181
data = self.process_regex_input(data)
@@ -182,37 +184,44 @@ def pre_process_data(self, data):
182184
result = []
183185
in_quote = False
184186
i = 0
185-
symbol_spacing_map = {
186-
",": " , ",
187-
"(": " ( ",
188-
")": " ) ",
189-
}
187+
symbol_spacing_map = {",", "(", ")"}
188+
189+
# Special handling for odd number of single quotes
190+
if data.count("'") % 2 != 0:
191+
data = data.replace("\\'", "pars_m_single")
192+
190193
while i < len(data):
191194
char = data[i]
195+
startswith = data[i:].startswith
192196

193197
# Handle quote start/end
194-
if char == "'" and (i == 0 or data[i - 1] != "\\"):
198+
if char == "'":
195199
in_quote = not in_quote
196200
result.append(char)
201+
202+
# Handle line feeds in quotes
203+
elif in_quote and startswith("\\n"):
204+
result.append(LF_IN_QUOTE)
205+
i += 1
206+
207+
# Handle special unicode quotes
208+
elif not in_quote and (startswith(r"\u2018") or startswith(r"\u2019")):
209+
result.append("'")
210+
i += 5
211+
197212
# Handle symbols that need spacing
198213
elif not in_quote and char in symbol_spacing_map:
199-
result.append(symbol_spacing_map[char])
214+
result.append(f" {char} ")
215+
200216
# Keep all other characters as-is
201217
else:
202218
result.append(char)
203219

204220
i += 1
205221

206222
data = "".join(result)
207-
208-
if data.count("'") % 2 != 0:
209-
data = data.replace("\\'", "pars_m_single")
210223
data = (
211224
data.replace("\\x", "\\0")
212-
.replace("‘", "'")
213-
.replace("’", "'")
214-
.replace("\\u2018", "'")
215-
.replace("\\u2019", "'")
216225
.replace("'\\t'", "'pars_m_t'")
217226
.replace("\\t", " ")
218227
)
@@ -296,7 +305,13 @@ def process_line(
296305
) -> Tuple[Optional[str], bool]:
297306
self.pre_process_line()
298307

299-
self.line = self.line.strip().replace("\n", "").replace("\t", "")
308+
# Remove whitespace, while preserving newlines in quotes
309+
self.line = (
310+
self.line.strip()
311+
.replace("\n", "")
312+
.replace("\t", "")
313+
.replace(LF_IN_QUOTE, "\\n")
314+
)
300315
self.skip = self.check_line_on_skip_words()
301316

302317
self.parse_set_statement()

0 commit comments

Comments
 (0)