Skip to content

Commit f6ad6ec

Browse files
authored
Merge pull request #233 from dmaresma/feature/fix_snowflake
Feature/fix snowflake
2 parents f5fe0f6 + 63fd597 commit f6ad6ec

File tree

6 files changed

+525
-351
lines changed

6 files changed

+525
-351
lines changed

CHANGELOG.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
**v1.0.3**
2+
### Improvements
3+
1. Fixed bug with `CREATE OR REPLACE SCHEMA`.
4+
5+
### Snowflake
6+
1. Fixed bug with Snowflake (stage_)fileformat option values equal to a single string, such as `FIELD_OPTIONALLY_ENCLOSED_BY = '\"'`, `FIELD_DELIMITER = '|'`
7+
2. Improved Snowflake fileformat parsing: key-equals-value pairs are now returned as a dict type.
8+
19
**v1.0.2**
210
### Improvements
311
1. Fixed bug that placed the first table property value in the 'authorization' key. Now the real property name is used.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "simple-ddl-parser"
3-
version = "1.0.2"
3+
version = "1.0.3"
44
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
55
authors = ["Iuliia Volkova <[email protected]>"]
66
license = "MIT"

simple_ddl_parser/dialects/snowflake.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from typing import List
22

33
from simple_ddl_parser.utils import remove_par
4+
import re
45

56

67
class Snowflake:
8+
79
def p_clone(self, p: List) -> None:
810
"""clone : CLONE id"""
911
p_list = list(p)
@@ -17,20 +19,26 @@ def p_expression_cluster_by(self, p: List) -> None:
1719
p_list = remove_par(list(p))
1820
p[0]["cluster_by"] = p_list[-1]
1921

20-
def p_multiple_format_equals(self0, p: List) -> None:
21-
"""multiple_format_equals : fmt_equals
22-
| multiple_format_equals fmt_equals
22+
def p_multi_id_or_string(self, p: List) -> None:
23+
"""multi_id_or_string : id_or_string
24+
| multi_id_or_string id_or_string
25+
| f_call
26+
| multi_id_or_string f_call
2327
"""
24-
# Handles multiple format in the same format statement
25-
p[0] = p[1]
28+
p_list = list(p)
29+
if isinstance(p[1], list):
30+
p[0] = p[1]
31+
p[0].append(p_list[-1])
32+
else:
33+
value = " ".join(p_list[1:])
34+
p[0] = value
2635

2736
def p_fmt_equals(self, p: List) -> None:
28-
"""fmt_equals : id LP RP
29-
| id LP fmt_equals RP
30-
| id LP multi_id RP
37+
"""fmt_equals : id LP multi_id_or_string RP
3138
"""
39+
fmt_split = re.compile(r"\w+\s*=\s*\w+|\w+\s*=\s*'.'|\w+\s*=\s*'..'|\w+\s*=\s*\('.+'\)|\w+\s*=\(\)")
3240
p_list = list(p)
33-
p[0] = p_list[2:][1].split(" ")
41+
p[0] = {f.split('=')[0].strip(): f.split('=')[1].strip() for f in fmt_split.findall(p_list[3]) if '=' in f}
3442

3543
def p_table_property_equals(self, p: List) -> None:
3644
"""table_property_equals : id id id_or_string
@@ -164,13 +172,13 @@ def p_expression_catalog(self, p: List) -> None:
164172
p[0]["catalog"] = p_list[-1]
165173

166174
def p_expression_file_format(self, p: List) -> None:
167-
"""expr : expr FILE_FORMAT multiple_format_equals"""
175+
"""expr : expr FILE_FORMAT fmt_equals"""
168176
p[0] = p[1]
169177
p_list = remove_par(list(p))
170178
p[0]["file_format"] = p_list[-1]
171179

172180
def p_expression_stage_file_format(self, p: List) -> None:
173-
"""expr : expr STAGE_FILE_FORMAT multiple_format_equals"""
181+
"""expr : expr STAGE_FILE_FORMAT fmt_equals"""
174182
p[0] = p[1]
175183
p_list = remove_par(list(p))
176184
p[0]["stage_file_format"] = p_list[-1] if len(p_list[-1]) > 1 else p_list[-1][0]

simple_ddl_parser/dialects/sql.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,8 @@ def set_auth_property_in_schema(self, p: List, p_list: List) -> None:
516516

517517
def p_c_schema(self, p: List) -> None:
518518
"""c_schema : CREATE SCHEMA
519-
| CREATE ID SCHEMA"""
519+
| CREATE ID SCHEMA
520+
| CREATE OR REPLACE SCHEMA"""
520521
if len(p) == 4:
521522
p[0] = {"remote": True}
522523

@@ -539,6 +540,7 @@ def p_create_schema(self, p: List) -> None:
539540
del p_list[-1]
540541

541542
self.add_if_not_exists(p[0], p_list)
543+
542544
if isinstance(p_list[1], dict):
543545
p[0] = p_list[1]
544546
self.set_properties_for_schema_and_database(p, p_list)

simple_ddl_parser/parsetab.py

Lines changed: 332 additions & 331 deletions
Large diffs are not rendered by default.

tests/dialects/test_snowflake.py

Lines changed: 162 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -967,15 +967,170 @@ def test_virtual_column_table():
967967
"location": "@ADL_Azure_Storage_Account_Container_Name/",
968968
"table_properties": {
969969
"auto_refresh": False,
970-
"file_format": [
971-
"TYPE","=","JSON",
972-
"NULL_IF","=('field')",
973-
"DATE_FORMAT","=","AUTO",
974-
"TRIM_SPACE","=","TRUE",
975-
],
976-
"stage_file_format": ["TYPE","=","JSON", "NULL_IF","=()"],
970+
"file_format": {'TYPE' : 'JSON', 'NULL_IF' : "('field')", 'DATE_FORMAT' : 'AUTO', 'TRIM_SPACE' : 'TRUE'},
971+
"stage_file_format": {'TYPE' : 'JSON', 'NULL_IF' :'()'},
977972
},
978973
}
979974
]
980975

981976
assert result_ext_table == expected_ext_table
977+
978+
def test_schema_create():
979+
ddl = """
980+
create schema myschema;
981+
"""
982+
result = DDLParser(ddl).run(output_mode="snowflake")
983+
expected = [{"schema_name": 'myschema'}]
984+
985+
assert expected == result
986+
987+
def test_schema_create_if_not_exists():
988+
ddl = """
989+
create schema if not exists myschema;
990+
"""
991+
result = DDLParser(ddl).run(output_mode="snowflake")
992+
expected = [{"schema_name": 'myschema', 'if_not_exists' : True}]
993+
994+
assert expected == result
995+
996+
def test_schema_create_or_replace():
997+
#https://docs.snowflake.com/en/sql-reference/sql/create-schema
998+
ddl = """
999+
create or replace schema myschema;
1000+
"""
1001+
result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
1002+
expected = [{"schema_name": 'myschema'}]
1003+
1004+
assert result == expected
1005+
1006+
def test_external_table_with_nullif():
1007+
ddl = """create or replace external table if not exists ${database_name}.MySchemaName.MyTableName(
1008+
"Filename" VARCHAR(16777216) AS (METADATA$FILENAME))
1009+
partition by ("Filename")
1010+
location = @ADL_DH_DL_PTS/
1011+
auto_refresh = false
1012+
file_format = (TYPE=JSON NULLIF=())
1013+
;"""
1014+
1015+
result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
1016+
expected = [{'table_name': 'MyTableName',
1017+
'schema': 'MySchemaName',
1018+
'primary_key': [],
1019+
'columns': [{
1020+
'name': 'Filename',
1021+
'type': 'VARCHAR',
1022+
'size': 16777216,
1023+
'references': None,
1024+
'unique': False,
1025+
'nullable': True,
1026+
'default': None,
1027+
'check': None,
1028+
'generated': {'as' : 'METADATA$FILENAME'}
1029+
}],
1030+
'alter': {},
1031+
'checks': [],
1032+
'index': [],
1033+
'partitioned_by': [],
1034+
'partition_by': {'columns': ['Filename'], 'type': None},
1035+
'tablespace': None,
1036+
'if_not_exists': True,
1037+
'table_properties': {'project': '${database_name}',
1038+
'auto_refresh': False,
1039+
'file_format': {'TYPE' : 'JSON', 'NULLIF':'()'},
1040+
},
1041+
'replace': True,
1042+
'location': '@ADL_DH_DL_PTS/',
1043+
'external' : True,
1044+
'primary_key_enforced' : None,
1045+
'clone' : None
1046+
}]
1047+
1048+
assert result == expected
1049+
1050+
def test_external_table_with_field_delimiter():
1051+
ddl = """create or replace external table if not exists ${database_name}.MySchemaName.MyTableName(
1052+
"Filename" VARCHAR(16777216) AS (METADATA$FILENAME))
1053+
partition by ("Filename")
1054+
location = @ADL_DH_DL_PTS/
1055+
auto_refresh = false
1056+
file_format = (TYPE=CSV FIELD_DELIMITER='|' TRIM_SPACE=TRUE ERROR_ON_COLUMN_COUNT_MISMATCH=FALSE REPLACE_INVALID_CHARACTERS=TRUE)
1057+
;"""
1058+
1059+
result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
1060+
expected = [{'table_name': 'MyTableName',
1061+
'schema': 'MySchemaName',
1062+
'primary_key': [],
1063+
'columns': [{
1064+
'name': 'Filename',
1065+
'type': 'VARCHAR',
1066+
'size': 16777216,
1067+
'references': None,
1068+
'unique': False,
1069+
'nullable': True,
1070+
'default': None,
1071+
'check': None,
1072+
'generated': {'as' : 'METADATA$FILENAME'}
1073+
}],
1074+
'alter': {},
1075+
'checks': [],
1076+
'index': [],
1077+
'partitioned_by': [],
1078+
'partition_by': {'columns': ['Filename'], 'type': None},
1079+
'tablespace': None,
1080+
'if_not_exists': True,
1081+
'table_properties': {'project': '${database_name}',
1082+
'auto_refresh': False,
1083+
'file_format': {'TYPE' : 'CSV',
1084+
'FIELD_DELIMITER' : "'|'",
1085+
'TRIM_SPACE' : 'TRUE',
1086+
'ERROR_ON_COLUMN_COUNT_MISMATCH' : 'FALSE',
1087+
'REPLACE_INVALID_CHARACTERS' :'TRUE'}},
1088+
'replace': True,
1089+
'location': '@ADL_DH_DL_PTS/',
1090+
'external' : True,
1091+
'primary_key_enforced' : None,
1092+
'clone' : None
1093+
}]
1094+
1095+
assert result == expected
1096+
1097+
def test_table_column_def_clusterby():
1098+
ddl = """CREATE TABLE ${database_name}.MySchemaName."MyTableName" (ID NUMBER(38,0) NOT NULL, "DocProv" VARCHAR(2)) cluster by ("DocProv");"""
1099+
1100+
result = DDLParser(ddl, normalize_names=True, debug=True).run(output_mode="snowflake")
1101+
expected = [{'table_name': 'MyTableName',
1102+
'schema': 'MySchemaName',
1103+
'primary_key': [],
1104+
'columns': [{
1105+
'name': 'ID',
1106+
'size' : (38,0),
1107+
'type': 'NUMBER',
1108+
'references': None,
1109+
'unique': False,
1110+
'nullable': False,
1111+
'default': None,
1112+
'check': None,
1113+
},
1114+
{
1115+
'name': 'DocProv',
1116+
'size' : 2,
1117+
'type': 'VARCHAR',
1118+
'references': None,
1119+
'unique': False,
1120+
'nullable': True,
1121+
'default': None,
1122+
'check': None,
1123+
}],
1124+
'alter': {},
1125+
'checks': [],
1126+
'index': [],
1127+
'partitioned_by': [],
1128+
'cluster_by' : ['DocProv'],
1129+
'tablespace': None,
1130+
'external' : False,
1131+
'primary_key_enforced' : None,
1132+
'table_properties': {'project': '${database_name}'},
1133+
'clone' : None
1134+
}]
1135+
1136+
assert result == expected

0 commit comments

Comments
 (0)