Skip to content

Commit 3c1d633

Browse files
committed
PostgreSQL & Snowflake Improvements
1 parent 6b8d0c4 commit 3c1d633

File tree

11 files changed

+31799
-30638
lines changed

11 files changed

+31799
-30638
lines changed

CHANGELOG.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
**v1.3.0**
2+
3+
### Fixes
4+
PostgreSQL:
5+
1. Timezone was moved out from the type definition to the keyword 'with_time_zone'; it can be True (if with time zone) or False (if without).
6+
BigQuery:
7+
1. Previously the range in RANGE_BUCKETS was parsed as columns; now this behaviour is changed and
8+
the range is placed in its own keyword - 'range' (can be an array or a str).
9+
Also, for all `*_TRUNC` partitions like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'.
10+
11+
### Improvements
12+
PostgreSQL:
13+
1. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250
14+
15+
BigQuery:
16+
1. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183
17+
118
**v1.2.1**
219
### Fixes
320
MySQL:

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,23 @@ for help with debugging & testing support for BigQuery dialect DDLs:
486486

487487

488488
## Changelog
489+
**v1.3.0**
490+
491+
### Fixes
492+
PostgreSQL:
493+
1. Timezone was moved out from type definition to keyword 'with_time_zone' it can be True (if with time zone) or False (if without)
494+
BigQuery:
495+
1. Previously Range in RANGE_BUCKETS was parsed as a columns, now this behaviour is changed and
496+
range placed in own keyword - 'range' (can be array or str).
497+
Also, for all `*_TRUNC` partitions like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'.
498+
499+
### Improvements
500+
PostgreSQL:
501+
1. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250
502+
503+
BigQuery:
504+
1. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183
505+
489506
**v1.2.1**
490507
### Fixes
491508
MySQL:

docs/README.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,33 @@ for help with debugging & testing support for BigQuery dialect DDLs:
549549
Changelog
550550
---------
551551

552+
**v1.3.0**
553+
554+
Fixes
555+
^^^^^
556+
557+
PostgreSQL:
558+
559+
560+
#. Timezone was moved out from type definition to keyword 'with_time_zone' it can be True (if with time zone) or False (if without)
561+
BigQuery:
562+
#. Previously Range in RANGE_BUCKETS was parsed as a columns, now this behaviour is changed and
563+
range placed in own keyword - 'range' (can be array or str).
564+
Also, for all ``*_TRUNC`` partitions like DATETIME_TRUNC, TIMESTAMP_TRUNC, etc., the second argument moved to the arg 'trunc_by'.
565+
566+
Improvements
567+
^^^^^^^^^^^^
568+
569+
PostgreSQL:
570+
571+
572+
#. Added support for PostgreSQL with / without time zone - https://github.com/xnuinside/simple-ddl-parser/issues/250
573+
574+
BigQuery:
575+
576+
577+
#. Added support for GENERATE_ARRAY in RANGE_BUCKETS https://github.com/xnuinside/simple-ddl-parser/issues/183
578+
552579
**v1.2.1**
553580

554581
Fixes

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "simple-ddl-parser"
3-
version = "1.2.1"
3+
version = "1.3.0"
44
description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
55
authors = ["Iuliia Volkova <xnuinside@gmail.com>"]
66
license = "MIT"

simple_ddl_parser/dialects/psql.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,13 @@ def p_expr_inherits(self, p: List) -> None:
1313
"table_name": p_list[-1]["table_name"],
1414
}
1515
p[1].update({"inherits": table_identifier})
16+
17+
def p_timezone(self, p: List) -> None:
    """timezone : WITH id id
    | WITHOUT id id"""
    # NOTE: the docstring above is the PLY grammar rule for this
    # production - do not edit it without changing the grammar.
    # A column type ends in "WITH time zone" or "WITHOUT time zone";
    # collapse that into a single boolean flag on the column.
    tokens = remove_par(list(p))
    p[0] = {"with_time_zone": "WITH" in tokens}

simple_ddl_parser/dialects/sql.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,56 @@
99

1010

1111
class AfterColumns:
    @staticmethod
    def _parse_range_bucket(
        data: List,
    ) -> Tuple[List[str], Optional[Union[str, List[str]]]]:
        """Split BigQuery RANGE_BUCKET arguments into (columns, range).

        ``data`` is the tail of the parsed production. Two shapes occur:
        ``[columns, ',', 'GENERATE_ARRAY(...)']`` - the range was parsed
        as a single function-call string - or ``[mixed_tokens]`` where an
        inline array literal ``[a, b, c]`` got tokenized in among the
        column names and has to be peeled out here.

        Returns the column names plus the extracted range: a str for a
        function call, a list of str for an array literal, or None when
        no range marker was found.
        """
        # Renamed from `range` to avoid shadowing the builtin.
        range_: Optional[Union[str, List[str]]] = None

        if len(data) == 3:
            # RANGE_BUCKET(col, GENERATE_ARRAY(...)) -> range is a str.
            columns = data[0]
            range_ = data[2]
        else:
            # RANGE_BUCKET(col, [1,2,3]) -> array tokens are mixed into
            # the column list; "[" opens the range, "]" closes it.
            columns = []
            for column in data[0]:
                if "[" in column:
                    range_ = [column.replace("[", "")]
                elif range_:
                    range_.append(column.replace("]", ""))
                else:
                    columns.append(column)
        return columns, range_

    def p_expression_partition_by(self, p: List) -> None:
        """expr : expr PARTITION BY LP pid RP
        | expr PARTITION BY id LP pid RP
        | expr PARTITION BY pid
        | expr PARTITION BY id pid
        | expr PARTITION BY id LP pid COMMA f_call RP
        """
        # NOTE: the docstring above is the PLY grammar rule for this
        # production - do not edit it without changing the grammar.
        p[0] = p[1]
        p_list = remove_par(list(p))
        _type, range_, trunc_by = None, None, None

        if isinstance(p_list[4], list):
            # Plain `PARTITION BY (col, ...)` - columns already parsed.
            columns = p_list[4]
        elif "_TRUNC" in p_list[4]:
            # BigQuery DATE_TRUNC / DATETIME_TRUNC / TIMESTAMP_TRUNC:
            # the last parsed item is the truncation granularity.
            _type = p_list[4]
            trunc_by = p_list[5].pop(-1)
            columns = p_list[5]
        elif p_list[4].upper() == "RANGE_BUCKET":
            # BigQuery RANGE_BUCKET, possibly with GENERATE_ARRAY.
            _type = p_list[4]
            columns, range_ = self._parse_range_bucket(p_list[5:])
        else:
            columns = p_list[-1]
        if not _type and isinstance(p_list[4], str):
            _type = p_list[4]
        p[0]["partition_by"] = {"columns": columns, "type": _type}
        if range_:
            p[0]["partition_by"]["range"] = range_
        if trunc_by:
            p[0]["partition_by"]["trunc_by"] = trunc_by
2762

2863

2964
class Database:
@@ -419,6 +454,7 @@ def p_defcolumn(self, p: List) -> None:
419454
| defcolumn as_virtual
420455
| defcolumn constraint
421456
| defcolumn generated_by
457+
| defcolumn timezone
422458
"""
423459
p[0] = p[1]
424460
p_list = list(p)

simple_ddl_parser/parsetab.py

Lines changed: 31514 additions & 30623 deletions
Large diffs are not rendered by default.

simple_ddl_parser/tokens.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
"POLICY",
5959
"MASKING",
6060
"WITH",
61+
"WITHOUT",
6162
"ORDER",
6263
"NOORDER",
6364
"VISIBLE",

tests/dialects/test_bigquery.py

Lines changed: 127 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,8 @@ def test_table_name_with_project_id():
484484
}
485485
],
486486
"partition_by": {
487-
"columns": ["fiscal_half_year_reporting_week_no", "DAY"],
487+
"columns": ["fiscal_half_year_reporting_week_no"],
488+
"trunc_by": "DAY",
488489
"type": "DATETIME_TRUNC",
489490
},
490491
"partitioned_by": [],
@@ -646,7 +647,8 @@ def test_multiple_options():
646647
{"option_four": '"Four"'},
647648
],
648649
"partition_by": {
649-
"columns": ["fiscal_half_year_reporting_week_no", "DAY"],
650+
"columns": ["fiscal_half_year_reporting_week_no"],
651+
"trunc_by": "DAY",
650652
"type": "DATETIME_TRUNC",
651653
},
652654
"partitioned_by": [],
@@ -869,3 +871,126 @@ def test_bigquery_options_string():
869871
"types": [],
870872
}
871873
assert result == expected
874+
875+
876+
def test_bigquery_partition_range():
    """RANGE_BUCKET with GENERATE_ARRAY exposes the range as a string."""
    ddl = """
CREATE TABLE data.test(
field_a INT OPTIONS(description='some description')
)
PARTITION BY RANGE_BUCKET(field_a, GENERATE_ARRAY(10, 1000, 1));"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    field_a = {
        "check": None,
        "default": None,
        "name": "field_a",
        "nullable": True,
        "options": [{"description": "'some description'"}],
        "references": None,
        "size": None,
        "type": "INT",
        "unique": False,
    }
    expected = [
        {
            "alter": {},
            "checks": [],
            "columns": [field_a],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field_a"],
                "range": "GENERATE_ARRAY(10,1000,1)",
                "type": "RANGE_BUCKET",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
    assert parsed == expected
916+
917+
918+
def test_array_range():
    """RANGE_BUCKET with an inline array literal yields a list range."""
    # NOTE(review): the DDL below contains an unbalanced "]]" before the
    # closing paren - presumably exercising lexer tolerance; confirm this
    # is intentional and not a typo in the fixture.
    ddl = """CREATE TABLE data.test(
field_a INT OPTIONS(description='some description')
)
PARTITION BY RANGE_BUCKET(field_a, [1,2,3]]) ;"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    field_a = {
        "check": None,
        "default": None,
        "name": "field_a",
        "nullable": True,
        "options": [{"description": "'some description'"}],
        "references": None,
        "size": None,
        "type": "INT",
        "unique": False,
    }
    expected = [
        {
            "alter": {},
            "checks": [],
            "columns": [field_a],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field_a"],
                "range": ["1", "2", "3"],
                "type": "RANGE_BUCKET",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
    assert expected == parsed
956+
957+
958+
def test_date_trunc():
    """DATE_TRUNC's second argument lands in 'trunc_by', not in columns."""
    ddl = """CREATE TABLE data.test(
field_a INT OPTIONS(description='some description')
)
PARTITION BY DATE_TRUNC(field, MONTH);"""

    parsed = DDLParser(ddl).run(output_mode="bigquery")

    field_a = {
        "check": None,
        "default": None,
        "name": "field_a",
        "nullable": True,
        "options": [{"description": "'some description'"}],
        "references": None,
        "size": None,
        "type": "INT",
        "unique": False,
    }
    expected = [
        {
            "alter": {},
            "checks": [],
            "columns": [field_a],
            "dataset": "data",
            "index": [],
            "partition_by": {
                "columns": ["field"],
                "trunc_by": "MONTH",
                "type": "DATE_TRUNC",
            },
            "partitioned_by": [],
            "primary_key": [],
            "table_name": "test",
            "tablespace": None,
        }
    ]
    assert parsed == expected

tests/dialects/test_psql.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,36 @@ def test_cast_generated():
8686
}
8787
]
8888
assert expected == result
89+
90+
91+
def test_with_time_zone():
    """PostgreSQL 'timestamp with time zone' sets with_time_zone=True."""
    ddl = """
CREATE TABLE public.test (date_updated timestamp with time zone);"""

    parsed = DDLParser(ddl).run(output_mode="postgres")

    date_updated = {
        "check": None,
        "default": None,
        "name": "date_updated",
        "nullable": True,
        "references": None,
        "size": None,
        "type": "timestamp",
        "unique": False,
        "with_time_zone": True,
    }
    expected = [
        {
            "alter": {},
            "checks": [],
            "columns": [date_updated],
            "index": [],
            "partitioned_by": [],
            "primary_key": [],
            "schema": "public",
            "table_name": "test",
            "tablespace": None,
        }
    ]
    assert expected == parsed

0 commit comments

Comments
 (0)