Commit c13ad11

Merge pull request #246 from xnuinside/v1.1.0_release
V1.1.0 release
2 parents: bf95ea9 + c0d07a0 · commit c13ad11

13 files changed: +53136 -658 lines


CHANGELOG.txt

Lines changed: 10 additions & 0 deletions
@@ -1,3 +1,13 @@
+**v1.1.0**
+### Improvements
+MySQL:
+1. Added support for INDEX statement inside table definition
+2. Added support for MySQL INVISIBLE/VISIBLE statement - https://github.com/xnuinside/simple-ddl-parser/issues/243
+
+Snowflake:
+1. Added support for cluster by statement before columns definition - https://github.com/xnuinside/simple-ddl-parser/issues/234
+
+
 **v1.0.4**
 ### Improvements
 1. Support functions with schema prefix in `DEFAULT` and `CHECK` statements. https://github.com/xnuinside/simple-ddl-parser/issues/240
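
The two MySQL additions above can be exercised through the parser's public API. A hedged sketch follows: the `DDLParser(...).run(group_by_type=True)` call mirrors the repo's own tests, while the DDL string and the expected output keys are illustrative rather than taken from this commit.

```python
# Illustrative only: an inline INDEX with the INVISIBLE modifier, the two
# MySQL features added in v1.1.0.
from simple_ddl_parser import DDLParser

ddl = """
CREATE TABLE users (
    id INT NOT NULL,
    name VARCHAR(255),
    INDEX idx_name (name) INVISIBLE
);
"""

result = DDLParser(ddl).run(group_by_type=True)

# The inline index is expected to land in the table's "index" list, with the
# INVISIBLE modifier surfaced as an extra flag on that entry.
print(result["tables"][0]["index"])
```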

README.md

Lines changed: 10 additions & 0 deletions
@@ -486,6 +486,16 @@ for help with debugging & testing support for BigQuery dialect DDLs:
 
 
 ## Changelog
+**v1.1.0**
+### Improvements
+MySQL:
+1. Added support for INDEX statement inside table definition
+2. Added support for MySQL INVISIBLE/VISIBLE statement - https://github.com/xnuinside/simple-ddl-parser/issues/243
+
+Snowflake:
+1. Added support for cluster by statement before columns definition - https://github.com/xnuinside/simple-ddl-parser/issues/234
+
+
 **v1.0.4**
 ### Improvements
 1. Support functions with schema prefix in `DEFAULT` and `CHECK` statements. https://github.com/xnuinside/simple-ddl-parser/issues/240

docs/README.rst

Lines changed: 17 additions & 1 deletion
@@ -537,7 +537,7 @@ Big thanks for the involving & contribution with test cases with DDL samples & o
 
 
 * https://github.com/kukigai ,
-* https://github.com/Awalkman90 ,
+* https://github.com/kliushnichenko ,
 * https://github.com/geob3d
 
 for help with debugging & testing support for BigQuery dialect DDLs:
@@ -549,6 +549,22 @@ for help with debugging & testing support for BigQuery dialect DDLs:
 Changelog
 ---------
 
+**v1.1.0**
+
+Improvements
+^^^^^^^^^^^^
+
+MySQL:
+
+
+#. Added support for INDEX statement inside table definition
+#. Added support for MySQL INVISIBLE/VISIBLE statement - https://github.com/xnuinside/simple-ddl-parser/issues/243
+
+Snowflake:
+
+
+#. Added support for cluster by statement before columns definition - https://github.com/xnuinside/simple-ddl-parser/issues/234
+
 **v1.0.4**
 
 Improvements

pyproject.toml

Lines changed: 5 additions & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "simple-ddl-parser"
-version = "1.0.4"
+version = "1.1.0"
 description = "Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl."
 authors = ["Iuliia Volkova <[email protected]>"]
 license = "MIT"
@@ -22,6 +22,10 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules"
 ]
 
+[[tool.poetry.source]]
+name = "pypi-public"
+url = "https://pypi.org/simple/"
+
 [tool.poetry.dependencies]
 python = "^3.6"
 dataclasses = { version = "0.8", python = ">=3.6,<3.7" }

simple_ddl_parser/ddl_parser.py

Lines changed: 9 additions & 8 deletions
@@ -62,9 +62,9 @@ def after_columns_tokens(self, t: LexToken) -> LexToken:
         return t
 
     def process_body_tokens(self, t: LexToken) -> LexToken:
-        if (
-            self.lexer.last_par == "RP" and not self.lexer.lp_open
-        ) or self.lexer.after_columns:
+        if (self.lexer.last_par == "RP" and not self.lexer.lp_open) or (
+            self.lexer.after_columns and not self.lexer.columns_def
+        ):
             t = self.after_columns_tokens(t)
         elif self.lexer.columns_def:
             t.type = tok.columns_definition.get(t.value.upper(), t.type)
@@ -83,7 +83,6 @@ def tokens_not_columns_names(self, t: LexToken) -> LexToken:
         t_tag = self.parse_tags_symbols(t)
         if t_tag:
             return t_tag
-
         if "ARRAY" in t.value:
             t.type = "ARRAY"
             return t
@@ -98,7 +97,8 @@ def tokens_not_columns_names(self, t: LexToken) -> LexToken:
         elif self.lexer.last_token != "COMMA":
             t.type = tok.common_statements.get(t.value.upper(), t.type)
         else:
-            t.type = tok.first_liners.get(t.value.upper(), t.type)
+            if not (self.lexer.columns_def and self.lexer.after_columns):
+                t.type = tok.first_liners.get(t.value.upper(), t.type)
 
         # get tokens from other token dicts
         t = self.process_body_tokens(t)
@@ -198,7 +198,6 @@ def t_ID(self, t: LexToken):
         self.commat_type(t)
 
         self.set_lexx_tags(t)
-
         return self.set_last_token(t)
 
     def commat_type(self, t: LexToken):
@@ -209,14 +208,16 @@ def capitalize_tokens(self, t: LexToken):
         if t.type != "ID" and t.type not in ["LT", "RT"]:
             t.value = t.value.upper()
 
-    def set_parathesis_tokens(self, t: LexToken):
+    def set_parenthesis_tokens(self, t: LexToken):
         if t.type in ["RP", "LP"]:
             if t.type == "RP" and self.lexer.lp_open:
                 self.lexer.lp_open -= 1
+                if not self.lexer.lp_open:
+                    self.lexer.after_columns = True
             self.lexer.last_par = t.type
 
     def set_lexx_tags(self, t: LexToken):
-        self.set_parathesis_tokens(t)
+        self.set_parenthesis_tokens(t)
 
         if t.type == "ALTER":
             self.lexer.is_alter = True
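
The lexer changes above hinge on one piece of bookkeeping: `after_columns` now flips on only when the outermost parenthesis of the column block closes (`lp_open` drops back to zero), so clauses that follow the closing `)` are tokenized on the after-columns path. A simplified, standalone sketch of that idea, with hypothetical names rather than the library's actual lexer class:

```python
# Hypothetical, simplified illustration of the parenthesis bookkeeping; this is
# not the library's real lexer class, only the state transition it adds.
class ParenState:
    def __init__(self) -> None:
        self.lp_open = 0            # how many "(" are currently open
        self.after_columns = False  # True once the column block has closed

    def feed(self, token_type: str) -> None:
        if token_type == "LP":
            self.lp_open += 1
        elif token_type == "RP" and self.lp_open:
            self.lp_open -= 1
            if not self.lp_open:
                # all parentheses closed: what follows is table-level syntax
                self.after_columns = True


state = ParenState()
for token_type in ["LP", "ID", "LP", "ID", "RP", "RP"]:
    state.feed(token_type)
print(state.after_columns)  # True after the outermost ")" is consumed
```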

simple_ddl_parser/dialects/snowflake.py

Lines changed: 10 additions & 5 deletions
@@ -1,7 +1,7 @@
+import re
 from typing import List
 
 from simple_ddl_parser.utils import remove_par
-import re
 
 
 class Snowflake:
@@ -11,12 +11,17 @@ def p_clone(self, p: List) -> None:
         p[0] = {"clone": {"from": p_list[-1]}}
 
     def p_expression_cluster_by(self, p: List) -> None:
-        """expr : expr CLUSTER BY LP pid RP
-        | expr CLUSTER BY pid
-        """
+        """expr : expr cluster_by"""
+        p_list = list(p)
         p[0] = p[1]
+        p[0].update(p_list[-1])
+
+    def p_cluster_by(self, p: List) -> None:
+        """cluster_by : CLUSTER BY LP pid RP
+        | CLUSTER BY pid
+        """
         p_list = remove_par(list(p))
-        p[0]["cluster_by"] = p_list[-1]
+        p[0] = {"cluster_by": p_list[-1]}
 
     def p_multi_id_or_string(self, p: List) -> None:
         """multi_id_or_string : id_or_string

simple_ddl_parser/dialects/sql.py

Lines changed: 40 additions & 15 deletions
@@ -981,6 +981,26 @@ def p_index_table_name(self, p: List) -> None:
         table_name = p_list[-1]
         p[0].update({"schema": schema, "table_name": table_name})
 
+    def p_c_index(self, p: List) -> None:
+        """c_index : INDEX LP index_pid RP
+        | INDEX id LP index_pid RP
+        | c_index INVISIBLE
+        | c_index VISIBLE"""
+        p_list = remove_par(p_list=list(p))
+        if isinstance(p_list[1], dict):
+            p[0] = p_list[1]
+            p[0]["details"] = {p_list[-1].lower(): True}
+        else:
+            if len(p_list) == 3:
+                name = None
+            else:
+                name = p_list[2]
+            p[0] = {
+                "index_stmt": True,
+                "name": name,
+                "columns": p_list[-1]["detailed_columns"],
+            }
+
     def p_create_index(self, p: List) -> None:
         """create_index : CREATE INDEX id
         | CREATE UNIQUE INDEX id
@@ -1020,7 +1040,9 @@ def p_expression_table(self, p: List) -> None:  # noqa R701
         | table_name LP defcolumn
         | table_name
         | table_name LP RP
+        | table_name cluster_by LP defcolumn
         | expr COMMA defcolumn
+        | expr COMMA c_index
         | expr COMMA
         | expr COMMA constraint
         | expr COMMA check_ex
@@ -1041,30 +1063,33 @@ def p_expression_table(self, p: List) -> None:  # noqa R701
         """
         p[0] = p[1] or defaultdict(list)
         p_list = remove_par(list(p))
+        if len(p_list) > 2 and "cluster_by" in p_list[2]:
+            p[0].update(p_list[2])
         if p_list[-1] != "," and p_list[-1] is not None:
             if "type" in p_list[-1] and "name" in p_list[-1]:
                 if not p[0].get("columns"):
                     p[0]["columns"] = []
                 p[0]["columns"].append(p_list[-1])
             elif "index_stmt" in p_list[-1]:
+                del p_list[-1]["index_stmt"]
                 if not p[0].get("index"):
                     p[0]["index"] = []
                 index_data = p_list[-1]
-                p[0]["index"].append(
-                    {
-                        "clustered": False,
-                        "columns": [index_data["columns"]],
-                        "detailed_columns": [
-                            {
-                                "name": index_data["columns"],
-                                "nulls": "LAST",
-                                "order": "ASC",
-                            }
-                        ],
-                        "index_name": index_data["name"],
-                        "unique": False,
-                    }
-                )
+                _index = {
+                    "clustered": False,
+                    "columns": [index_data["columns"]],
+                    "detailed_columns": [
+                        {
+                            "name": index_data["columns"],
+                            "nulls": "LAST",
+                            "order": "ASC",
+                        }
+                    ],
+                    "index_name": index_data["name"],
+                    "unique": False,
+                }
+                _index.update(index_data.get("details", {}))
+                p[0]["index"].append(_index)
             elif "check" in p_list[-1]:
                 p[0] = self.extract_check_data(p, p_list)
             elif "enforced" in p_list[-1]:

simple_ddl_parser/parsetab.py

Lines changed: 52588 additions & 468 deletions
Large diffs are not rendered by default.

simple_ddl_parser/tokens.py

Lines changed: 5 additions & 0 deletions
@@ -60,6 +60,8 @@
     "WITH",
     "ORDER",
     "NOORDER",
+    "VISIBLE",
+    "INVISIBLE",
 }
 columns_definition = {value: value for value in columns_definition}
 columns_definition[","] = "COMMA"
@@ -70,9 +72,12 @@
     "CONSTRAINT",
     "FOREIGN",
     "PRIMARY",
+    "INDEX",
     "UNIQUE",
     "CHECK",
     "WITH",
+    "CLUSTER",
+    "BY",
 }
 first_liners = {value: value for value in first_liners}

tests/dialects/test_mssql_specific.py

Lines changed: 2 additions & 2 deletions
@@ -1876,7 +1876,7 @@ def test_constraint_primary_key():
 
     ddl = """CREATE TABLE [dbo].[users_WorkSchedule](
     [id] [int] IDENTITY(1,1) NOT NULL,
-    [user_id] [int] NULL),
+    [user_id] [int] NULL,
     CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY CLUSTERED
     (
     [id] ASC
@@ -1885,7 +1885,7 @@ def test_constraint_primary_key():
     CONSTRAINT [PK_users_WorkSchedule_id] PRIMARY KEY
     (
     [id] ASC
-    )
+    ))
     """
     result = DDLParser(ddl).run(group_by_type=True, output_mode="mssql")
     assert result == expected
