bodo-ai
diff --git a/‎pydough/errors/error_utils.py‎
Lines changed: 109 additions & 11 deletions b/‎pydough/errors/error_utils.py‎
Lines changed: 109 additions & 11 deletions
diff --git a/‎tests/conftest.py‎
Lines changed: 22 additions & 15 deletions b/‎tests/conftest.py‎
Lines changed: 22 additions & 15 deletions
@@ -37,6 +37,7 @@
 import keyword
 import re
 from abc import ABC, abstractmethod
+from enum import Enum, auto
 
 import numpy as np
 
@@ -167,28 +168,125 @@ class ValidSQLName(PyDoughPredicate):
     as the name for a SQL table path/column name.
     """
 
-    # Regex for unquoted SQL identifiers
-    _UNQUOTED_SQL_IDENTIFIER = re.compile(
-        r"^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$"
-    )
+    # Single-part unquoted SQL identifier (no dots here).
+    UNQUOTED_SQL_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
+    """
+    Regex pattern for a single-part unquoted SQL identifier (without dots).
+    """
 
     def __init__(self):
         self.error_messages: dict[str, str] = {
             "identifier": "must have a SQL name that is a valid SQL identifier",
             "sql_keyword": "must have a SQL name that is not a reserved word",
         }
 
+    def _split_identifier(self, name: str) -> list[str]:
+        """
+        Break a possibly qualified SQL identifier into its dot-separated parts.
+
+        Behavior:
+        - Only a dot (.) found outside quotes/backticks ends a part.
+        - Inside a double-quoted name ("..."), a doubled quote "" is an escaped
+          quote and does not close the name; a doubled backtick `` plays the
+          same role inside a backtick name (`...`).
+        - Dots inside quoted/backtick names are literal characters.
+        - Returned parts keep their surrounding quotes/backticks, because quoted
+          and unquoted names are validated differently later.
+        - Leading, trailing or consecutive dots produce empty parts:
+            * ".field"   -> ["", "field"]
+            * "schema."  -> ["schema", ""]
+            * "db..tbl"  -> ["db", "", "tbl"]
+          Whether empty parts are acceptable is decided by validation.
+        - The result always holds at least one part ('' for an empty string).
+
+        Notes:
+        - After a quoted/backtick name is closed, scanning continues in the same
+          part until a dot (.) or the end of the string; a quote by itself never
+          ends a part.
+        - A quote or backtick that is not the first character of a part is an
+          ordinary character.
+        - A quoted name with no closing delimiter extends to the end of the string.
+        - Nothing is validated here: spaces or other unexpected characters simply
+          stay in the part they occur in.
+
+        Examples (``pred`` stands for an instance of this class):
+            >>> pred._split_identifier('schema.table')
+            ['schema', 'table']
+            >>> pred._split_identifier('"foo"."bar"')
+            ['"foo"', '"bar"']
+            >>> pred._split_identifier('db."table.name"')
+            ['db', '"table.name"']
+            >>> pred._split_identifier('`a``b`.`c``d`')
+            ['`a``b`', '`c``d`']
+            >>> pred._split_identifier('.field')
+            ['', 'field']
+            >>> pred._split_identifier('field.')
+            ['field', '']
+        """
+
+        class ScanMode(Enum):
+            PART_START = auto()
+            BARE = auto()
+            IN_DOUBLE_QUOTES = auto()
+            IN_BACKTICKS = auto()
+
+        # Opening delimiter -> mode entered, and mode -> delimiter that closes it.
+        openers: dict[str, ScanMode] = {
+            '"': ScanMode.IN_DOUBLE_QUOTES,
+            "`": ScanMode.IN_BACKTICKS,
+        }
+        closers: dict[ScanMode, str] = {m: ch for ch, m in openers.items()}
+
+        pieces: list[str] = []
+        piece_begin: int = 0
+        mode: ScanMode = ScanMode.PART_START
+        total: int = len(name)
+        pos: int = 0
+
+        while pos < total:
+            cur: str = name[pos]
+            if mode is ScanMode.PART_START:
+                # An opening delimiter is consumed here; any other character is
+                # left in place so the BARE branch examines it next iteration.
+                mode = openers.get(cur, ScanMode.BARE)
+                if mode is not ScanMode.BARE:
+                    pos += 1
+                continue
+            if mode is ScanMode.BARE:
+                if cur == ".":
+                    pieces.append(name[piece_begin:pos])
+                    piece_begin = pos + 1
+                    mode = ScanMode.PART_START
+                pos += 1
+                continue
+            # Inside a quoted/backtick name: only its own delimiter is special.
+            if cur == closers[mode]:
+                if name[pos + 1 : pos + 2] == cur:
+                    # Doubled delimiter = escaped literal; skip its second half.
+                    pos += 1
+                else:
+                    mode = ScanMode.BARE
+            pos += 1
+        # Whatever follows the last dot (possibly nothing) is the final part.
+        pieces.append(name[piece_begin:pos])
+        return pieces
+
     def _error_code(self, obj: object) -> str | None:
         """Return an error code if invalid, or None if valid."""
         ret_val: str | None = None
         # Check that obj is a string
         if isinstance(obj, str):
-            # Check that obj is a valid SQL identifier
-            if not self.is_valid_sql_identifier(obj):
-                ret_val = "identifier"
-            # Check that obj is not a SQL reserved word
-            elif self._is_sql_keyword(obj):
-                ret_val = "sql_keyword"
+            # Check each part of a qualified name: db.schema.table
+            for part in self._split_identifier(obj):
+                # Check that this part is a valid SQL identifier
+                # Empty parts (e.g., leading/trailing dots) are invalid
+                if not part or not self.is_valid_sql_identifier(part):
+                    ret_val = "identifier"
+                    break
+                # Check that this part is not a SQL reserved word
+                if self._is_sql_keyword(part):
+                    ret_val = "sql_keyword"
+                    break
         else:
             ret_val = "identifier"
 
@@ -209,7 +307,7 @@ def is_valid_sql_identifier(self, name: str) -> bool:
             return False
 
         # Case 1: unquoted
-        if self._UNQUOTED_SQL_IDENTIFIER.match(name):
+        if self.UNQUOTED_SQL_IDENTIFIER.match(name):
             return True
 
         # Case 2: double quoted
 
@@ -193,6 +193,7 @@ def get_test_graph_by_name() -> graph_fetcher:
     test_graph_location: dict[str, str] = {
         "synthea": "synthea_graph.json",
         "world_development_indicators": "world_development_indicators_graph.json",
+        "keywords": "reserved_words_graph.json",
     }
 
     @cache
@@ -616,13 +617,24 @@ def sqlite_custom_datasets_connection() -> DatabaseContext:
     """
     Returns the SQLITE database connection with all the custom datasets attached.
     """
-    commands: list[str] = [
-        "cd tests/gen_data",
-        "rm -fv synthea.db",
-        "rm -fv world_development_indicators.db",
-        "sqlite3 synthea.db < init_synthea_sqlite.sql",
-        "sqlite3 world_development_indicators.db < init_world_indicators_sqlite.sql",
+    gen_data_path: str = "tests/gen_data"
+    # Dataset tuple format: (schema_name, db_file_name, init_sql_file_name)
+    SQLite_datasets: list[tuple[str, str, str]] = [
+        ("synthea", "synthea.db", "init_synthea_sqlite.sql"),
+        ("wdi", "world_development_indicators.db", "init_world_indicators_sqlite.sql"),
+        ("keywords", "reserved_words.db", "init_reserved_words_sqlite.sql"),
     ]
+
+    # List of shell commands required to re-create all the db files
+    commands: list[str] = [f"cd {gen_data_path}"]
+    # Collect all db_file_names into the rm command
+    rm_command: str = "rm -fv " + " ".join(
+        db_file for (_, db_file, _) in SQLite_datasets
+    )
+    commands.append(rm_command)
+    # Add one sqlite3 command per dataset
+    for _, db_file, init_sql in SQLite_datasets:
+        commands.append(f"sqlite3 {db_file} < {init_sql}")
     # Get the shell commands required to re-create all the db files
     shell_cmd: str = "; ".join(commands)
 
@@ -633,16 +645,11 @@ def sqlite_custom_datasets_connection() -> DatabaseContext:
     # Central in-memory connection
     connection: sqlite3.Connection = sqlite3.connect(":memory:")
 
-    # Dict: schema_name → database file path
-    dbs: dict[str, str] = {
-        "synthea": "tests/gen_data/synthea.db",
-        "wdi": "tests/gen_data/world_development_indicators.db",
-    }
-
-    # Attach them all
-    for schema, path in dbs.items():
-        path = os.path.join(base_dir, path)
+    # ATTACH each dataset's db file to the connection under its schema name
+    for schema, db_file, _ in SQLite_datasets:
+        path: str = os.path.join(base_dir, gen_data_path, db_file)
         connection.execute(f"ATTACH DATABASE '{path}' AS {schema}")
+
     return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE)