Commit 28c2751

Enhance DumpCleaner and import_to_supabase functions with improved logging, error handling, and support for target schema. Refactor run_command for real-time output streaming and password redaction.
1 parent a2fa2c4 commit 28c2751

3 files changed: +360 −108 lines changed

cloudsql_to_supabase/clean.py

Lines changed: 230 additions & 67 deletions
@@ -1,95 +1,258 @@
 import re
 import logging
 from pathlib import Path
-from typing import List, Optional
-from . import config
+from typing import Optional, List  # Added List for type hinting
+
+# Assuming config is a module in the same directory or an accessible path
+from . import config  # If config.py is in the same directory as this script
 
 logger = logging.getLogger('cloudsql_to_supabase.clean')
 
 class DumpCleaner:
-    def __init__(self, input_file: Path = None, output_file: Path = None, target_schema: str = None) -> None:
-        self.input_file = input_file or Path(config.OUTPUT_DUMP)
-        self.output_file = output_file or Path(config.CLEANED_DUMP)
+    def __init__(
+        self,
+        input_file: Optional[Path] = None,
+        output_file: Optional[Path] = None,
+        target_schema: Optional[str] = None,
+    ) -> None:
+        self.input_file = Path(input_file or config.OUTPUT_DUMP)
+        self.output_file = Path(output_file or config.CLEANED_DUMP)
         self.target_schema = target_schema or config.SUPABASE_SCHEMA
-
+
+        logger.info(f"Initializing DumpCleaner. Input: '{self.input_file}', Output: '{self.output_file}', Target Schema: '{self.target_schema}'")
+
+        # List of problematic roles (typically source-specific admin/system roles)
+        # whose usage should be entirely removed if they are not meant to exist in the target.
+        # 'postgres' is usually the main superuser in Supabase, so it's generally NOT added here.
+        self.problematic_roles_to_filter: List[str] = ["cloudsqlsuperuser", "cloudsqladmin"]
+        # Add other cloud-specific roles like 'rdsadmin', etc., if applicable.
+
+        self.problematic_role_match_pattern: Optional[str] = None
+        if self.problematic_roles_to_filter:
+            role_patterns = []
+            for role in self.problematic_roles_to_filter:
+                escaped_role = re.escape(role)
+                # Matches "role_name" OR role_name (as a whole word)
+                role_patterns.append(f'"{escaped_role}"')
+                role_patterns.append(rf'{escaped_role}\b')
+            self.problematic_role_match_pattern = rf"(?:{'|'.join(role_patterns)})"
+            logger.debug(f"Problematic role regex part: {self.problematic_role_match_pattern}")
+
+        # Patterns to skip lines entirely
         self.skip_patterns = [
-            r'^(CREATE|ALTER) ROLE',  # Skip role creation/alteration
-            r'^COMMENT ON EXTENSION',  # Skip extension comments
+            re.compile(r'^\s*(CREATE|ALTER)\s+ROLE\b', re.IGNORECASE),  # Skip any role creation/alteration
+            re.compile(r'^\s*COMMENT ON EXTENSION\s+(?:pg_stat_statements|plpgsql)\s*;', re.IGNORECASE),  # More specific
+            re.compile(r'^\s*COMMENT ON EXTENSION\b', re.IGNORECASE),  # General extension comments
+            re.compile(r'^\s*SET\s+(?:transaction_timeout|idle_in_transaction_session_timeout|lock_timeout|statement_timeout)\s*=\s*.*?;', re.IGNORECASE),
+            re.compile(r'^\s*SET\s+default_transaction_read_only\s*=\s*on;', re.IGNORECASE),  # Often in Cloud SQL read replicas
+            re.compile(r'^\s*GRANT\s+pg_signal_backend\s+TO\s+cloudsqlsuperuser;', re.IGNORECASE),  # Specific Cloud SQL grant
         ]
-
-        # Regular ownership replacement
-        owner_replacement = 'OWNER TO public;'
-        # If using non-public schema and we have permissions, keep the target schema as owner
-        if self.target_schema != "public":
-            owner_replacement = f'OWNER TO {self.target_schema};'
-
+
+        if self.problematic_role_match_pattern:
+            self.skip_patterns.extend([
+                # Skip SET ROLE or SET SESSION AUTHORIZATION to problematic roles
+                re.compile(r'^\s*SET\s+(?:ROLE|SESSION\s+AUTHORIZATION)\s+' + self.problematic_role_match_pattern + r'\s*;', re.IGNORECASE),
+
+                # Skip any GRANT or REVOKE statement that mentions a problematic role.
+                # This is broad but effective for preventing errors if these roles don't exist.
+                # It catches grants TO the role, grants OF the role, and revokes involving the role.
+                re.compile(r'^\s*(?:GRANT|REVOKE).*' + self.problematic_role_match_pattern + r'.*?;', re.IGNORECASE),
+
+                # Skip ALTER DEFAULT PRIVILEGES for problematic roles
+                re.compile(r'^\s*ALTER DEFAULT PRIVILEGES\s+FOR ROLE\s+' + self.problematic_role_match_pattern + r'\s+.*?;', re.IGNORECASE),
+            ])
+
+        # Replacement rules: list of tuples (compiled_pattern, replacement_str)
+        if self.target_schema == "public":
+            owner_replacement_str = 'OWNER TO public;'
+        else:
+            # Quote the schema name if it's not 'public' to preserve case and handle special characters.
+            owner_replacement_str = f'OWNER TO "{self.target_schema}";'
+        logger.debug(f"Owner replacement string: {owner_replacement_str}")
+
         self.replacement_rules = [
-            (r'OWNER TO .*?;', owner_replacement),
-            (r'CREATE SCHEMA .*?;', '-- Schema creation removed'),  # Comment out schema creation
+            # Matches OWNER TO any_user; or OWNER TO "any_user";
+            (re.compile(r'OWNER TO (?:"[^"]+"|[^\s;]+);', re.IGNORECASE), owner_replacement_str),
+            (re.compile(r'^\s*CREATE SCHEMA\s+.*?;', re.IGNORECASE), '-- Schema creation removed (Supabase extension schema used instead)'),
+            (re.compile(r'^\s*ALTER SCHEMA\s+public\s+OWNER TO .*?;', re.IGNORECASE), '-- ALTER SCHEMA public OWNER removed, will be owned by supabase admin/postgres'),
+            # Remove or comment out setting search_path to only public if we are using a target schema
+            (re.compile(r"^\s*SELECT pg_catalog\.set_config\('search_path', '', false\);\s*$", re.IGNORECASE), "-- SELECT pg_catalog.set_config('search_path', '', false); (emptying search_path removed)"),
         ]
-
-        # If we're targeting a non-public schema, add schema modifications
+
+        # Schema-specific replacements if target_schema is not 'public'
         if self.target_schema != "public":
-            # Add rules to update schema references in the dump
-            self.replacement_rules.extend([
-                # Update SET search_path statements
-                (r'SET search_path = public', f'SET search_path = {self.target_schema}'),
-                # Update schema references in table names
-                (r'CREATE TABLE public\.', f'CREATE TABLE {self.target_schema}.'),
-                # Update schema references in sequence names
-                (r'CREATE SEQUENCE public\.', f'CREATE SEQUENCE {self.target_schema}.'),
-                # Update schema references in views
-                (r'CREATE VIEW public\.', f'CREATE VIEW {self.target_schema}.'),
-                # Update schema references in functions
-                (r'CREATE FUNCTION public\.', f'CREATE FUNCTION {self.target_schema}.'),
-            ])
-
+            schema_replacements = [
+                # Change SET search_path = public, ''; (or similar) to include the target schema first.
+                # This regex handles various forms of search_path settings.
+                (r"SET search_path = (?:public(?:,\s*)?)([^;]*);", rf'SET search_path = "{self.target_schema}", public\1;'),
+                (r"CREATE TABLE public\.([\w_]+)", rf'CREATE TABLE "{self.target_schema}".\1'),
+                (r"ALTER TABLE ONLY public\.([\w_]+)", rf'ALTER TABLE ONLY "{self.target_schema}".\1'),
+                (r"CREATE SEQUENCE public\.([\w_]+)", rf'CREATE SEQUENCE "{self.target_schema}".\1'),
+                (r"ALTER SEQUENCE public\.([\w_]+)", rf'ALTER SEQUENCE "{self.target_schema}".\1'),
+                (r"CREATE VIEW public\.([\w_]+)", rf'CREATE VIEW "{self.target_schema}".\1'),
+                (r"CREATE FUNCTION public\.([\w_]+)", rf'CREATE FUNCTION "{self.target_schema}".\1'),
+                (r"CREATE TRIGGER ", "CREATE TRIGGER "),  # No change, but ensure it's before more specific public. replacements if any
+                (r"REFERENCES public\.([\w_]+)", rf'REFERENCES "{self.target_schema}".\1'),
+                # For foreign keys that might reference the public schema (e.g. from extensions like auth, storage in public)
+                # we generally want them to stay as public. This needs careful consideration.
+                # The above rule is quite broad. If you have FKs to your own tables previously in public, they should be remapped.
+                # If FKs are to Supabase's own public tables (auth.users etc.), they should NOT be remapped.
+                # This often requires more specific rules or post-import adjustments.
+                # For now, we are broadly remapping 'public.' to the target schema.
+            ]
+            for pattern_str, repl_str in schema_replacements:
+                self.replacement_rules.append((re.compile(pattern_str, re.IGNORECASE), repl_str))
+        else:  # If target_schema is 'public'
+            self.replacement_rules.append(
+                (re.compile(r"SET search_path = .*?;", re.IGNORECASE), r"SET search_path = public, pg_catalog;")  # Ensure a sane default
+            )
+
+
     def clean_dump_file(self) -> Path:
-        """
-        Clean the SQL dump file for Supabase import by removing/modifying
-        incompatible statements.
-
-        Returns:
-            Path to the cleaned dump file
-        """
-        logger.info(f"Cleaning dump file: {self.input_file}")
-
+        logger.info(f"Starting cleaning of dump file: {self.input_file}")
+
         if not self.input_file.exists():
+            logger.error(f"Input file not found: {self.input_file}")
             raise FileNotFoundError(f"Input file not found: {self.input_file}")
-
-        # Count of modifications for logging
-        skipped_lines = 0
-        modified_lines = 0
-
-        with open(self.input_file, 'r') as infile, open(self.output_file, 'w') as outfile:
+
+        skipped_lines, modified_lines, lines_processed = 0, 0, 0
+
+        # Create output directory if it doesn't exist
+        self.output_file.parent.mkdir(parents=True, exist_ok=True)
+
+        with self.input_file.open('r', encoding='utf-8') as infile, \
+             self.output_file.open('w', encoding='utf-8') as outfile:
             for line_num, line in enumerate(infile, 1):
-                # Check if line should be skipped
-                if any(re.search(pattern, line) for pattern in self.skip_patterns):
-                    skipped_lines += 1
-                    continue
-
-                # Apply replacements
+                lines_processed += 1
                 original_line = line
+
+                # Check skip patterns first
+                should_skip = False
+                for pattern in self.skip_patterns:
+                    if pattern.search(line):
+                        # logger.debug(f"Line {line_num} skipped by pattern: {pattern.pattern}\n\t{line.strip()}")
+                        skipped_lines += 1
+                        should_skip = True
+                        break
+                if should_skip:
+                    outfile.write(f"-- SKIPPED LINE (by pattern): {line.strip()}\n")  # Write as comment for audit
+                    continue
+
+                # Apply replacement rules
+                modified_in_line = 0
                 for pattern, replacement in self.replacement_rules:
-                    if re.search(pattern, line):
-                        line = re.sub(pattern, replacement, line)
-                        if line != original_line:
-                            modified_lines += 1
+                    line, count = pattern.subn(replacement, line)
+                    if count > 0:
+                        modified_lines += count
+                        modified_in_line += count
 
+                # if modified_in_line > 0:
+                #     logger.debug(f"Line {line_num} modified from: {original_line.strip()} \n\tTO: {line.strip()}")
+
                 outfile.write(line)
-
-        logger.info(f"Cleaning completed: {skipped_lines} lines skipped, {modified_lines} lines modified")
+
+        logger.info(f"Cleaning completed: {lines_processed} lines processed, {skipped_lines} lines skipped, {modified_lines} total modifications applied.")
         logger.info(f"Cleaned dump saved as {self.output_file}")
         return self.output_file
 
-def clean_dump_file(input_file: Optional[Path] = None, output_file: Optional[Path] = None, target_schema: Optional[str] = None) -> Path:
+def clean_dump_file(
+    input_file: Optional[Path] = None,
+    output_file: Optional[Path] = None,
+    target_schema: Optional[str] = None,
+) -> Path:
     """
-    Convenience function to clean a SQL dump file
-
+    Cleans a PostgreSQL dump file to make it suitable for import into Supabase.
+
     Args:
-        input_file: Path to the input SQL dump file
-        output_file: Path to the output cleaned SQL file
-        target_schema: Schema to use in Supabase. If None, uses the one from config
+        input_file: Path to the input SQL dump file.
+        output_file: Path where the cleaned SQL dump file will be saved.
+        target_schema: The target schema in Supabase (e.g., "public" or a custom extension schema).
+
+    Returns:
+        Path to the cleaned output file.
     """
     cleaner = DumpCleaner(input_file, output_file, target_schema)
-    return cleaner.clean_dump_file()
+    return cleaner.clean_dump_file()
+
+# Example usage (for testing purposes)
+if __name__ == '__main__':
+    # Create a dummy config.py for the test
+    with open("config.py", "w") as f:
+        f.write("OUTPUT_DUMP = 'dummy_input.sql'\n")
+        f.write("CLEANED_DUMP = 'dummy_cleaned_output.sql'\n")
+        f.write("SUPABASE_SCHEMA = 'extensions'\n")  # or 'public'
+
+    # Create a dummy input SQL file
+    dummy_sql_content = """
+-- PostgreSQL database dump
+
+SET statement_timeout = 0;
+SET lock_timeout = 0;
+SET idle_in_transaction_session_timeout = 0;
+SET client_encoding = 'UTF8';
+SET standard_conforming_strings = on;
+SELECT pg_catalog.set_config('search_path', '', false);
+SET check_function_bodies = false;
+SET xmloption = content;
+SET client_min_messages = warning;
+SET row_security = off;
+
+COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
+CREATE ROLE cloudsqlsuperuser;
+ALTER ROLE cloudsqlsuperuser WITH NOSUPERUSER INHERIT NOCREATEROLE NOCREATEDB LOGIN NOREPLICATION NOBYPASSRLS;
+GRANT pg_signal_backend TO cloudsqlsuperuser;
+SET SESSION AUTHORIZATION cloudsqlsuperuser;
+SET ROLE cloudsqlsuperuser;
+
+CREATE SCHEMA extensions;
+ALTER SCHEMA extensions OWNER TO cloudsqlsuperuser;
+
+CREATE SCHEMA public; -- This should be commented out
+ALTER SCHEMA public OWNER TO cloudsqlsuperuser; -- This should be changed or commented out
+
+SET search_path = public, extensions;
+
+CREATE TABLE public.my_table (id integer);
+ALTER TABLE public.my_table OWNER TO cloudsqlsuperuser;
+
+CREATE TABLE extensions.another_table (name text);
+ALTER TABLE extensions.another_table OWNER TO postgres; -- Assuming postgres is the target
+
+GRANT SELECT ON public.my_table TO cloudsqlsuperuser;
+GRANT INSERT ON public.my_table TO "cloudsqlsuperuser";
+GRANT cloudsqlsuperuser TO some_other_user;
+REVOKE cloudsqlsuperuser FROM some_other_user;
+ALTER DEFAULT PRIVILEGES FOR ROLE cloudsqlsuperuser IN SCHEMA public GRANT SELECT ON TABLES TO public;
+ALTER DEFAULT PRIVILEGES FOR ROLE "cloudsqlsuperuser" GRANT SELECT ON SEQUENCES TO public;
+
+-- End of dummy dump
+"""
+    with open("dummy_input.sql", "w", encoding='utf-8') as f:
+        f.write(dummy_sql_content)
+
+    # Configure basic logging for the test
+    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+    logger.info("Running dummy cleaning process...")
+    # Test with target_schema = 'extensions'
+    # cleaner_ext = DumpCleaner(input_file=Path("dummy_input.sql"), output_file=Path("dummy_cleaned_ext.sql"), target_schema="extensions")
+    # cleaned_path_ext = cleaner_ext.clean_dump_file()
+    # print(f"Cleaned file for 'extensions' schema at: {cleaned_path_ext}")
+    # with open(cleaned_path_ext, 'r') as f_ext:
+    #     print("\n--- Cleaned for 'extensions' schema ---")
+    #     print(f_ext.read())
+
+    # Test with target_schema = 'public' (Supabase default for user tables)
+    cleaner_public = DumpCleaner(input_file=Path("dummy_input.sql"), output_file=Path("dummy_cleaned_public.sql"), target_schema="public")
+    cleaned_path_public = cleaner_public.clean_dump_file()
+    print(f"Cleaned file for 'public' schema at: {cleaned_path_public}")
+    with open(cleaned_path_public, 'r') as f_public:
+        print("\n--- Cleaned for 'public' schema ---")
+        print(f_public.read())
+
+    # Clean up dummy files
+    # Path("config.py").unlink(missing_ok=True)
+    # Path("dummy_input.sql").unlink(missing_ok=True)
+    # Path("dummy_cleaned_ext.sql").unlink(missing_ok=True)
+    # Path("dummy_cleaned_public.sql").unlink(missing_ok=True)

cloudsql_to_supabase/import_.py

Lines changed: 15 additions & 13 deletions
@@ -9,7 +9,7 @@
 def import_to_supabase(input_file: Optional[Path] = None, password: Optional[str] = None, schema: Optional[str] = None) -> None:
     """
     Import a cleaned SQL dump file into Supabase
-
+
     Args:
         input_file: Path to the SQL dump file to import. If None, uses the default.
         password: Supabase database password. If None, uses from config.
@@ -18,16 +18,16 @@ def import_to_supabase(input_file: Optional[Path] = None, password: Optional[str
     config.validate_config()
     dump_file = input_file or Path(config.CLEANED_DUMP)
     target_schema = schema or config.SUPABASE_SCHEMA
-
+
     if not dump_file.exists():
         raise FileNotFoundError(f"Dump file not found: {dump_file}")
-
-    logger.info(f"Importing into Supabase database: {config.SUPABASE_DB}, schema: {target_schema}")
-
+
+    logger.info(f"Importing file {dump_file} into Supabase database: {config.SUPABASE_DB}, schema: {target_schema}")
+
     # Set up environment with password
     env = os.environ.copy()
     env['PGPASSWORD'] = password or config.SUPABASE_PASSWORD
-
+
     # Create schema if it doesn't exist (and it's not 'public')
     if target_schema != "public":
         create_schema_cmd = (
@@ -42,25 +42,27 @@ def import_to_supabase(input_file: Optional[Path] = None, password: Optional[str
             utils.run_command(create_schema_cmd, env)
         except Exception as e:
             logger.warning(f"Failed to create schema, it may already exist: {e}")
-
+
     # Build psql command
     cmd = (
         f"psql -h {config.SUPABASE_HOST} "
         f"-p {config.SUPABASE_PORT} "
         f"-U {config.SUPABASE_USER} "
         f"-d {config.SUPABASE_DB} "
-        f"--set ON_ERROR_STOP=on "  # Stop on first error
-        f"--single-transaction "  # Run as a single transaction
+        f"--set ON_ERROR_STOP=on "
+        f"--single-transaction "
     )
-
+
     # Set search_path if using non-public schema
     if target_schema != "public":
         cmd += f"--set search_path={target_schema} "
-
-
+
+    # Attach SQL file
+    cmd += f"-f {dump_file} "
+
     try:
         utils.run_command(cmd, env)
         logger.info("Import completed successfully")
     except Exception as e:
         logger.error(f"Import failed: {e}")
-        raise
+        raise
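
The commit message also mentions refactoring run_command for real-time output streaming and password redaction, but the third changed file (presumably the utils module that defines run_command) is not shown in this view. For orientation, here is a minimal sketch of what such a helper might look like, assuming only the run_command(cmd, env) signature visible at the call sites above; the logger name, redaction pattern, and error handling are illustrative assumptions, not the committed code:

import logging
import re
import subprocess
from typing import Optional

logger = logging.getLogger('cloudsql_to_supabase.utils')

def run_command(cmd: str, env: Optional[dict] = None) -> None:
    # Redact anything that looks like an inline password before logging.
    # (The callers above pass the password via PGPASSWORD in `env`, so the
    # command string should already be safe; this is defensive. Assumed pattern.)
    redacted_cmd = re.sub(r'(password=)\S+', r'\1****', cmd, flags=re.IGNORECASE)
    logger.info(f"Running: {redacted_cmd}")

    # Stream combined stdout/stderr line by line as the command runs,
    # instead of buffering everything until completion.
    process = subprocess.Popen(
        cmd,
        shell=True,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )
    assert process.stdout is not None  # stdout=PIPE guarantees a stream
    for line in process.stdout:
        logger.info(line.rstrip())

    if process.wait() != 0:
        raise RuntimeError(f"Command failed with exit code {process.returncode}: {redacted_cmd}")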
