Skip to content

Commit 295facf

Browse files
committed
Refactor dump cleaning functionality into DumpCleaner class with enhanced logging and error handling
1 parent 99dd23c commit 295facf

File tree

1 file changed

+67
-11
lines changed

1 file changed

+67
-11
lines changed

cloudsql_to_supabase/clean.py

Lines changed: 67 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,70 @@
1+
import re
2+
import logging
3+
from pathlib import Path
4+
from typing import List, Optional
15
from . import config
26

37

4-
def clean_dump_file():
    """Copy the dump at config.OUTPUT_DUMP to config.CLEANED_DUMP, cleaning it.

    Lines that start with a CREATE ROLE or ALTER ROLE statement are dropped,
    and every ``OWNER TO <name>;`` clause is rewritten to ``OWNER TO public;``.
    All other lines are copied unchanged. Progress is reported on stdout.

    Returns:
        None. The result is the file written at config.CLEANED_DUMP.
    """
    import re

    # The keyword and ROLE must be separated by a space; the previous
    # pattern '(CREATE|ALTER)ROLE' could never match a real statement, so
    # role statements were silently kept in the cleaned dump.
    role_statement = re.compile(r'^(CREATE|ALTER) ROLE')
    owner_clause = re.compile(r'OWNER TO .*?;')

    print(f"Cleaning dump file: {config.OUTPUT_DUMP}")
    with open(config.OUTPUT_DUMP, 'r') as infile, open(config.CLEANED_DUMP, 'w') as outfile:
        for line in infile:
            if role_statement.search(line):
                continue
            # sub() leaves lines without an owner clause untouched.
            outfile.write(owner_clause.sub('OWNER TO public;', line))
    print(f"Cleaned dump file saved as: {config.CLEANED_DUMP}")
8+
logger = logging.getLogger('cloudsql_to_supabase.clean')


class DumpCleaner:
    """Line-by-line cleaner that prepares a SQL dump for Supabase import.

    Lines matching any entry of ``skip_patterns`` are dropped; on the
    remaining lines every ``(pattern, replacement)`` pair of
    ``replacement_rules`` is applied in order. Both lists are plain
    instance attributes and may be adjusted before calling
    :meth:`clean_dump_file`.
    """

    def __init__(self, input_file: Optional[Path] = None, output_file: Optional[Path] = None) -> None:
        """Set up source/destination paths and the default cleaning rules.

        Args:
            input_file: Dump to read. Falls back to ``config.OUTPUT_DUMP``.
            output_file: Where the cleaned dump is written. Falls back to
                ``config.CLEANED_DUMP``.
        """
        self.input_file = Path(input_file or config.OUTPUT_DUMP)
        # The default must differ from the input: opening the input path
        # for writing would truncate the dump before it is read.
        self.output_file = Path(output_file or config.CLEANED_DUMP)

        # Regexes; a line matching any of them is omitted from the output.
        self.skip_patterns: List[str] = [
            r'^(CREATE|ALTER) ROLE',
            r'^COMMENT ON EXTENSION',
        ]

        # (regex, replacement) pairs applied in order to every kept line.
        self.replacement_rules = [
            (r'OWNER TO .*?;', 'OWNER TO public;'),
            (r'CREATE SCHEMA .*?;', '--schema creation removed'),
        ]

    def clean_dump_file(self) -> Path:
        """Clean the SQL dump for Supabase import.

        Removes lines matching ``skip_patterns`` and rewrites the remaining
        lines according to ``replacement_rules``.

        Returns:
            Path to the cleaned dump file.

        Raises:
            FileNotFoundError: If the input file does not exist.
            ValueError: If input and output resolve to the same file, which
                would destroy the input while it is being read.
        """
        logger.info("cleaning dump file: %s", self.input_file)

        if not self.input_file.exists():
            raise FileNotFoundError(f"input file not found: {self.input_file}")

        if self.input_file.resolve() == self.output_file.resolve():
            raise ValueError(
                f"input and output are the same file: {self.input_file}"
            )

        # Compile once per run instead of once per line; reading the
        # attributes here keeps caller-side rule changes effective.
        skip_res = [re.compile(pattern) for pattern in self.skip_patterns]
        rules = [
            (re.compile(pattern), replacement)
            for pattern, replacement in self.replacement_rules
        ]

        skipped_lines = 0
        modified_lines = 0

        with open(self.input_file, 'r') as infile, open(self.output_file, 'w') as outfile:
            for line in infile:
                if any(rx.search(line) for rx in skip_res):
                    skipped_lines += 1
                    continue

                cleaned = line
                for rx, replacement in rules:
                    cleaned = rx.sub(replacement, cleaned)

                # Count each line once, however many rules touched it.
                if cleaned != line:
                    modified_lines += 1

                outfile.write(cleaned)

        logger.info(
            "cleaning completed: %d lines skipped, %d lines modified",
            skipped_lines,
            modified_lines,
        )
        logger.info("cleaned dump saved as %s", self.output_file)

        return self.output_file
61+
62+
63+
64+
def clean_dump_file(input_file: Optional[Path] = None, output_file: Optional[Path] = None) -> Path:
    """Clean a SQL dump in one call.

    Builds a ``DumpCleaner`` from the two (optional) paths, runs it, and
    hands back whatever path the cleaner returns.
    """
    return DumpCleaner(input_file, output_file).clean_dump_file()

0 commit comments

Comments
 (0)