1+ import re
2+ import logging
3+ from pathlib import Path
4+ from typing import List , Optional
15from . import config
26
37
def clean_dump_file():
    """Copy ``config.OUTPUT_DUMP`` to ``config.CLEANED_DUMP``, cleaning it on the way.

    Lines that start with ``CREATE ROLE`` or ``ALTER ROLE`` are dropped, and
    every ``OWNER TO <name>;`` clause is rewritten to ``OWNER TO public;``.
    All other text is copied through unchanged.  Progress is printed to
    stdout before and after the copy.
    """
    import re  # kept function-local, as in the original implementation

    # The space before ROLE is required: without it the pattern can never
    # match "CREATE ROLE ..." / "ALTER ROLE ..." and no line is skipped.
    role_statement = re.compile(r'^(CREATE|ALTER) ROLE')
    owner_clause = re.compile(r'OWNER TO .*?;')

    print(f"Cleaning dump file: {config.OUTPUT_DUMP}")
    with open(config.OUTPUT_DUMP, 'r') as infile, open(config.CLEANED_DUMP, 'w') as outfile:
        for line in infile:
            if role_statement.search(line):
                continue
            # sub() is a no-op on lines without an OWNER TO clause, so no
            # separate search() is needed first.
            outfile.write(owner_clause.sub('OWNER TO public;', line))
    print(f"Cleaned dump file saved as: {config.CLEANED_DUMP}")
# Module-level logger used by the dump-cleaning code below; the dotted name
# makes it a child of the "cloudsql_to_supabase" logger.
logger = logging.getLogger('cloudsql_to_supabase.clean')
9+
10+
class DumpCleaner:
    """Rewrite a SQL dump line by line so it can be imported into Supabase.

    Lines matching any regex in ``skip_patterns`` are dropped.  On every
    remaining line each ``(pattern, replacement)`` pair in
    ``replacement_rules`` is applied in order.  Both lists are plain
    instance attributes and may be adjusted before calling
    :meth:`clean_dump_file`.
    """

    def __init__(self, input_file: Optional[Path] = None, output_file: Optional[Path] = None) -> None:
        """Store the source/destination paths and the default rule set.

        Args:
            input_file: Dump to read.  Falls back to ``config.OUTPUT_DUMP``.
            output_file: Where the cleaned dump is written.  Falls back to
                ``config.CLEANED_DUMP``.
        """
        self.input_file = Path(input_file or config.OUTPUT_DUMP)
        # The default destination must NOT be the input file: opening the
        # same path for writing truncates it before a single line is read.
        self.output_file = Path(output_file or config.CLEANED_DUMP)

        # Regexes for lines that are removed from the dump entirely.
        self.skip_patterns = [
            r'^(CREATE|ALTER) ROLE',
            r'^COMMENT ON EXTENSION',
        ]

        # (regex, replacement) pairs applied to every line that is kept.
        self.replacement_rules = [
            (r'OWNER TO .*?;', 'OWNER TO public;'),
            (r'CREATE SCHEMA .*?;', '--schema creation removed'),
        ]

    def clean_dump_file(self) -> Path:
        """
        Clean the SQL dump file for Supabase import by removing/modifying
        incompatible statements.

        Returns:
            Path to the cleaned dump file

        Raises:
            FileNotFoundError: if the input file does not exist.
            ValueError: if the input and output paths refer to the same
                file, since writing would destroy the dump being read.
        """
        logger.info("cleaning dump file: %s", self.input_file)

        if not self.input_file.exists():
            raise FileNotFoundError(f"input file not found: {self.input_file}")

        # samefile() needs both paths to exist, hence the exists() check.
        if self.output_file.exists() and self.input_file.samefile(self.output_file):
            raise ValueError(
                f"output file must differ from input file: {self.input_file}"
            )

        # Compile once per run instead of once per line; the attributes are
        # read here so caller-side edits to the rule lists are honoured.
        skip_regexes = [re.compile(pattern) for pattern in self.skip_patterns]
        rewrite_rules = [
            (re.compile(pattern), replacement)
            for pattern, replacement in self.replacement_rules
        ]

        skipped_lines = 0
        modified_lines = 0

        with open(self.input_file, 'r') as infile, open(self.output_file, 'w') as outfile:
            for line in infile:
                if any(regex.search(line) for regex in skip_regexes):
                    skipped_lines += 1
                    continue

                cleaned = line
                for regex, replacement in rewrite_rules:
                    cleaned = regex.sub(replacement, cleaned)

                # Count each changed line exactly once, no matter how many
                # rules touched it.
                if cleaned != line:
                    modified_lines += 1

                outfile.write(cleaned)

        logger.info(
            "cleaning completed: %d lines skipped, %d lines modified",
            skipped_lines,
            modified_lines,
        )
        logger.info("cleaned dump saved as %s", self.output_file)

        return self.output_file
61+
62+
63+
def clean_dump_file(input_file: Optional[Path] = None, output_file: Optional[Path] = None) -> Path:
    """Clean a SQL dump in one call.

    Builds a ``DumpCleaner`` from the two arguments (``None`` is passed
    through, leaving the choice of path to ``DumpCleaner``), runs its
    ``clean_dump_file`` method and returns that method's result.
    """
    return DumpCleaner(input_file, output_file).clean_dump_file()
0 commit comments