11import json
22import os .path
3+ import random
34import time
45import pickle
56from logging import getLogger
67from enum import Enum
78from dataclasses import dataclass
89from collections import defaultdict
10+ import sys
11+ import subprocess
12+ import select
913
1014from .config import Settings , MysqlSettings , ClickhouseSettings
1115from .mysql_api import MySQLApi
@@ -106,10 +110,15 @@ class DbReplicator:
106110
107111 READ_LOG_INTERVAL = 0.3
108112
109- def __init__ (self , config : Settings , database : str , target_database : str = None , initial_only : bool = False ):
113+ def __init__ (self , config : Settings , database : str , target_database : str = None , initial_only : bool = False ,
114+ worker_id : int = None , total_workers : int = None , table : str = None ):
110115 self .config = config
111116 self .database = database
112-
117+ self .worker_id = worker_id
118+ self .total_workers = total_workers
119+ self .settings_file = config .settings_file
120+ self .single_table = table # Store the single table to process
121+
113122 # use same as source database by default
114123 self .target_database = database
115124
@@ -122,9 +131,29 @@ def __init__(self, config: Settings, database: str, target_database: str = None,
122131 if target_database :
123132 self .target_database = target_database
124133
125- self .target_database_tmp = self .target_database + '_tmp'
126134 self .initial_only = initial_only
127135
136+ # Handle state file differently for parallel workers
137+ if self .worker_id is not None and self .total_workers is not None :
138+ # For worker processes in parallel mode, use a different state file
139+ self .is_parallel_worker = True
140+ self .state_path = os .path .join (
141+ self .config .binlog_replicator .data_dir ,
142+ self .database ,
143+ f'state_worker_{ self .worker_id } _{ random .randint (0 ,9999999999 )} .pckl'
144+ )
145+ logger .info (f"Worker { self .worker_id } /{ self .total_workers } using state file: { self .state_path } " )
146+
147+ if self .single_table :
148+ logger .info (f"Worker { self .worker_id } focusing only on table: { self .single_table } " )
149+ else :
150+ self .state_path = os .path .join (self .config .binlog_replicator .data_dir , self .database , 'state.pckl' )
151+ self .is_parallel_worker = False
152+
153+ self .target_database_tmp = self .target_database + '_tmp'
154+ if self .is_parallel_worker :
155+ self .target_database_tmp = self .target_database
156+
128157 self .mysql_api = MySQLApi (
129158 database = self .database ,
130159 mysql_settings = config .mysql ,
@@ -148,7 +177,7 @@ def __init__(self, config: Settings, database: str, target_database: str = None,
148177 self .start_time = time .time ()
149178
def create_state(self):
    """Return a ``State`` constructed from this replicator's ``self.state_path``."""
    state_file = self.state_path
    return State(state_file)
152181
153182 def validate_database_settings (self ):
154183 if not self .initial_only :
@@ -196,7 +225,9 @@ def run(self):
196225
197226 logger .info ('recreating database' )
198227 self .clickhouse_api .database = self .target_database_tmp
199- self .clickhouse_api .recreate_database ()
228+ if not self .is_parallel_worker :
229+ self .clickhouse_api .recreate_database ()
230+
200231 self .state .tables = self .mysql_api .get_tables ()
201232 self .state .tables = [
202233 table for table in self .state .tables if self .config .is_table_matches (table )
@@ -220,6 +251,10 @@ def create_initial_structure(self):
220251 def create_initial_structure_table (self , table_name ):
221252 if not self .config .is_table_matches (table_name ):
222253 return
254+
255+ if self .single_table and self .single_table != table_name :
256+ return
257+
223258 mysql_create_statement = self .mysql_api .get_table_create_statement (table_name )
224259 mysql_structure = self .converter .parse_mysql_table_structure (
225260 mysql_create_statement , required_table_name = table_name ,
@@ -232,7 +267,9 @@ def create_initial_structure_table(self, table_name):
232267
233268 self .state .tables_structure [table_name ] = (mysql_structure , clickhouse_structure )
234269 indexes = self .config .get_indexes (self .database , table_name )
235- self .clickhouse_api .create_table (clickhouse_structure , additional_indexes = indexes )
270+
271+ if not self .is_parallel_worker :
272+ self .clickhouse_api .create_table (clickhouse_structure , additional_indexes = indexes )
236273
237274 def prevent_binlog_removal (self ):
238275 if time .time () - self .last_touch_time < self .BINLOG_TOUCH_INTERVAL :
@@ -253,22 +290,26 @@ def perform_initial_replication(self):
253290 for table in self .state .tables :
254291 if start_table and table != start_table :
255292 continue
293+ if self .single_table and self .single_table != table :
294+ continue
256295 self .perform_initial_replication_table (table )
257296 start_table = None
258- logger .info (f'initial replication - swapping database' )
259- if self .target_database in self .clickhouse_api .get_databases ():
260- self .clickhouse_api .execute_command (
261- f'RENAME DATABASE `{ self .target_database } ` TO `{ self .target_database } _old`' ,
262- )
263- self .clickhouse_api .execute_command (
264- f'RENAME DATABASE `{ self .target_database_tmp } ` TO `{ self .target_database } `' ,
265- )
266- self .clickhouse_api .drop_database (f'{ self .target_database } _old' )
267- else :
268- self .clickhouse_api .execute_command (
269- f'RENAME DATABASE `{ self .target_database_tmp } ` TO `{ self .target_database } `' ,
270- )
271- self .clickhouse_api .database = self .target_database
297+
298+ if not self .is_parallel_worker :
299+ logger .info (f'initial replication - swapping database' )
300+ if self .target_database in self .clickhouse_api .get_databases ():
301+ self .clickhouse_api .execute_command (
302+ f'RENAME DATABASE `{ self .target_database } ` TO `{ self .target_database } _old`' ,
303+ )
304+ self .clickhouse_api .execute_command (
305+ f'RENAME DATABASE `{ self .target_database_tmp } ` TO `{ self .target_database } `' ,
306+ )
307+ self .clickhouse_api .drop_database (f'{ self .target_database } _old' )
308+ else :
309+ self .clickhouse_api .execute_command (
310+ f'RENAME DATABASE `{ self .target_database_tmp } ` TO `{ self .target_database } `' ,
311+ )
312+ self .clickhouse_api .database = self .target_database
272313 logger .info (f'initial replication - done' )
273314
274315 def perform_initial_replication_table (self , table_name ):
@@ -278,6 +319,13 @@ def perform_initial_replication_table(self, table_name):
278319 logger .info (f'skip table { table_name } - not matching any allowed table' )
279320 return
280321
322+ if not self .is_parallel_worker and self .config .initial_replication_threads > 1 :
323+ self .state .initial_replication_table = table_name
324+ self .state .initial_replication_max_primary_key = None
325+ self .state .save ()
326+ self .perform_initial_replication_table_parallel (table_name )
327+ return
328+
281329 max_primary_key = None
282330 if self .state .initial_replication_table == table_name :
283331 # continue replication from saved position
@@ -322,6 +370,8 @@ def perform_initial_replication_table(self, table_name):
322370 order_by = primary_keys ,
323371 limit = DbReplicator .INITIAL_REPLICATION_BATCH_SIZE ,
324372 start_value = query_start_values ,
373+ worker_id = self .worker_id ,
374+ total_workers = self .total_workers ,
325375 )
326376 logger .debug (f'extracted { len (records )} records from mysql' )
327377
@@ -360,6 +410,66 @@ def perform_initial_replication_table(self, table_name):
360410 f'primary key: { max_primary_key } ' ,
361411 )
362412
def perform_initial_replication_table_parallel(self, table_name):
    """
    Execute initial replication for a table using multiple parallel worker processes.

    One subprocess is launched per worker, for
    ``self.config.initial_replication_threads`` workers in total. Each
    subprocess is passed its ``--worker_id``, the ``--total_workers`` count,
    the table name and ``self.target_database_tmp`` as the target database.
    This method then polls the children until all of them have exited.

    Args:
        table_name: Name of the table the workers should replicate.

    Raises:
        RuntimeError: If any worker exits with a non-zero exit code. The
            remaining workers are terminated before the error propagates.
        KeyboardInterrupt: Re-raised after terminating the running workers.
    """
    total_workers = self.config.initial_replication_threads
    logger.info(f"Starting parallel replication for table {table_name} with {total_workers} workers")

    # worker_id -> Popen for every worker that has not been reaped yet.
    # Keyed by the real worker id so log lines identify the right worker.
    running = {}
    progress_log_interval = 30  # seconds between "still waiting" messages
    last_progress_log = time.monotonic()

    try:
        # Launching happens inside the try block so that a failure to spawn
        # worker N still cleans up workers 0..N-1.
        for worker_id in range(total_workers):
            cmd = [
                sys.executable, "-m", "mysql_ch_replicator.main",
                "db_replicator",  # Required positional mode argument
                "--config", self.settings_file,
                "--db", self.database,
                "--worker_id", str(worker_id),
                "--total_workers", str(total_workers),
                "--table", table_name,
                "--target_db", self.target_database_tmp,
                "--initial_only=True",
            ]

            logger.info(f"Launching worker {worker_id}: {' '.join(cmd)}")
            running[worker_id] = subprocess.Popen(cmd)

        logger.info(f"Waiting for {len(running)} workers to complete replication of {table_name}")

        while running:
            # Iterate over a snapshot because finished workers are removed.
            for worker_id, process in list(running.items()):
                exit_code = process.poll()
                if exit_code is None:
                    continue  # still running

                del running[worker_id]
                if exit_code != 0:
                    logger.error(f"Worker process {worker_id} failed with exit code {exit_code}")
                    raise RuntimeError(f"Worker process failed with exit code {exit_code}")
                logger.info(f"Worker process {worker_id} completed successfully")

            if running:
                # Wait a bit before checking again
                time.sleep(0.1)

                # Elapsed-time check: logs once per interval instead of on
                # every poll that happens to land on a matching second.
                now = time.monotonic()
                if now - last_progress_log >= progress_log_interval:
                    logger.info(f"Still waiting for {len(running)} workers to complete")
                    last_progress_log = now
    except BaseException as error:
        # Any abort (worker failure, spawn error, Ctrl+C) must not leave
        # orphaned workers behind.
        if isinstance(error, KeyboardInterrupt):
            logger.warning("Received interrupt, terminating worker processes")
        for process in running.values():
            process.terminate()
        for process in running.values():
            try:
                process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # Worker ignored the termination request; force it down.
                process.kill()
                process.wait()
        raise

    logger.info(f"All workers completed replication of table {table_name}")
472+
363473 def run_realtime_replication (self ):
364474 if self .initial_only :
365475 logger .info ('skip running realtime replication, only initial replication was requested' )
0 commit comments