@@ -574,6 +574,129 @@ def test_initial_only():
574574 db_replicator_runner .stop ()
575575
576576
def test_parallel_initial_replication_record_versions():
    """
    Test that record versions are properly consolidated from worker states
    after parallel initial replication.

    Steps:

    1. Load ``tests_config_parallel.yaml`` and require
       ``initial_replication_threads > 1``.
    2. Create the test table in MySQL and insert 1000 rows.
    3. Run the DB replicator and wait until all rows are visible in
       ClickHouse, then check the maximum ``_version`` stored there.
    4. Start the binlog replicator and a second DB replicator, insert one
       more row, and check the ``_version`` assigned to that row.

    Every runner that was created is stopped in a ``finally`` block, so a
    failing assertion or wait timeout does not leave replicators running.
    """
    # Only run this test with parallel configuration
    cfg_file = 'tests_config_parallel.yaml'
    cfg = config.Settings()
    cfg.load(cfg_file)

    # Ensure we have parallel replication configured
    assert cfg.initial_replication_threads > 1, "This test requires initial_replication_threads > 1"

    mysql = mysql_api.MySQLApi(
        database=None,
        mysql_settings=cfg.mysql,
    )

    ch = clickhouse_api.ClickhouseApi(
        database=TEST_DB_NAME,
        clickhouse_settings=cfg.clickhouse,
    )

    prepare_env(cfg, mysql, ch)

    # Number of rows inserted before the initial replication starts.
    record_count = 1000
    # Value stored in the user-defined `version` column of the realtime row.
    realtime_row_version = record_count + 1

    # Create a table with sufficient records for parallel processing
    mysql.execute(f'''
    CREATE TABLE `{TEST_TABLE_NAME}` (
        id int NOT NULL AUTO_INCREMENT,
        name varchar(255),
        age int,
        version int NOT NULL DEFAULT 1,
        PRIMARY KEY (id)
    );
    ''')

    # Insert a large number of records to ensure parallel processing
    for i in range(1, record_count + 1):
        mysql.execute(
            f"INSERT INTO `{TEST_TABLE_NAME}` (name, age, version) "
            f"VALUES ('User{i}', {20 + i % 50}, {i});",
            commit=(i % 100 == 0),  # Commit every 100 records
        )

    # Runners are tracked here so the `finally` clause can stop whichever
    # ones were actually created, even when an assertion fails midway.
    db_replicator_runner = None
    binlog_replicator_runner = None
    realtime_db_replicator = None

    try:
        # Run initial replication only with parallel workers
        db_replicator_runner = DbReplicatorRunner(TEST_DB_NAME, cfg_file=cfg_file)
        db_replicator_runner.run()

        assert_wait(lambda: TEST_DB_NAME in ch.get_databases(), max_wait_time=10.0)

        ch.execute_command(f'USE `{TEST_DB_NAME}`')

        assert_wait(lambda: TEST_TABLE_NAME in ch.get_tables(), max_wait_time=10.0)
        assert_wait(
            lambda: len(ch.select(TEST_TABLE_NAME)) == record_count,
            max_wait_time=10.0,
        )

        db_replicator_runner.stop()

        # Verify database and table were created
        assert TEST_DB_NAME in ch.get_databases()
        ch.execute_command(f'USE `{TEST_DB_NAME}`')
        assert TEST_TABLE_NAME in ch.get_tables()

        # Verify all records were replicated
        records = ch.select(TEST_TABLE_NAME)
        assert len(records) == record_count

        # Instead of reading the state file directly, verify the record versions
        # are correctly handled by checking the max _version in the ClickHouse table
        versions_query = ch.query(
            f"SELECT MAX(_version) FROM `{TEST_DB_NAME}`.`{TEST_TABLE_NAME}`"
        )
        max_version_in_ch = versions_query.result_rows[0][0]
        assert max_version_in_ch >= 200, (
            f"Expected max _version to be at least 200, got {max_version_in_ch}"
        )

        # Now test realtime replication to verify versions continue correctly
        # Start binlog replication
        binlog_replicator_runner = BinlogReplicatorRunner(cfg_file=cfg_file)
        binlog_replicator_runner.run()

        # Fixed pause between starting the binlog replicator and the DB
        # replicator (presumably to let the former initialise; confirm).
        time.sleep(3.0)

        # Start DB replicator in realtime mode
        realtime_db_replicator = DbReplicatorRunner(TEST_DB_NAME, cfg_file=cfg_file)
        realtime_db_replicator.run()

        # Insert a new record whose `version` column is record_count + 1
        mysql.execute(
            f"INSERT INTO `{TEST_TABLE_NAME}` (name, age, version) "
            f"VALUES ('UserRealtime', 99, {realtime_row_version});",
            commit=True,
        )

        # Wait for the record to be replicated
        assert_wait(lambda: len(ch.select(TEST_TABLE_NAME)) == record_count + 1)

        # Verify the new record was replicated correctly
        realtime_record = ch.select(TEST_TABLE_NAME, where="name='UserRealtime'")[0]
        assert realtime_record['age'] == 99
        assert realtime_record['version'] == realtime_row_version

        # Check that the _version column in CH is a reasonable value.
        # NOTE(review): the expectation below assumes each parallel worker
        # keeps its own version counter, so the consolidated counter ends up
        # near (records per worker) rather than near the total record count.
        versions_query = ch.query(
            f"SELECT _version FROM `{TEST_DB_NAME}`.`{TEST_TABLE_NAME}` "
            f"WHERE name='UserRealtime'"
        )
        ch_version = versions_query.result_rows[0][0]

        assert ch_version > 0, f"ClickHouse _version should be > 0, but got {ch_version}"

        # We expect version to be roughly: (total_records / num_workers) + 1
        # For 1000 records and 4 workers, expect around 251
        expected_version_approx = record_count // cfg.initial_replication_threads + 1
        # Allow some flexibility in the exact expected value
        assert abs(ch_version - expected_version_approx) < 50, (
            f"ClickHouse _version should be close to {expected_version_approx}, "
            f"but got {ch_version}"
        )
    finally:
        # Clean up: same stop order the test used on the success path
        # (binlog replicator, realtime replicator, initial replicator).
        for runner in (
            binlog_replicator_runner,
            realtime_db_replicator,
            db_replicator_runner,
        ):
            if runner is not None:
                runner.stop()
698+
699+
577700def test_database_tables_filtering ():
578701 cfg = config .Settings ()
579702 cfg .load ('tests_config_databases_tables.yaml' )
@@ -693,8 +816,8 @@ def test_datetime_exception():
693816 name varchar(255),
694817 modified_date DateTime(3) NOT NULL,
695818 test_date date NOT NULL,
696- PRIMARY KEY (id)
697- );
819+ PRIMARY KEY (id)
820+ );
698821 ''' )
699822
700823 mysql .execute (
0 commit comments