Skip to content

Commit cc0fd55

Browse files
authored
No need to search for the max primary key, because all rows are already sorted by it
1 parent 922021f commit cc0fd55

File tree

1 file changed

+29
-6
lines changed

1 file changed

+29
-6
lines changed

mysql_ch_replicator/db_replicator.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from enum import Enum
77
from dataclasses import dataclass
88
from collections import defaultdict
9+
from datetime import date
910

1011
from .config import Settings, MysqlSettings, ClickhouseSettings
1112
from .mysql_api import MySQLApi
@@ -268,6 +269,30 @@ def perform_initial_replication(self):
268269
self.clickhouse_api.database = self.target_database
269270
logger.info(f'initial replication - done')
270271

272+
def to_date_if_str(self, value):
    """Convert an ISO-style date string to a ``datetime.date``.

    Two spellings are recognised:

    * ``YYYY-MM-DD`` (exactly 10 characters), and
    * the same text wrapped in one pair of matching single or double
      quotes (exactly 12 characters).

    Args:
        value: Any object. Only ``str`` instances are inspected.

    Returns:
        A ``date`` when ``value`` is a string in one of the recognised
        spellings and names a valid calendar date; otherwise ``value``
        itself, unchanged (including any surrounding quotes).
    """
    if not isinstance(value, str):
        return value

    # Strip one pair of matching quotes so both spellings share one parser.
    candidate = value
    if len(value) == 12 and value[0] in ('\'', '"') and value[0] == value[-1]:
        candidate = value[1:-1]

    if len(candidate) != 10 or candidate[4] != '-' or candidate[7] != '-':
        return value

    year_text = candidate[0:4]
    month_text = candidate[5:7]
    day_text = candidate[8:10]

    # int() alone is too permissive: it accepts signs, surrounding
    # whitespace, underscores and non-ASCII digits ("+1", " 1", "1_0"),
    # which would turn non-date strings into dates. Require plain digits.
    digits = year_text + month_text + day_text
    if not (digits.isascii() and digits.isdigit()):
        return value

    try:
        return date(int(year_text), int(month_text), int(day_text))
    except ValueError:
        # Well-formed but not a real calendar date (e.g. month 13, year 0).
        return value
295+
271296
def perform_initial_replication_table(self, table_name):
272297
logger.info(f'running initial replication for table {table_name}')
273298

@@ -329,13 +354,11 @@ def perform_initial_replication_table(self, table_name):
329354

330355
if not records:
331356
break
357+
332358
self.clickhouse_api.insert(table_name, records, table_structure=clickhouse_table_structure)
333-
for record in records:
334-
record_primary_key = [record[key_idx] for key_idx in primary_key_ids]
335-
if max_primary_key is None:
336-
max_primary_key = record_primary_key
337-
else:
338-
max_primary_key = max(max_primary_key, record_primary_key)
359+
360+
last_record = records[-1]
361+
max_primary_key = [self.to_date_if_str(last_record[key_idx]) for key_idx in primary_key_ids]
339362

340363
self.state.initial_replication_max_primary_key = max_primary_key
341364
self.save_state_if_required()

0 commit comments

Comments
 (0)