Skip to content

Commit 041c219

Browse files
authored
Added performance tests (bakwc#82)
1 parent 420895b commit 041c219

File tree

6 files changed

+187
-2
lines changed

6 files changed

+187
-2
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ With a focus on high performance, it utilizes batching heavily and uses C++ exte
1818
## Features
1919

2020
- **Real-Time Replication**: Keeps your ClickHouse database in sync with MySQL in real-time.
21-
- **High Performance**: Utilizes batching and ports slow parts to C++ (e.g., MySQL internal JSON parsing) for optimal performance.
21+
- **High Performance**: Utilizes batching and ports slow parts to C++ (e.g., MySQL internal JSON parsing) for optimal performance (approximately 20K events per second on a single core).
2222
- **Supports Migrations/Schema Changes**: Handles adding, altering, and removing tables without breaking the replication process.
2323
- **Recovery without Downtime**: Allows for preserving old data while performing initial replication, ensuring continuous operation.
2424
- **Correct Data Removal**: Unlike MaterializedMySQL, `mysql_ch_replicator` ensures physical removal of data.

conftest.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# conftest.py
2+
import pytest
3+
4+
5+
def pytest_addoption(parser):
    """Register the ``--run-optional`` command-line flag.

    The flag is a boolean switch (off unless given) that the collection hook
    in this file reads to decide whether tests marked ``optional`` may run.
    """
    option_settings = {
        "action": "store_true",
        "default": False,
        "help": "Run tests marked as optional",
    }
    parser.addoption("--run-optional", **option_settings)
12+
13+
14+
def pytest_collection_modifyitems(config, items):
    """Attach a skip marker to ``optional`` tests that were not requested.

    A test carrying the ``optional`` keyword is left untouched when either
    ``--run-optional`` was given, or the raw ``-k`` text is a substring of the
    test's name or node id. Every other optional test gets a skip marker.
    Tests without the ``optional`` keyword are never modified.
    """
    include_optional = config.getoption("--run-optional")
    k_text = config.getoption("keyword")  # Raw value passed with -k

    # Node ids of the tests whose name or node id contains the -k text.
    explicitly_requested = set()
    if k_text:
        explicitly_requested = {
            item.nodeid
            for item in items
            if k_text in item.name or k_text in item.nodeid
        }

    for item in items:
        if "optional" not in item.keywords:
            continue
        if include_optional or item.nodeid in explicitly_requested:
            # Requested via --run-optional or matched by -k: leave it runnable.
            continue
        item.add_marker(
            pytest.mark.skip(reason="Optional test, use --run-optional to include")
        )

mysql_ch_replicator/binlog_replicator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,9 @@ def run(self):
473473

474474
self.data_writer.store_event(log_event)
475475

476+
if last_read_count > 1000:
477+
break
478+
476479
self.update_state_if_required(last_transaction_id)
477480
self.clear_old_binlog_if_required()
478481
#print("last read count", last_read_count)

pytest.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[pytest]
2+
markers =
3+
optional: mark test as optional (skipped by default; run with --run-optional)

test_mysql_ch_replicator.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from mysql_ch_replicator import config
1111
from mysql_ch_replicator import mysql_api
1212
from mysql_ch_replicator import clickhouse_api
13-
from mysql_ch_replicator.binlog_replicator import State as BinlogState
13+
from mysql_ch_replicator.binlog_replicator import State as BinlogState, FileReader, EventType
1414
from mysql_ch_replicator.db_replicator import State as DbReplicatorState, DbReplicator
1515
from mysql_ch_replicator.converter import MysqlToClickhouseConverter
1616

@@ -1269,3 +1269,127 @@ def test_parse_mysql_table_structure():
12691269

12701270
assert structure.table_name == 'user_preferences_portal'
12711271

1272+
1273+
def get_last_file(directory, extension='.bin'):
    """Return the name of the highest-numbered file in ``directory``.

    Only regular files whose name ends with ``extension`` and whose remaining
    stem parses as an integer greater than -1 are considered.

    Args:
        directory: Path of the directory to scan (not recursive).
        extension: Suffix the file name must end with. An empty string means
            the whole file name is treated as the number.

    Returns:
        The file name (not the full path) with the largest numeric stem, or
        ``None`` when no file qualifies.
    """
    max_num = -1
    last_file = None
    ext_len = len(extension)

    with os.scandir(directory) as entries:
        for entry in entries:
            name = entry.name
            if not (entry.is_file() and name.endswith(extension)):
                continue
            # Slice with an explicit end index: ``name[:-ext_len]`` would be
            # empty for ext_len == 0 and wrongly reject every file.
            stem = name[:len(name) - ext_len]
            try:
                num = int(stem)
            except ValueError:
                # The part before the extension is not an integer.
                continue
            if num > max_num:
                max_num = num
                last_file = name
    return last_file
1292+
1293+
1294+
def get_last_insert_from_binlog(cfg: config.Settings, db_name: str):
    """Return the last record of the last ADD event in the newest binlog file.

    The newest file is the one picked by ``get_last_file`` inside
    ``<cfg.binlog_replicator.data_dir>/<db_name>``. Returns ``None`` when that
    directory does not exist, holds no matching file, or the file contains no
    ADD-event records.
    """
    db_binlog_dir = os.path.join(cfg.binlog_replicator.data_dir, db_name)
    if not os.path.exists(db_binlog_dir):
        return None

    newest_file = get_last_file(db_binlog_dir)
    if newest_file is None:
        return None

    reader = FileReader(os.path.join(db_binlog_dir, newest_file))
    latest_record = None

    # Walk every event in the file; the reader signals the end with None.
    event = reader.read_next_event()
    while event is not None:
        if event.event_type == EventType.ADD_EVENT.value:
            # Keep overwriting so the final record of the final event wins.
            for record in event.records:
                latest_record = record
        event = reader.read_next_event()

    return latest_record
1312+
1313+
1314+
@pytest.mark.optional
def test_performance_dbreplicator():
    """Time how long a BinlogReplicatorRunner needs to drain a prepared backlog.

    Steps:
      1. Create the test table and confirm, via one probe insert, that the
         runner writes events to the binlog data directory.
      2. Stop the runner and insert ``num_records`` large rows into MySQL.
      3. Restart the runner and wait until the final marker row shows up,
         then print the measured throughput.

    The test only prints numbers; it asserts nothing about the rate itself.
    """
    config_file = 'tests_config_perf.yaml'
    num_records = 100000

    cfg = config.Settings()
    cfg.load(config_file)

    mysql = mysql_api.MySQLApi(database=None, mysql_settings=cfg.mysql)
    ch = clickhouse_api.ClickhouseApi(database=TEST_DB_NAME, clickhouse_settings=cfg.clickhouse)

    prepare_env(cfg, mysql, ch)

    mysql.execute(f'''
    CREATE TABLE {TEST_TABLE_NAME} (
        id int NOT NULL AUTO_INCREMENT,
        name varchar(2048),
        age int,
        PRIMARY KEY (id)
    );
    ''')

    def _last_inserted_name():
        # Column 1 of the record is decoded as the `name` value.
        latest = get_last_insert_from_binlog(cfg=cfg, db_name=TEST_DB_NAME)
        return None if latest is None else latest[1].decode('utf-8')

    # Phase 1: warm-up run to confirm a single probe row reaches the binlog files.
    warmup_runner = BinlogReplicatorRunner(cfg_file=config_file)
    warmup_runner.run()

    time.sleep(1)

    mysql.execute(f"INSERT INTO {TEST_TABLE_NAME} (name, age) VALUES ('TEST_VALUE_1', 33);", commit=True)

    assert_wait(lambda: _last_inserted_name() == 'TEST_VALUE_1', retry_interval=0.5)

    warmup_runner.stop()

    time.sleep(1)

    # Phase 2: build the backlog while no runner is active.
    print("populating mysql data")

    padding = 'a' * 2000

    for i in range(num_records):
        if i % 2000 == 0:
            print(f'populated {i} elements')
        insert_sql = (
            f"INSERT INTO {TEST_TABLE_NAME} (name, age) "
            f"VALUES ('TEST_VALUE_{i}_{padding}', {i});"
        )
        # Commit only on every 20th insert.
        mysql.execute(insert_sql, commit=i % 20 == 0)

    # Marker row: its arrival in the binlog files ends the timed section.
    mysql.execute(f"INSERT INTO {TEST_TABLE_NAME} (name, age) VALUES ('TEST_VALUE_FINAL', 0);", commit=True)

    # Phase 3: timed run. The message says db_replicator, but the runner
    # started here is a BinlogReplicatorRunner.
    print("running db_replicator")
    started_at = time.time()
    timed_runner = BinlogReplicatorRunner(cfg_file=config_file)
    timed_runner.run()

    assert_wait(lambda: _last_inserted_name() == 'TEST_VALUE_FINAL', retry_interval=0.5, max_wait_time=1000)
    finished_at = time.time()

    timed_runner.stop()

    elapsed = finished_at - started_at
    records_per_second = num_records / elapsed

    print('\n\n')
    print("*****************************")
    print("records per second:", int(records_per_second))
    print("total time (seconds):", round(elapsed, 2))
    print("*****************************")
    print('\n\n')
1395+

tests_config_perf.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
mysql:
3+
host: 'localhost'
4+
port: 9307
5+
user: 'root'
6+
password: 'admin'
7+
8+
clickhouse:
9+
host: 'localhost'
10+
port: 9123
11+
user: 'default'
12+
password: 'admin'
13+
14+
binlog_replicator:
15+
data_dir: '/root/binlog/'
16+
records_per_file: 1000
17+
18+
databases: '*test*'
19+
log_level: 'info'
20+
optimize_interval: 3
21+
check_db_updated_interval: 3

0 commit comments

Comments
 (0)