Skip to content

Commit 0abe336

Browse files
authored
Support for distributed migrations (#130)
* Handle distributed migrations * add extra test and update README.md
1 parent 59c1fe5 commit 0abe336

File tree

7 files changed

+371
-18
lines changed

7 files changed

+371
-18
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
### 1.5.0
22

3+
- feat: #130: Add `distributed_migrations` database setting to support distributed migration queries.
34
- feat: #129: Add `toYearWeek` datetime functionality
45

56
### 1.4.0

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,34 @@ Extra settings explanation:
525525
Do not hardcode database name when you define replicated table or distributed table.
526526
Because test database name is different from deployed database name.
527527

528+
#### Clickhouse cluster behind a load balancer
529+
530+
If your clickhouse cluster is running behind a load balancer, you can optionally set `distributed_migrations` to `True` under database OPTIONS.
531+
Then a distributed migration table will be created on all nodes of the cluster, and all migration operations will be performed on this
532+
distributed migrations table instead of a local migrations table. Otherwise, sequentially running migrations will have no effect on other nodes.
533+
534+
Configuration example:
535+
536+
```python
537+
DATABASES = {
538+
"default": {
539+
"HOST": "clickhouse-load-balancer",
540+
"PORT": 9000,
541+
"ENGINE": "clickhouse_backend.backend",
542+
"OPTIONS": {
543+
"migration_cluster": "cluster",
544+
"distributed_migrations": True,
545+
"settings": {
546+
"mutations_sync": 2,
547+
"insert_distributed_sync": 1,
548+
"insert_quorum": 2,
549+
"alter_sync": 2,
550+
},
551+
},
552+
}
553+
}
554+
```
555+
528556
### Model
529557

530558
`cluster` in `Meta` class will make models being created on cluster.

clickhouse_backend/backend/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ def __init__(self, settings_dict, alias=DEFAULT_DB_ALIAS):
163163
self.migration_cluster = self.settings_dict["OPTIONS"].pop(
164164
"migration_cluster", None
165165
)
166+
self.distributed_migrations = self.settings_dict["OPTIONS"].pop(
167+
"distributed_migrations", None
168+
)
166169
# https://clickhouse-driver.readthedocs.io/en/latest/quickstart.html#streaming-results
167170
self.max_block_size = self.settings_dict["OPTIONS"].pop("max_block_size", 65409)
168171
if not self.settings_dict["NAME"]:

clickhouse_backend/patch/migrations.py

Lines changed: 142 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from django.apps.registry import Apps
2+
from django.db import DatabaseError
23
from django.db import models as django_models
34
from django.db.migrations import Migration
4-
from django.db.migrations.exceptions import IrreversibleError
5+
from django.db.migrations.exceptions import IrreversibleError, MigrationSchemaMissing
56
from django.db.migrations.operations.fields import FieldOperation
67
from django.db.migrations.operations.models import (
78
DeleteModel,
@@ -15,6 +16,41 @@
1516
__all__ = ["patch_migrations", "patch_migration_recorder", "patch_migration"]
1617

1718

19+
def _should_distribute_migrations(connection):
20+
"""
21+
Check if the connection is configured for distributed migrations.
22+
"""
23+
return getattr(connection, "distributed_migrations", False) and getattr(
24+
connection, "migration_cluster", None
25+
)
26+
27+
28+
def _get_model_table_name(connection):
29+
"""
30+
Return the name of the table that will be used by the MigrationRecorder.
31+
If distributed migrations are enabled, return the distributed table name.
32+
Otherwise, return the regular django_migrations table name.
33+
"""
34+
if _should_distribute_migrations(connection):
35+
return "distributed_django_migrations"
36+
return "django_migrations"
37+
38+
39+
def _check_replicas(connection):
40+
"""
41+
Check if the connection has replicas configured for the migration cluster.
42+
"""
43+
if hasattr(connection, "has_replicas"):
44+
return connection.has_replicas
45+
46+
with connection.cursor() as cursor:
47+
cursor.execute(
48+
f"select replica_num from system.clusters where cluster={connection.migration_cluster}"
49+
)
50+
(replica_count,) = cursor.fetchone()
51+
return replica_count >= 1
52+
53+
1854
def patch_migrations():
1955
patch_migration_recorder()
2056
patch_migration()
@@ -29,22 +65,75 @@ def Migration(self):
2965
if self._migration_class is None:
3066
if self.connection.vendor == "clickhouse":
3167
from clickhouse_backend import models
68+
from clickhouse_backend.models import currentDatabase
3269

33-
class Migration(models.ClickhouseModel):
34-
app = models.StringField(max_length=255)
35-
name = models.StringField(max_length=255)
36-
applied = models.DateTime64Field(default=now)
37-
deleted = models.BoolField(default=False)
70+
# Only create a distributed migration model if the connection
71+
# has distributed migrations enabled and a migration cluster is set.
72+
# otherwise, create a regular merge tree.
73+
if _should_distribute_migrations(self.connection):
74+
has_replicas = _check_replicas(self.connection)
3875

39-
class Meta:
40-
apps = Apps()
41-
app_label = "migrations"
42-
db_table = "django_migrations"
43-
engine = models.MergeTree(order_by=("app", "name"))
44-
cluster = getattr(self.connection, "migration_cluster", None)
76+
Engine = models.MergeTree
77+
if has_replicas:
78+
Engine = models.ReplicatedMergeTree
4579

46-
def __str__(self):
47-
return "Migration %s for %s" % (self.name, self.app)
80+
self.connection.has_replicas = has_replicas
81+
82+
class _Migration(models.ClickhouseModel):
83+
app = models.StringField(max_length=255)
84+
name = models.StringField(max_length=255)
85+
applied = models.DateTime64Field(default=now)
86+
deleted = models.BoolField(default=False)
87+
88+
class Meta:
89+
apps = Apps()
90+
app_label = "migrations"
91+
db_table = "django_migrations"
92+
engine = Engine(order_by=("app", "name"))
93+
cluster = self.connection.migration_cluster
94+
95+
def __str__(self):
96+
return "Migration %s for %s" % (self.name, self.app)
97+
98+
class Migration(models.ClickhouseModel):
99+
app = models.StringField(max_length=255)
100+
name = models.StringField(max_length=255)
101+
applied = models.DateTime64Field(default=now)
102+
deleted = models.BoolField(default=False)
103+
104+
class Meta:
105+
apps = Apps()
106+
app_label = "migrations"
107+
db_table = _get_model_table_name(self.connection)
108+
engine = models.Distributed(
109+
self.connection.migration_cluster,
110+
currentDatabase(),
111+
_Migration._meta.db_table,
112+
models.Rand(),
113+
)
114+
cluster = self.connection.migration_cluster
115+
116+
Migration._meta.local_model_class = _Migration
117+
118+
else:
119+
120+
class Migration(models.ClickhouseModel):
121+
app = models.StringField(max_length=255)
122+
name = models.StringField(max_length=255)
123+
applied = models.DateTime64Field(default=now)
124+
deleted = models.BoolField(default=False)
125+
126+
class Meta:
127+
apps = Apps()
128+
app_label = "migrations"
129+
db_table = _get_model_table_name(self.connection)
130+
engine = models.MergeTree(order_by=("app", "name"))
131+
cluster = getattr(
132+
self.connection, "migration_cluster", None
133+
)
134+
135+
def __str__(self):
136+
return "Migration %s for %s" % (self.name, self.app)
48137

49138
else:
50139

@@ -69,15 +158,45 @@ def has_table(self):
69158
# Assert migration table won't be deleted once created.
70159
if not getattr(self, "_has_table", False):
71160
with self.connection.cursor() as cursor:
161+
table = self.Migration._meta.db_table
72162
tables = self.connection.introspection.table_names(cursor)
73-
self._has_table = self.Migration._meta.db_table in tables
163+
self._has_table = table in tables
74164
if self._has_table and self.connection.vendor == "clickhouse":
75165
# fix https://github.com/jayvynl/django-clickhouse-backend/issues/51
76166
cursor.execute(
77-
"ALTER table django_migrations ADD COLUMN IF NOT EXISTS deleted Bool"
167+
f"ALTER table {table} ADD COLUMN IF NOT EXISTS deleted Bool"
78168
)
79169
return self._has_table
80170

171+
def ensure_schema(self):
172+
"""Ensure the table exists and has the correct schema."""
173+
# If the table's there, that's fine - we've never changed its schema
174+
# in the codebase.
175+
if self.has_table():
176+
return
177+
178+
# In case of distributed migrations, we need to ensure the local model exists first and
179+
# then create the distributed model.
180+
try:
181+
with self.connection.schema_editor() as editor:
182+
if (
183+
editor.connection.vendor == "clickhouse"
184+
and _should_distribute_migrations(editor.connection)
185+
):
186+
with editor.connection.cursor() as cursor:
187+
tables = editor.connection.introspection.table_names(cursor)
188+
local_model_class = self.Migration._meta.local_model_class
189+
local_table = local_model_class._meta.db_table
190+
if local_table not in tables:
191+
# Create the local model first
192+
editor.create_model(self.Migration._meta.local_model_class)
193+
194+
editor.create_model(self.Migration)
195+
except DatabaseError as exc:
196+
raise MigrationSchemaMissing(
197+
"Unable to create the django_migrations table (%s)" % exc
198+
)
199+
81200
def migration_qs(self):
82201
if self.connection.vendor == "clickhouse":
83202
return self.Migration.objects.using(self.connection.alias).filter(
@@ -118,6 +237,7 @@ def flush(self):
118237

119238
MigrationRecorder.Migration = property(Migration)
120239
MigrationRecorder.has_table = has_table
240+
MigrationRecorder.ensure_schema = ensure_schema
121241
MigrationRecorder.migration_qs = property(migration_qs)
122242
MigrationRecorder.record_applied = record_applied
123243
MigrationRecorder.record_unapplied = record_unapplied
@@ -136,13 +256,15 @@ def apply(self, project_state, schema_editor, collect_sql=False):
136256
"""
137257
applied_on_remote = False
138258
if getattr(schema_editor.connection, "migration_cluster", None):
259+
_table = _get_model_table_name(schema_editor.connection)
260+
139261
with schema_editor.connection.cursor() as cursor:
140262
cursor.execute(
141263
"select EXISTS(select 1 from clusterAllReplicas(%s, currentDatabase(), %s)"
142264
" where app=%s and name=%s and deleted=false)",
143265
[
144266
schema_editor.connection.migration_cluster,
145-
"django_migrations",
267+
_table,
146268
self.app_label,
147269
self.name,
148270
],
@@ -203,13 +325,15 @@ def unapply(self, project_state, schema_editor, collect_sql=False):
203325
"""
204326
unapplied_on_remote = False
205327
if getattr(schema_editor.connection, "migration_cluster", None):
328+
_table = _get_model_table_name(schema_editor.connection)
329+
206330
with schema_editor.connection.cursor() as cursor:
207331
cursor.execute(
208332
"select EXISTS(select 1 from clusterAllReplicas(%s, currentDatabase(), %s)"
209333
" where app=%s and name=%s and deleted=true)",
210334
[
211335
schema_editor.connection.migration_cluster,
212-
"django_migrations",
336+
_table,
213337
self.app_label,
214338
self.name,
215339
],

compose.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ services:
2121
- "node1:/var/lib/clickhouse/"
2222
- "./clickhouse-config/node1/:/etc/clickhouse-server/config.d/"
2323
ports:
24+
- "127.0.0.1:8123:8123"
2425
- "127.0.0.1:9000:9000"
2526
node2:
2627
<<: *base-service
@@ -29,6 +30,7 @@ services:
2930
- "node2:/var/lib/clickhouse/"
3031
- "./clickhouse-config/node2/:/etc/clickhouse-server/config.d/"
3132
ports:
33+
- "127.0.0.1:8124:8123"
3234
- "127.0.0.1:9001:9000"
3335
node3:
3436
<<: *base-service
@@ -37,6 +39,7 @@ services:
3739
- "node3:/var/lib/clickhouse/"
3840
- "./clickhouse-config/node3/:/etc/clickhouse-server/config.d/"
3941
ports:
42+
- "127.0.0.1:8125:8123"
4043
- "127.0.0.1:9002:9000"
4144
node4:
4245
<<: *base-service
@@ -45,8 +48,25 @@ services:
4548
- "node4:/var/lib/clickhouse/"
4649
- "./clickhouse-config/node4/:/etc/clickhouse-server/config.d/"
4750
ports:
51+
- "127.0.0.1:8126:8123"
4852
- "127.0.0.1:9003:9000"
4953

54+
haproxy:
55+
image: haproxy:latest
56+
container_name: clickhouse-haproxy
57+
command: sh -c "haproxy -f /usr/local/etc/haproxy/haproxy.cfg"
58+
restart: always
59+
volumes:
60+
- "./proxy-config/haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg"
61+
ports:
62+
- "127.0.0.1:8127:8123"
63+
- "127.0.0.1:9004:9000"
64+
depends_on:
65+
node1:
66+
condition: service_healthy
67+
node3:
68+
condition: service_healthy
69+
5070
volumes:
5171
node1:
5272
name: clickhouse-node1

proxy-config/haproxy.cfg

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# TCP frontend for native ClickHouse protocol
2+
frontend clickhouse_tcp
3+
bind *:9000
4+
mode tcp
5+
default_backend clickhouse_tcp_nodes
6+
7+
backend clickhouse_tcp_nodes
8+
mode tcp
9+
balance roundrobin
10+
option tcp-check
11+
tcp-check connect
12+
server ch1 clickhouse-node1:9000 check
13+
server ch2 clickhouse-node3:9000 check
14+
15+
# HTTP frontend for ClickHouse's HTTP interface
16+
frontend clickhouse_http
17+
bind *:8123
18+
mode http
19+
default_backend clickhouse_http_nodes
20+
21+
backend clickhouse_http_nodes
22+
mode http
23+
balance roundrobin
24+
option httpchk GET /ping
25+
server ch1 clickhouse-node1:8123 check
26+
server ch2 clickhouse-node3:8123 check

0 commit comments

Comments
 (0)