Skip to content

Commit 639ee99

Browse files
committed
Add check for separate runpython data migration
Vibed with ClaudeCode
1 parent 8c8a0c6 commit 639ee99

File tree

12 files changed

+306
-2
lines changed

12 files changed

+306
-2
lines changed

docs/incompatibilities.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ You can ignore checks through the `--exclude-migration-tests` option by specifyi
3838
| `RUNPYTHON_ARGS_NAMING_CONVENTION` | By convention, RunPython names two arguments: apps, schema_editor | Warning |
3939
| `RUNPYTHON_MODEL_IMPORT` | Missing apps.get_model() calls for model | Error |
4040
| `RUNPYTHON_MODEL_VARIABLE_NAME` | The model variable name is different from the model class itself | Warning |
41+
| `RUNPYTHON_MIXED_OPERATIONS` | RunPython operation is mixed with schema operations (data migrations should be in separate files) | Error |
4142
| `RUNSQL_REVERSIBLE` | RunSQL data migration is not reversible (missing reverse SQL) | Warning |
4243
| `CREATE_INDEX` | (Postgresql specific) Creating an index without the concurrent keyword will lock the table and may generate downtime | Warning |
4344
| `CREATE_INDEX_EXCLUSIVE` | (Postgresql specific) Creating an index in a transaction acquiring an `EXCLUSIVE` lock (e.g. most `ALTER TABLE` statements acquire one) prolongs the exclusive lock on the table. Using concurrently clause does not address the issue. On the contrary, it prolongs the transaction, making it more dangerous in this situation. | Warning |
@@ -177,6 +178,51 @@ It could happen that you use a `RunPython` operation to fill a new column. But i
177178

178179
:white_check_mark: **Solution**: use `apps.get_model` to get the model class
179180

181+
### :arrow_forward: Mixing RunPython with schema operations
182+
183+
Mixing RunPython data migrations with schema operations (like AddField, AlterField, etc.) in the same migration file can lead to serious production issues.
184+
185+
**Why this is problematic**:
186+
187+
1. **Database lock issues**: When running RunPython with data chunking in a transactional migration, you may hold database locks for extended periods, potentially causing downtime
188+
2. **Atomic flag complications**: If you set `atomic = False` on a migration that contains both data and schema operations, schema changes won't rollback if the data migration fails
189+
3. **Rollback problems**: When mixing operations, failures in data migration can leave schema changes applied but data changes reverted, creating inconsistent database states
190+
191+
**Forward migration**:
192+
1. Migration adds a new field and runs RunPython to populate it
193+
2. If the RunPython operation fails mid-way through a large dataset, the schema change might be committed but data is only partially migrated
194+
3. Your application may crash because it expects the field to be fully populated
195+
196+
**Rollback**:
197+
1. If you set `atomic = False` to prevent locks, rolling back becomes problematic
198+
2. Schema changes may have been committed separately from data changes
199+
3. You lose the safety of transactional rollback
200+
201+
:white_check_mark: **Solution**: Always separate data migrations into their own files
202+
203+
**Good pattern** (three separate files):
204+
```python
205+
# 0001_add_slug_field.py
206+
operations = [
207+
migrations.AddField('model', 'slug', models.CharField(default='')),
208+
]
209+
210+
# 0002_backfill_slug.py
211+
class Migration(migrations.Migration):
212+
atomic = False # Safe to set now, only data operations
213+
214+
operations = [
215+
migrations.RunPython(backfill_slug_values),
216+
]
217+
218+
# 0003_make_slug_required.py
219+
operations = [
220+
migrations.AlterField('model', 'slug', models.CharField()),
221+
]
222+
```
223+
224+
**Note**: Combining RunPython with RunSQL is acceptable, as both are data migration operations.
225+
180226
## The special case of sqlite
181227

182228
While on PostgreSQL and MySQL a table modification can be expressed by one `ALTER TABLE` statement, sqlite is handled in a different way.

src/django_migration_linter/migration_linter.py

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,8 @@ def _gather_migrations_git(
415415

416416
migrations = []
417417
for line in map(
418-
clean_bytes_to_str, diff_process.stdout.readlines() # type: ignore
418+
clean_bytes_to_str,
419+
diff_process.stdout.readlines(), # type: ignore
419420
):
420421
# Only gather lines that include added migrations.
421422
if self.is_migration_file(line):
@@ -455,7 +456,8 @@ def _gather_migrations_git(
455456
if diff_process.returncode != 0:
456457
output = []
457458
for line in map(
458-
clean_bytes_to_str, diff_process.stderr.readlines() # type: ignore
459+
clean_bytes_to_str,
460+
diff_process.stderr.readlines(), # type: ignore
459461
):
460462
output.append(line)
461463
logger.error("Error while git diff command:\n{}".format("".join(output)))
@@ -529,6 +531,17 @@ def analyse_data_migration(
529531
if op_warnings:
530532
warnings += op_warnings
531533

534+
# Check for mixed RunPython and schema operations
535+
mixed_errors, mixed_ignored, mixed_warnings = (
536+
self.check_mixed_runpython_operations(migration.operations)
537+
)
538+
if mixed_errors:
539+
errors += mixed_errors
540+
if mixed_ignored:
541+
ignored += mixed_ignored
542+
if mixed_warnings:
543+
warnings += mixed_warnings
544+
532545
return errors, ignored, warnings
533546

534547
@staticmethod
@@ -764,3 +777,63 @@ def lint_runsql(
764777
warning += sql_warnings
765778

766779
return error, ignored, warning
780+
781+
def check_mixed_runpython_operations(
782+
self, operations: Iterable[Operation]
783+
) -> tuple[list[Issue], list[Issue], list[Issue]]:
784+
"""
785+
Check if migration mixes RunPython with schema operations.
786+
787+
Data migrations (RunPython/RunSQL) should be in separate migration files
788+
from schema operations to avoid database lock issues and ensure proper
789+
rollback behavior.
790+
791+
Returns:
792+
Tuple of (errors, ignored, warnings) where each is a list of Issue objects
793+
"""
794+
errors: list[Issue] = []
795+
ignored: list[Issue] = []
796+
warnings: list[Issue] = []
797+
798+
# Check if there are any RunPython operations
799+
has_runpython = any(isinstance(op, RunPython) for op in operations)
800+
801+
if not has_runpython:
802+
# No RunPython, nothing to check
803+
return errors, ignored, warnings
804+
805+
# Check for other operation types (schema operations)
806+
# RunSQL is allowed with RunPython as both are data migrations
807+
# IgnoreMigration is a marker operation
808+
# SeparateDatabaseAndState is OK with RunPython (advanced usage)
809+
schema_operations = [
810+
op
811+
for op in operations
812+
if not isinstance(op, (RunPython, RunSQL, IgnoreMigration))
813+
and op.__class__.__name__ != "SeparateDatabaseAndState"
814+
]
815+
816+
if schema_operations:
817+
# Get unique operation names for better error message
818+
operation_names = list(
819+
{op.__class__.__name__ for op in schema_operations[:3]}
820+
)
821+
operation_str = ", ".join(operation_names)
822+
if len(schema_operations) > 3:
823+
operation_str += "..."
824+
825+
issue = Issue(
826+
code="RUNPYTHON_MIXED_OPERATIONS",
827+
message=(
828+
"RunPython operation is mixed with schema operations. "
829+
"Data migrations should be in separate files. "
830+
f"Found schema operations: {operation_str}"
831+
),
832+
)
833+
834+
if issue.code in self.exclude_migration_tests:
835+
ignored.append(issue)
836+
else:
837+
errors.append(issue)
838+
839+
return errors, ignored, warnings

tests/fixtures.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
DROP_UNIQUE_TOGETHER = "app_unique_together"
1717
ADD_MANYTOMANY_FIELD = "app_add_manytomany_field"
1818
DATA_MIGRATIONS = "app_data_migrations"
19+
MIXED_RUNPYTHON_OPERATIONS = "app_mixed_runpython_operations"
1920
MAKE_NOT_NULL_WITH_DJANGO_DEFAULT = "app_make_not_null_with_django_default"
2021
MAKE_NOT_NULL_WITH_LIB_DEFAULT = "app_make_not_null_with_lib_default"
2122
CREATE_INDEX_EXCLUSIVE = "app_create_index_exclusive"

tests/functional/test_data_migrations.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,67 @@ def forward_op(apps, schema_editor):
306306
self.assertEqual(1, len(issues))
307307

308308

309+
class MixedRunPythonOperationsTestCase(unittest.TestCase):
310+
def setUp(self):
311+
test_project_path = os.path.dirname(settings.BASE_DIR)
312+
self.linter = MigrationLinter(
313+
test_project_path,
314+
include_apps=fixtures.MIXED_RUNPYTHON_OPERATIONS,
315+
)
316+
317+
def test_mixed_runpython_with_schema_operations(self):
318+
"""Test that mixing RunPython with schema operations produces an error."""
319+
mixed_migration = self.linter.migration_loader.disk_migrations[
320+
("app_mixed_runpython_operations", "0002_mixed_operations")
321+
]
322+
self.linter.lint_migration(mixed_migration)
323+
324+
self.assertEqual(1, self.linter.nb_erroneous)
325+
self.assertTrue(self.linter.has_errors)
326+
self.assertEqual(0, self.linter.nb_valid)
327+
328+
def test_runpython_only(self):
329+
"""Test that RunPython-only migrations pass without errors."""
330+
runpython_only_migration = self.linter.migration_loader.disk_migrations[
331+
("app_mixed_runpython_operations", "0003_runpython_only")
332+
]
333+
self.linter.lint_migration(runpython_only_migration)
334+
335+
self.assertEqual(0, self.linter.nb_erroneous)
336+
self.assertFalse(self.linter.has_errors)
337+
# Might have warnings for other checks (reversible, etc), but no errors
338+
self.assertTrue(self.linter.nb_valid >= 1 or self.linter.nb_warnings >= 1)
339+
340+
def test_runpython_with_runsql(self):
341+
"""Test that RunPython with RunSQL is allowed (both are data migrations)."""
342+
mixed_data_migration = self.linter.migration_loader.disk_migrations[
343+
("app_mixed_runpython_operations", "0004_runpython_with_runsql")
344+
]
345+
self.linter.lint_migration(mixed_data_migration)
346+
347+
self.assertEqual(0, self.linter.nb_erroneous)
348+
self.assertFalse(self.linter.has_errors)
349+
350+
def test_exclude_mixed_operations_check(self):
351+
"""Test that RUNPYTHON_MIXED_OPERATIONS can be excluded."""
352+
self.linter = MigrationLinter(
353+
os.path.dirname(settings.BASE_DIR),
354+
include_apps=fixtures.MIXED_RUNPYTHON_OPERATIONS,
355+
exclude_migration_tests=("RUNPYTHON_MIXED_OPERATIONS",),
356+
)
357+
358+
mixed_migration = self.linter.migration_loader.disk_migrations[
359+
("app_mixed_runpython_operations", "0002_mixed_operations")
360+
]
361+
self.linter.lint_migration(mixed_migration)
362+
363+
# Should not error when excluded
364+
self.assertEqual(0, self.linter.nb_erroneous)
365+
self.assertFalse(self.linter.has_errors)
366+
# Should be valid since the check is excluded
367+
self.assertEqual(1, self.linter.nb_valid)
368+
369+
309370
class RunSQLMigrationTestCase(unittest.TestCase):
310371
def setUp(self):
311372
test_project_path = os.path.dirname(settings.BASE_DIR)

tests/test_project/app_mixed_runpython_operations/__init__.py

Whitespace-only changes.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Generated by Django 5.0 on 2025-10-22 11:00
2+
3+
from __future__ import annotations
4+
5+
from django.db import migrations, models
6+
7+
8+
class Migration(migrations.Migration):
9+
initial = True
10+
11+
dependencies = []
12+
13+
operations = [
14+
migrations.CreateModel(
15+
name="TestModel",
16+
fields=[
17+
(
18+
"id",
19+
models.AutoField(
20+
auto_created=True,
21+
primary_key=True,
22+
serialize=False,
23+
verbose_name="ID",
24+
),
25+
),
26+
("name", models.CharField(max_length=100)),
27+
],
28+
)
29+
]
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Generated by Django 5.0 on 2025-10-22 11:01
2+
3+
from __future__ import annotations
4+
5+
from django.db import migrations, models
6+
7+
8+
def set_slug_from_name(apps, schema_editor):
9+
TestModel = apps.get_model("app_mixed_runpython_operations", "TestModel")
10+
for obj in TestModel.objects.all():
11+
obj.slug = obj.name.lower().replace(" ", "-")
12+
obj.save()
13+
14+
15+
class Migration(migrations.Migration):
16+
dependencies = [
17+
("app_mixed_runpython_operations", "0001_initial"),
18+
]
19+
20+
operations = [
21+
# This is BAD - mixing schema operations with RunPython
22+
migrations.AddField(
23+
model_name="testmodel",
24+
name="slug",
25+
field=models.CharField(max_length=100, default="", null=True),
26+
),
27+
migrations.RunPython(
28+
set_slug_from_name, reverse_code=migrations.RunPython.noop
29+
),
30+
migrations.AlterField(
31+
model_name="testmodel",
32+
name="slug",
33+
field=models.CharField(max_length=100, null=True),
34+
),
35+
]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 5.0 on 2025-10-22 11:02
2+
3+
from __future__ import annotations
4+
5+
from django.db import migrations
6+
7+
8+
def backfill_data(apps, schema_editor):
9+
TestModel = apps.get_model("app_mixed_runpython_operations", "TestModel")
10+
for obj in TestModel.objects.filter(slug__isnull=True):
11+
obj.slug = "default"
12+
obj.save()
13+
14+
15+
class Migration(migrations.Migration):
16+
dependencies = [
17+
("app_mixed_runpython_operations", "0002_mixed_operations"),
18+
]
19+
20+
operations = [
21+
# This is GOOD - only RunPython operations
22+
migrations.RunPython(backfill_data, reverse_code=migrations.RunPython.noop),
23+
]
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Generated by Django 5.0 on 2025-10-22 11:03
2+
3+
from __future__ import annotations
4+
5+
from django.db import migrations
6+
7+
8+
def update_data(apps, schema_editor):
9+
TestModel = apps.get_model("app_mixed_runpython_operations", "TestModel")
10+
for obj in TestModel.objects.all():
11+
obj.slug = obj.slug.upper()
12+
obj.save()
13+
14+
15+
class Migration(migrations.Migration):
16+
dependencies = [
17+
("app_mixed_runpython_operations", "0003_runpython_only"),
18+
]
19+
20+
operations = [
21+
# This is GOOD - RunPython with RunSQL is fine (both are data migrations)
22+
migrations.RunPython(update_data, reverse_code=migrations.RunPython.noop),
23+
migrations.RunSQL(
24+
"UPDATE app_mixed_runpython_operations_testmodel SET name = UPPER(name);",
25+
reverse_sql=migrations.RunSQL.noop,
26+
),
27+
]

tests/test_project/app_mixed_runpython_operations/migrations/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)