Skip to content

Commit eeee06e

Browse files
committed
Optimize alias migration using AdvisoryRelatedAlias
- Use AdvisoryRelatedAlias to manage advisory alias relation Signed-off-by: Keshav Priyadarshi <[email protected]>
1 parent b3d5fc5 commit eeee06e

File tree

2 files changed

+129
-23
lines changed

2 files changed

+129
-23
lines changed

vulnerabilities/migrations/0090_migrate_advisory_aliases.py

Lines changed: 113 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,32 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
from timeit import default_timer as timer
11+
12+
import django.db.models.deletion
1013
from aboutcode.pipeline import LoopProgress
14+
from aboutcode.pipeline import humanize_time
1115
from django.db import migrations
1216
from django.db import models
13-
import django.db.models.deletion
1417

1518
"""
16-
Model and data migration for converting the Advisory aliases
17-
JSON field to a concrete M2M Advisory Alias relationship.
19+
Model and data migration to convert Advisory.aliases
20+
JSON field to a concrete M2M Advisory-Alias relationship.
21+
22+
To achieve this, the following steps are executed in order:
23+
- Create AdvisoryRelatedAlias model for Advisory-Alias M2M relationship.
24+
- Make unique_content_id non-nullable and a required field.
25+
- Make Alias.vulnerability field nullable, as vulnerability may not
26+
exist for a corresponding alias during initial data collection.
27+
- Rename existing Advisory.aliases JSON field to old_aliases.
28+
- Create a new Advisory.aliases M2M relation through AdvisoryRelatedAlias model.
29+
- Run a data migration to populate new M2M Advisory.aliases relation using
30+
Advisory.old_aliases data.
31+
- Delete Advisory.old_aliases field.
32+
1833
"""
1934

35+
2036
def bulk_update(model, items, fields, logger):
2137
item_count = 0
2238
if items:
@@ -25,7 +41,17 @@ def bulk_update(model, items, fields, logger):
2541
item_count += len(items)
2642
except Exception as e:
2743
logger(f"Error updating Advisory: {e}")
28-
items.clear()
44+
return item_count
45+
46+
47+
def bulk_create(model, items, logger):
    """
    Insert ``items`` with one ``model.objects.bulk_create`` call and return
    the number of objects that were handed to it.

    ``model`` is the model class whose default manager performs the insert,
    ``items`` is a list of unsaved instances and ``logger`` is a callable that
    accepts a single message string.

    A failed insert is reported through ``logger`` and counted as zero; the
    exception is not re-raised. ``items`` is never modified here, so the
    caller decides when to clear it.
    """
    if not items:
        return 0

    try:
        model.objects.bulk_create(objs=items)
    except Exception as e:
        # Name the model actually being written instead of a hard-coded one,
        # so the message stays accurate if this helper is reused.
        # NOTE(review): if this runs inside a database transaction, a
        # swallowed database error may leave that transaction unusable on
        # some backends -- consider wrapping the insert in a savepoint.
        logger(f"Error creating {model.__name__}: {e}")
        return 0

    return len(items)
3056

3157

@@ -36,56 +62,122 @@ class Migration(migrations.Migration):
3662
]
3763

3864
def populate_new_advisory_aliases_field(apps, schema_editor):
    """
    Populate the new Advisory.aliases relation using old_aliases JSON data.

    Every Alias row is loaded once into an in-memory mapping keyed by its
    ``alias`` string. Advisories are then streamed, and for each string in
    ``old_aliases`` that has a matching Alias an AdvisoryRelatedAlias row is
    queued; strings without a matching Alias are skipped. Queued rows are
    written in batches through the module-level ``bulk_create`` helper.

    ``apps`` is the historical app registry supplied by the migration runner;
    ``schema_editor`` is unused.
    """
    migration_start_time = timer()
    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Alias = apps.get_model("vulnerabilities", "Alias")
    AdvisoryRelatedAlias = apps.get_model("vulnerabilities", "AdvisoryRelatedAlias")
    advisories = Advisory.objects.all()
    aliases = {i.alias: i for i in Alias.objects.all()}

    chunk_size = 5000
    advisories_count = advisories.count()
    batch_size = 5000
    relation_to_create = []
    advisory_alias_relation_count = 0
    progress = LoopProgress(
        total_iterations=advisories_count,
        logger=print,
        progress_step=1,
    )
    print("\nPopulate new advisory aliases relationship.")
    for advisory in progress.iter(advisories.iterator(chunk_size=chunk_size)):
        # De-duplicate while keeping order: a repeated alias string would
        # yield two identical (advisory, alias) rows, violate the unique
        # constraint and make the whole batch fail. An empty or null
        # old_aliases value contributes nothing.
        unique_alias_names = dict.fromkeys(advisory.old_aliases or [])
        relation_to_create.extend(
            AdvisoryRelatedAlias(advisory=advisory, alias=aliases[alias])
            for alias in unique_alias_names
            if alias in aliases
        )

        if len(relation_to_create) > batch_size:
            advisory_alias_relation_count += bulk_create(
                model=AdvisoryRelatedAlias,
                items=relation_to_create,
                logger=print,
            )
            relation_to_create.clear()

    # Flush whatever is left after the last full batch.
    advisory_alias_relation_count += bulk_create(
        model=AdvisoryRelatedAlias,
        items=relation_to_create,
        logger=print,
    )
    migration_run_time = timer() - migration_start_time
    print(
        f"\nSuccessfully created {advisory_alias_relation_count} advisory-alias relationship."
    )
    print(f"\nData Migration: completed in {humanize_time(migration_run_time)}")
54110

55111
def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
    """
    Use the Advisory.aliases relation to populate old_aliases JSON field.

    Each advisory's related Alias rows are read back, their ``alias`` strings
    are stored as a list in ``old_aliases``, and the advisories are saved in
    batches through the module-level ``bulk_update`` helper.

    ``apps`` is the historical app registry supplied by the migration runner;
    ``schema_editor`` is unused.
    """
    started_at = timer()
    Advisory = apps.get_model("vulnerabilities", "Advisory")
    queryset = Advisory.objects.prefetch_related("aliases").all()

    total_updated = 0
    batch_size = 5000
    chunk_size = 5000
    pending = []
    progress = LoopProgress(
        total_iterations=queryset.count(),
        logger=print,
        progress_step=1,
    )
    print(f"\nReverse alias migration to M2M relation.")
    for advisory in progress.iter(queryset.iterator(chunk_size=chunk_size)):
        advisory.old_aliases = [related.alias for related in advisory.aliases.all()]
        pending.append(advisory)

        # Keep accumulating until the pending list exceeds one batch.
        if len(pending) <= batch_size:
            continue

        total_updated += bulk_update(
            model=Advisory,
            items=pending,
            fields=["old_aliases"],
            logger=print,
        )
        pending.clear()

    # Write the remainder that never reached a full batch.
    total_updated += bulk_update(
        model=Advisory,
        items=pending,
        fields=["old_aliases"],
        logger=print,
    )

    elapsed = timer() - started_at
    print(
        f"\nSuccessfully reversed the alias relationship for {total_updated} advisories."
    )
    print(f"\nData Migration: completed in {humanize_time(elapsed)}")
153+
88154
operations = [
155+
migrations.CreateModel(
156+
name="AdvisoryRelatedAlias",
157+
fields=[
158+
(
159+
"id",
160+
models.AutoField(
161+
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
162+
),
163+
),
164+
(
165+
"advisory",
166+
models.ForeignKey(
167+
on_delete=django.db.models.deletion.CASCADE, to="vulnerabilities.advisory"
168+
),
169+
),
170+
(
171+
"alias",
172+
models.ForeignKey(
173+
on_delete=django.db.models.deletion.CASCADE, to="vulnerabilities.alias"
174+
),
175+
),
176+
],
177+
options={
178+
"unique_together": {("advisory", "alias")},
179+
},
180+
),
89181
migrations.AlterField(
90182
model_name="advisory",
91183
name="unique_content_id",
@@ -96,8 +188,6 @@ def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
96188
null=False,
97189
),
98190
),
99-
100-
# Make vulnerability relation optional
101191
migrations.AlterField(
102192
model_name="alias",
103193
name="vulnerability",
@@ -109,8 +199,6 @@ def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
109199
to="vulnerabilities.vulnerability",
110200
),
111201
),
112-
113-
# Rename aliases field to old_aliases
114202
migrations.AlterModelOptions(
115203
name="advisory",
116204
options={"ordering": ["date_published", "unique_content_id"]},
@@ -127,14 +215,16 @@ def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
127215
migrations.AddField(
128216
model_name="advisory",
129217
name="aliases",
130-
field=models.ManyToManyField(related_name="advisories", to="vulnerabilities.alias"),
218+
field=models.ManyToManyField(
219+
related_name="advisories",
220+
through="vulnerabilities.AdvisoryRelatedAlias",
221+
to="vulnerabilities.alias",
222+
),
131223
),
132-
# Populate the new M2M aliases relation
133224
migrations.RunPython(
134225
code=populate_new_advisory_aliases_field,
135226
reverse_code=reverse_populate_new_advisory_aliases_field,
136227
),
137-
# Delete JSON aliases field
138228
migrations.RemoveField(
139229
model_name="advisory",
140230
name="old_aliases",

vulnerabilities/models.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,7 @@ class Advisory(models.Model):
13251325
)
13261326
aliases = models.ManyToManyField(
13271327
Alias,
1328+
through="AdvisoryRelatedAlias",
13281329
related_name="advisories",
13291330
)
13301331
summary = models.TextField(
@@ -1386,6 +1387,21 @@ def to_advisory_data(self) -> "AdvisoryData":
13861387
)
13871388

13881389

1390+
class AdvisoryRelatedAlias(models.Model):
    """
    Through model linking one Advisory to one Alias.

    Both sides cascade on delete, and a given (advisory, alias) pair may be
    stored only once.
    """

    advisory = models.ForeignKey(Advisory, on_delete=models.CASCADE)

    alias = models.ForeignKey(Alias, on_delete=models.CASCADE)

    class Meta:
        unique_together = ("advisory", "alias")
1403+
1404+
13891405
UserModel = get_user_model()
13901406

13911407

0 commit comments

Comments
 (0)