77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10+ from timeit import default_timer as timer
11+
12+ import django .db .models .deletion
1013from aboutcode .pipeline import LoopProgress
14+ from aboutcode .pipeline import humanize_time
1115from django .db import migrations
1216from django .db import models
13- import django .db .models .deletion
1417
1518"""
16- Model and data migration for converting the Advisory aliases
17- JSON field to a concrete M2M Advisory Alias relationship.
19+ Model and data migration to convert Advisory.aliases
20+ JSON field to a concrete M2M Advisory-Alias relationship.
21+
22+ To achieve this, the following steps are executed in order:
23+ - Create AdvisoryRelatedAlias model for Advisory-Alias M2M relationship.
24+ - Make unique_content_id non-nullable and a required field.
25+ - Make Alias.vulnerability field nullable, as vulnerability may not
26+ exist for a corresponding alias during initial data collection.
27+ - Rename existing Advisory.aliases JSON field to old_aliases.
28+ - Create a new Advisory.aliases M2M relation through AdvisoryRelatedAlias model.
29+ - Run a data migration to populate new M2M Advisory.aliases relation using
30+ Advisory.old_aliases data.
31+ - Delete Advisory.old_aliases field.
32+
1833"""
1934
35+
2036def bulk_update (model , items , fields , logger ):
2137 item_count = 0
2238 if items :
@@ -25,7 +41,17 @@ def bulk_update(model, items, fields, logger):
2541 item_count += len (items )
2642 except Exception as e :
2743 logger (f"Error updating Advisory: { e } " )
28- items .clear ()
44+ return item_count
45+
46+
def bulk_create(model, items, logger):
    """
    Insert ``items`` with ``model.objects.bulk_create`` and return the number written.

    Return 0 when ``items`` is empty. When the insert raises, the error is
    reported through ``logger`` (it is not re-raised) and 0 is returned.
    """
    if not items:
        return 0

    try:
        model.objects.bulk_create(objs=items)
    except Exception as error:
        logger(f"Error creating AdvisoryRelatedAlias: {error} ")
        return 0

    return len(items)
3056
3157
@@ -36,56 +62,122 @@ class Migration(migrations.Migration):
3662 ]
3763
def populate_new_advisory_aliases_field(apps, schema_editor):
    """
    Populate the new Advisory.aliases relation using old_aliases JSON data.

    For each Advisory, build one AdvisoryRelatedAlias row per distinct alias
    name in ``old_aliases`` that matches an existing Alias. Names without a
    matching Alias are skipped. Rows are inserted in batches through
    ``bulk_create``; the count it reports and the elapsed time are printed.
    """
    migration_start_time = timer()
    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Alias = apps.get_model("vulnerabilities", "Alias")
    AdvisoryRelatedAlias = apps.get_model("vulnerabilities", "AdvisoryRelatedAlias")
    advisories = Advisory.objects.all()
    # Load every Alias once, keyed by name, so the loop needs no per-advisory query.
    aliases = {i.alias: i for i in Alias.objects.all()}

    chunk_size = 5000
    advisories_count = advisories.count()
    batch_size = 5000
    relation_to_create = []
    advisory_alias_relation_count = 0
    progress = LoopProgress(
        total_iterations=advisories_count,
        logger=print,
        progress_step=1,
    )
    print("\n Populate new advisory aliases relationship.")
    for advisory in progress.iter(advisories.iterator(chunk_size=chunk_size)):
        # AdvisoryRelatedAlias is unique on (advisory, alias). A name repeated in
        # old_aliases would produce duplicate rows and make the batch insert
        # raise, so keep each name once (first-seen order). `or ()` tolerates an
        # empty or null old_aliases value.
        alias_names = dict.fromkeys(advisory.old_aliases or ())
        relation_to_create.extend(
            AdvisoryRelatedAlias(advisory=advisory, alias=aliases[name])
            for name in alias_names
            if name in aliases
        )

        if len(relation_to_create) > batch_size:
            advisory_alias_relation_count += bulk_create(
                model=AdvisoryRelatedAlias,
                items=relation_to_create,
                logger=print,
            )
            relation_to_create.clear()

    # Flush whatever is left after the last full batch.
    advisory_alias_relation_count += bulk_create(
        model=AdvisoryRelatedAlias,
        items=relation_to_create,
        logger=print,
    )
    migration_run_time = timer() - migration_start_time
    print(
        f"\n Successfully created {advisory_alias_relation_count} advisory-alias relationship."
    )
    print(f"\n Data Migration: completed in {humanize_time(migration_run_time)}")
54110
def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
    """
    Use the Advisory.aliases relation to populate old_aliases JSON field.

    For each Advisory, ``old_aliases`` is set to the list of ``alias`` values of
    its related aliases. Advisories are saved in batches through ``bulk_update``;
    the count it reports and the elapsed time are printed.
    """
    started_at = timer()
    advisory_model = apps.get_model("vulnerabilities", "Advisory")
    queryset = advisory_model.objects.prefetch_related("aliases").all()

    total_updated = 0
    batch_size = 5000
    chunk_size = 5000
    pending = []

    def flush_pending():
        """Save the ``old_aliases`` of the pending advisories; return the count saved."""
        return bulk_update(
            model=advisory_model,
            items=pending,
            fields=["old_aliases"],
            logger=print,
        )

    progress = LoopProgress(
        total_iterations=queryset.count(),
        logger=print,
        progress_step=1,
    )
    print(f"\n Reverse alias migration to M2M relation.")
    for advisory in progress.iter(queryset.iterator(chunk_size=chunk_size)):
        advisory.old_aliases = [related.alias for related in advisory.aliases.all()]
        pending.append(advisory)

        if len(pending) <= batch_size:
            continue

        total_updated += flush_pending()
        # bulk_update does not empty the list; reset it before the next batch.
        pending.clear()

    # Save the final, possibly partial, batch.
    total_updated += flush_pending()

    elapsed = timer() - started_at
    print(
        f"\n Successfully reversed the alias relationship for {total_updated} advisories."
    )
    print(f"\n Data Migration: completed in {humanize_time(elapsed)}")
153+
88154 operations = [
155+ migrations .CreateModel (
156+ name = "AdvisoryRelatedAlias" ,
157+ fields = [
158+ (
159+ "id" ,
160+ models .AutoField (
161+ auto_created = True , primary_key = True , serialize = False , verbose_name = "ID"
162+ ),
163+ ),
164+ (
165+ "advisory" ,
166+ models .ForeignKey (
167+ on_delete = django .db .models .deletion .CASCADE , to = "vulnerabilities.advisory"
168+ ),
169+ ),
170+ (
171+ "alias" ,
172+ models .ForeignKey (
173+ on_delete = django .db .models .deletion .CASCADE , to = "vulnerabilities.alias"
174+ ),
175+ ),
176+ ],
177+ options = {
178+ "unique_together" : {("advisory" , "alias" )},
179+ },
180+ ),
89181 migrations .AlterField (
90182 model_name = "advisory" ,
91183 name = "unique_content_id" ,
@@ -96,8 +188,6 @@ def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
96188 null = False ,
97189 ),
98190 ),
99-
100- # Make vulnerability relation optional
101191 migrations .AlterField (
102192 model_name = "alias" ,
103193 name = "vulnerability" ,
@@ -109,8 +199,6 @@ def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
109199 to = "vulnerabilities.vulnerability" ,
110200 ),
111201 ),
112-
113- # Rename aliases field to old_aliases
114202 migrations .AlterModelOptions (
115203 name = "advisory" ,
116204 options = {"ordering" : ["date_published" , "unique_content_id" ]},
@@ -127,14 +215,16 @@ def reverse_populate_new_advisory_aliases_field(apps, schema_editor):
127215 migrations .AddField (
128216 model_name = "advisory" ,
129217 name = "aliases" ,
130- field = models .ManyToManyField (related_name = "advisories" , to = "vulnerabilities.alias" ),
218+ field = models .ManyToManyField (
219+ related_name = "advisories" ,
220+ through = "vulnerabilities.AdvisoryRelatedAlias" ,
221+ to = "vulnerabilities.alias" ,
222+ ),
131223 ),
132- # Populate the new M2M aliases relation
133224 migrations .RunPython (
134225 code = populate_new_advisory_aliases_field ,
135226 reverse_code = reverse_populate_new_advisory_aliases_field ,
136227 ),
137- # Delete JSON aliases field
138228 migrations .RemoveField (
139229 model_name = "advisory" ,
140230 name = "old_aliases" ,
0 commit comments