Skip to content

Commit f167562

Browse files
authored
Do not create vulnerabilities for empty aliases (#1334)
* Do not create vulnerabilities for empty aliases Signed-off-by: Tushar Goel <[email protected]> * Add test for process_inferences Signed-off-by: Tushar Goel <[email protected]> * Refactor get_or_create_vulnerability_and_aliases function Signed-off-by: Tushar Goel <[email protected]> * Copy changes of improver runner in import runner Signed-off-by: Tushar Goel <[email protected]> * Add migration to remove vulnerabilities with empty aliases Signed-off-by: Tushar Goel <[email protected]> * Add tests for migration Signed-off-by: Tushar Goel <[email protected]> * Address review comments Signed-off-by: Tushar Goel <[email protected]> --------- Signed-off-by: Tushar Goel <[email protected]>
1 parent 8f8190e commit f167562

File tree

6 files changed

+284
-109
lines changed

6 files changed

+284
-109
lines changed

vulnerabilities/import_runner.py

Lines changed: 78 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -245,69 +245,97 @@ def create_valid_vulnerability_reference(url, reference_id=None):
245245
return reference
246246

247247

248-
def get_or_create_vulnerability_and_aliases(alias_names, vulnerability_id=None, summary=None):
248+
def get_or_create_vulnerability_and_aliases(
249+
aliases: List[str], vulnerability_id=None, summary=None
250+
):
249251
"""
250252
Get or create vulnerabilitiy and aliases such that all existing and new
251253
aliases point to the same vulnerability
252254
"""
253-
existing_vulns = set()
254-
alias_names = set(alias_names)
255-
new_alias_names = set()
256-
for alias_name in alias_names:
257-
try:
258-
alias = Alias.objects.get(alias=alias_name)
259-
existing_vulns.add(alias.vulnerability)
260-
except Alias.DoesNotExist:
261-
new_alias_names.add(alias_name)
262-
263-
# If given set of aliases point to different vulnerabilities in the
264-
# database, request is malformed
265-
# TODO: It is possible that all those vulnerabilities are actually
266-
# the same at data level, figure out a way to merge them
267-
if len(existing_vulns) > 1:
268-
logger.warning(
269-
f"Given aliases {alias_names} already exist and do not point "
270-
f"to a single vulnerability. Cannot improve. Skipped."
271-
)
272-
return
255+
aliases = set(alias.strip() for alias in aliases if alias and alias.strip())
256+
new_alias_names, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases)
257+
258+
# All aliases must point to the same vulnerability
259+
vulnerability = None
260+
if existing_vulns:
261+
if len(existing_vulns) != 1:
262+
vcids = ", ".join(v.vulnerability_id for v in existing_vulns)
263+
logger.error(
264+
f"Cannot create vulnerability. "
265+
f"Aliases {aliases} already exist and point "
266+
f"to multiple vulnerabilities {vcids}."
267+
)
268+
return
269+
else:
270+
vulnerability = existing_vulns.pop()
273271

274-
existing_alias_vuln = existing_vulns.pop() if existing_vulns else None
275-
276-
if (
277-
existing_alias_vuln
278-
and vulnerability_id
279-
and existing_alias_vuln.vulnerability_id != vulnerability_id
280-
):
281-
logger.warning(
282-
f"Given aliases {alias_names!r} already exist and point to existing"
283-
f"vulnerability {existing_alias_vuln}. Unable to create Vulnerability "
284-
f"with vulnerability_id {vulnerability_id}. Skipped"
285-
)
286-
return
272+
if vulnerability_id and vulnerability.vulnerability_id != vulnerability_id:
273+
logger.error(
274+
f"Cannot create vulnerability. "
275+
f"Aliases {aliases} already exist and point to a different "
276+
f"vulnerability {vulnerability} than the requested "
277+
f"vulnerability {vulnerability_id}."
278+
)
279+
return
287280

288-
if existing_alias_vuln:
289-
vulnerability = existing_alias_vuln
290-
elif vulnerability_id:
281+
if vulnerability_id and not vulnerability:
291282
try:
292283
vulnerability = Vulnerability.objects.get(vulnerability_id=vulnerability_id)
293284
except Vulnerability.DoesNotExist:
294-
logger.warning(
295-
f"Given vulnerability_id: {vulnerability_id} does not exist in the database"
296-
)
285+
logger.error(f"Cannot get requested vulnerability {vulnerability_id}.")
297286
return
287+
if vulnerability:
288+
# TODO: We should keep multiple summaries, one for each advisory
289+
# if summary and summary != vulnerability.summary:
290+
# logger.warning(
291+
# f"Inconsistent summary for {vulnerability.vulnerability_id}. "
292+
# f"Existing: {vulnerability.summary!r}, provided: {summary!r}"
293+
# )
294+
associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_alias_names)
298295
else:
299-
vulnerability = Vulnerability(summary=summary)
300-
vulnerability.save()
296+
try:
297+
vulnerability = create_vulnerability_and_add_aliases(
298+
aliases=new_alias_names, summary=summary
299+
)
300+
except Exception as e:
301+
logger.error(
302+
f"Cannot create vulnerability with summary {summary!r} and {new_alias_names!r} {e!r}.\n{traceback_format_exc()}."
303+
)
304+
return
305+
306+
return vulnerability
307+
308+
309+
def get_vulns_for_aliases_and_get_new_aliases(aliases):
310+
"""
311+
Return ``new_aliases`` that are not in the database and
312+
``existing_vulns`` that point to the given ``aliases``.
313+
"""
314+
new_aliases = set(aliases)
315+
existing_vulns = set()
316+
for alias in Alias.objects.filter(alias__in=aliases):
317+
existing_vulns.add(alias.vulnerability)
318+
new_aliases.remove(alias.alias)
319+
return new_aliases, existing_vulns
301320

302-
if summary and summary != vulnerability.summary:
303-
logger.warning(
304-
f"Inconsistent summary for {vulnerability!r}. "
305-
f"Existing: {vulnerability.summary}, provided: {summary}"
306-
)
307321

308-
for alias_name in new_alias_names:
322+
@transaction.atomic
323+
def create_vulnerability_and_add_aliases(aliases, summary):
324+
"""
325+
Return a new ``vulnerability`` created with ``summary``
326+
and associate the ``vulnerability`` with ``aliases``.
327+
Raise exception if no alias is associated with the ``vulnerability``.
328+
"""
329+
vulnerability = Vulnerability(summary=summary)
330+
vulnerability.save()
331+
associate_vulnerability_with_aliases(aliases, vulnerability)
332+
if not vulnerability.aliases.count():
333+
raise Exception(f"Vulnerability {vulnerability.vcid} must have one or more aliases")
334+
return vulnerability
335+
336+
337+
def associate_vulnerability_with_aliases(aliases, vulnerability):
338+
for alias_name in aliases:
309339
alias = Alias(alias=alias_name, vulnerability=vulnerability)
310340
alias.save()
311341
logger.info(f"New alias for {vulnerability!r}: {alias_name}")
312-
313-
return vulnerability

vulnerabilities/improve_runner.py

Lines changed: 81 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import logging
1111
from datetime import datetime
1212
from datetime import timezone
13+
from traceback import format_exc as traceback_format_exc
1314
from typing import List
1415

1516
from django.core.exceptions import ValidationError
@@ -79,7 +80,7 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver
7980
for inference in inferences:
8081
vulnerability = get_or_create_vulnerability_and_aliases(
8182
vulnerability_id=inference.vulnerability_id,
82-
alias_names=inference.aliases,
83+
aliases=inference.aliases,
8384
summary=inference.summary,
8485
)
8586

@@ -173,69 +174,97 @@ def create_valid_vulnerability_reference(url, reference_id=None):
173174
return reference
174175

175176

176-
def get_or_create_vulnerability_and_aliases(alias_names, vulnerability_id=None, summary=None):
177+
def get_or_create_vulnerability_and_aliases(
178+
aliases: List[str], vulnerability_id=None, summary=None
179+
):
177180
"""
178181
Get or create vulnerabilitiy and aliases such that all existing and new
179182
aliases point to the same vulnerability
180183
"""
181-
existing_vulns = set()
182-
alias_names = set(alias_names)
183-
new_alias_names = set()
184-
for alias_name in alias_names:
185-
try:
186-
alias = Alias.objects.get(alias=alias_name)
187-
existing_vulns.add(alias.vulnerability)
188-
except Alias.DoesNotExist:
189-
new_alias_names.add(alias_name)
190-
191-
# If given set of aliases point to different vulnerabilities in the
192-
# database, request is malformed
193-
# TODO: It is possible that all those vulnerabilities are actually
194-
# the same at data level, figure out a way to merge them
195-
if len(existing_vulns) > 1:
196-
logger.warning(
197-
f"Given aliases {alias_names} already exist and do not point "
198-
f"to a single vulnerability. Cannot improve. Skipped."
199-
)
200-
return
201-
202-
existing_alias_vuln = existing_vulns.pop() if existing_vulns else None
203-
204-
if (
205-
existing_alias_vuln
206-
and vulnerability_id
207-
and existing_alias_vuln.vulnerability_id != vulnerability_id
208-
):
209-
logger.warning(
210-
f"Given aliases {alias_names!r} already exist and point to existing"
211-
f"vulnerability {existing_alias_vuln}. Unable to create Vulnerability "
212-
f"with vulnerability_id {vulnerability_id}. Skipped"
213-
)
214-
return
184+
aliases = set(alias.strip() for alias in aliases if alias and alias.strip())
185+
new_alias_names, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases)
186+
187+
# All aliases must point to the same vulnerability
188+
vulnerability = None
189+
if existing_vulns:
190+
if len(existing_vulns) != 1:
191+
vcids = ", ".join(v.vulnerability_id for v in existing_vulns)
192+
logger.error(
193+
f"Cannot create vulnerability. "
194+
f"Aliases {aliases} already exist and point "
195+
f"to multiple vulnerabilities {vcids}."
196+
)
197+
return
198+
else:
199+
vulnerability = existing_vulns.pop()
200+
201+
if vulnerability_id and vulnerability.vulnerability_id != vulnerability_id:
202+
logger.error(
203+
f"Cannot create vulnerability. "
204+
f"Aliases {aliases} already exist and point to a different "
205+
f"vulnerability {vulnerability} than the requested "
206+
f"vulnerability {vulnerability_id}."
207+
)
208+
return
215209

216-
if existing_alias_vuln:
217-
vulnerability = existing_alias_vuln
218-
elif vulnerability_id:
210+
if vulnerability_id and not vulnerability:
219211
try:
220212
vulnerability = Vulnerability.objects.get(vulnerability_id=vulnerability_id)
221213
except Vulnerability.DoesNotExist:
222-
logger.warning(
223-
f"Given vulnerability_id: {vulnerability_id} does not exist in the database"
224-
)
214+
logger.error(f"Cannot get requested vulnerability {vulnerability_id}.")
225215
return
216+
if vulnerability:
217+
# TODO: We should keep multiple summaries, one for each advisory
218+
# if summary and summary != vulnerability.summary:
219+
# logger.warning(
220+
# f"Inconsistent summary for {vulnerability.vulnerability_id}. "
221+
# f"Existing: {vulnerability.summary!r}, provided: {summary!r}"
222+
# )
223+
associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_alias_names)
226224
else:
227-
vulnerability = Vulnerability(summary=summary)
228-
vulnerability.save()
225+
try:
226+
vulnerability = create_vulnerability_and_add_aliases(
227+
aliases=new_alias_names, summary=summary
228+
)
229+
except Exception as e:
230+
logger.error(
231+
f"Cannot create vulnerability with summary {summary!r} and {new_alias_names!r} {e!r}.\n{traceback_format_exc()}."
232+
)
233+
return
229234

230-
if summary and summary != vulnerability.summary:
231-
logger.warning(
232-
f"Inconsistent summary for {vulnerability!r}. "
233-
f"Existing: {vulnerability.summary}, provided: {summary}"
234-
)
235+
return vulnerability
236+
237+
238+
def get_vulns_for_aliases_and_get_new_aliases(aliases):
239+
"""
240+
Return ``new_aliases`` that are not in the database and
241+
``existing_vulns`` that point to the given ``aliases``.
242+
"""
243+
new_aliases = set(aliases)
244+
existing_vulns = set()
245+
for alias in Alias.objects.filter(alias__in=aliases):
246+
existing_vulns.add(alias.vulnerability)
247+
new_aliases.remove(alias.alias)
248+
return new_aliases, existing_vulns
235249

236-
for alias_name in new_alias_names:
250+
251+
@transaction.atomic
252+
def create_vulnerability_and_add_aliases(aliases, summary):
253+
"""
254+
Return a new ``vulnerability`` created with ``summary``
255+
and associate the ``vulnerability`` with ``aliases``.
256+
Raise exception if no alias is associated with the ``vulnerability``.
257+
"""
258+
vulnerability = Vulnerability(summary=summary)
259+
vulnerability.save()
260+
associate_vulnerability_with_aliases(aliases, vulnerability)
261+
if not vulnerability.aliases.count():
262+
raise Exception(f"Vulnerability {vulnerability.vcid} must have one or more aliases")
263+
return vulnerability
264+
265+
266+
def associate_vulnerability_with_aliases(aliases, vulnerability):
267+
for alias_name in aliases:
237268
alias = Alias(alias=alias_name, vulnerability=vulnerability)
238269
alias.save()
239270
logger.info(f"New alias for {vulnerability!r}: {alias_name}")
240-
241-
return vulnerability
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from django.db import migrations
11+
12+
13+
class Migration(migrations.Migration):
14+
15+
dependencies = [
16+
("vulnerabilities", "0040_remove_advisory_date_improved_advisory_date_imported"),
17+
]
18+
19+
def remove_vulns_with_empty_aliases(apps, _):
20+
Vulnerability = apps.get_model("vulnerabilities", "Vulnerability")
21+
Package = apps.get_model("vulnerabilities", "Package")
22+
packages = []
23+
vulnerabilities = []
24+
for vuln in Vulnerability.objects.filter(aliases=None).prefetch_related(
25+
"packages"
26+
):
27+
# Delete packages associated with that vulnerability
28+
for package in vuln.packages.all():
29+
packages.append(package.id)
30+
vulnerabilities.append(vuln.id)
31+
32+
Vulnerability.objects.filter(id__in=vulnerabilities).delete()
33+
Package.objects.filter(id__in=packages).delete()
34+
35+
operations = [
36+
migrations.RunPython(remove_vulns_with_empty_aliases, reverse_code=migrations.RunPython.noop),
37+
]

vulnerabilities/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import json
1212
import logging
1313
from contextlib import suppress
14+
from typing import Any
1415

1516
from cwe2.database import Database
1617
from django.contrib.auth import get_user_model
@@ -759,6 +760,8 @@ class Alias(models.Model):
759760
alias = models.CharField(
760761
max_length=50,
761762
unique=True,
763+
blank=False,
764+
null=False,
762765
help_text="An alias is a unique vulnerability identifier in some database, "
763766
"such as CVE-2020-2233",
764767
)

vulnerabilities/tests/test_data_migrations.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,3 +568,29 @@ def test_removal_of_corrupted_advisory(self):
568568
# using get_model to avoid circular import
569569
Advisory = self.apps.get_model("vulnerabilities", "Advisory")
570570
Advisory.objects.all().count() == 0
571+
572+
573+
class RemoveVulnerabilitiesWithEmptyAliases(TestMigrations):
574+
app_name = "vulnerabilities"
575+
migrate_from = "0040_remove_advisory_date_improved_advisory_date_imported"
576+
migrate_to = "0041_remove_vulns_with_empty_aliases"
577+
578+
def setUpBeforeMigration(self, apps):
579+
# using get_model to avoid circular import
580+
Vulnerability = apps.get_model("vulnerabilities", "Vulnerability")
581+
Alias = apps.get_model("vulnerabilities", "Alias")
582+
583+
vuln = Vulnerability.objects.create(
584+
summary="Corrupted vuln",
585+
)
586+
vuln.save()
587+
vuln = Vulnerability.objects.create(
588+
summary="vuln",
589+
)
590+
vuln.save()
591+
Alias.objects.create(alias="CVE-123", vulnerability=vuln)
592+
593+
def test_removal_of_corrupted_vulns(self):
594+
# using get_model to avoid circular import
595+
Vulnerability = self.apps.get_model("vulnerabilities", "Vulnerability")
596+
Vulnerability.objects.all().count() == 1

0 commit comments

Comments
 (0)