Skip to content

Commit 6d379d0

Browse files
pombredanneTG1999
andcommitted
Make VulnerabilityReference.url unique #818
Also validate full_clean in the improve_runner to ensure we do not have empty, invalid or blank URLs. Refactor code to add new Manager to VulnerabilityReference and Package Add convenience method accordingly to create Pckage from purls Reference: #818 Co-authored-by: Tushar Goel <[email protected]> Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent caa7268 commit 6d379d0

File tree

8 files changed

+234
-53
lines changed

8 files changed

+234
-53
lines changed

vulnerabilities/improve_runner.py

Lines changed: 64 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,19 @@
1111
from datetime import datetime
1212
from datetime import timezone
1313
from typing import List
14-
from typing import Tuple
1514

15+
from django.core.exceptions import ValidationError
1616
from django.db import transaction
1717

18-
from vulnerabilities import models
19-
from vulnerabilities.importer import PackageURL
2018
from vulnerabilities.improver import Inference
2119
from vulnerabilities.models import Advisory
20+
from vulnerabilities.models import Alias
21+
from vulnerabilities.models import Package
22+
from vulnerabilities.models import PackageRelatedVulnerability
23+
from vulnerabilities.models import Vulnerability
24+
from vulnerabilities.models import VulnerabilityReference
25+
from vulnerabilities.models import VulnerabilityRelatedReference
26+
from vulnerabilities.models import VulnerabilitySeverity
2227

2328
logger = logging.getLogger(__name__)
2429

@@ -63,46 +68,59 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver
6368
logger.info(f"Improving advisory id: {advisory.id}")
6469

6570
for inference in inferences:
66-
vuln = get_or_create_vulnerability_and_aliases(
67-
inference.vulnerability_id, inference.aliases, inference.summary
71+
vulnerability = get_or_create_vulnerability_and_aliases(
72+
vulnerability_id=inference.vulnerability_id,
73+
alias_names=inference.aliases,
74+
summary=inference.summary,
6875
)
69-
if not vuln:
76+
77+
if not vulnerability:
7078
logger.warn(f"Unable to get vulnerability for inference: {inference!r}")
7179
continue
7280

7381
for ref in inference.references:
74-
reference, _ = models.VulnerabilityReference.objects.get_or_create(
75-
reference_id=ref.reference_id, url=ref.url
82+
83+
reference = VulnerabilityReference.objects.get_or_none(
84+
reference_id=ref.reference_id,
85+
url=ref.url,
7686
)
7787

78-
models.VulnerabilityRelatedReference.objects.update_or_create(
79-
reference=reference, vulnerability=vuln
88+
if not reference:
89+
reference = create_valid_vulnerability_reference(
90+
reference_id=ref.reference_id,
91+
url=ref.url,
92+
)
93+
if not reference:
94+
continue
95+
96+
VulnerabilityRelatedReference.objects.update_or_create(
97+
reference=reference,
98+
vulnerability=vulnerability,
8099
)
81100

82101
for severity in ref.severities:
83-
_vs, updated = models.VulnerabilitySeverity.objects.update_or_create(
102+
_vs, updated = VulnerabilitySeverity.objects.update_or_create(
84103
scoring_system=severity.system.identifier,
85104
reference=reference,
86105
defaults={"value": str(severity.value)},
87106
)
88107
if updated:
89108
logger.info(f"Severity updated for reference {ref!r} to {severity.value!r}")
90109

91-
if inference.affected_purls:
92-
for pkg in inference.affected_purls:
93-
vulnerable_package, _ = _get_or_create_package(pkg)
94-
models.PackageRelatedVulnerability(
95-
vulnerability=vuln,
96-
package=vulnerable_package,
97-
created_by=improver_name,
98-
confidence=inference.confidence,
99-
fix=False,
100-
).update_or_create()
110+
for affected_purl in inference.affected_purls or []:
111+
vulnerable_package = Package.objects.get_or_create_from_purl(purl=affected_purl)
112+
PackageRelatedVulnerability(
113+
vulnerability=vulnerability,
114+
package=vulnerable_package,
115+
created_by=improver_name,
116+
confidence=inference.confidence,
117+
fix=False,
118+
).update_or_create()
101119

102120
if inference.fixed_purl:
103-
fixed_package, _ = _get_or_create_package(inference.fixed_purl)
104-
models.PackageRelatedVulnerability(
105-
vulnerability=vuln,
121+
fixed_package = Package.objects.get_or_create_from_purl(purl=inference.fixed_purl)
122+
PackageRelatedVulnerability(
123+
vulnerability=vulnerability,
106124
package=fixed_package,
107125
created_by=improver_name,
108126
confidence=inference.confidence,
@@ -113,26 +131,25 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver
113131
advisory.save()
114132

115133

116-
def _get_or_create_package(p: PackageURL) -> Tuple[models.Package, bool]:
117-
query_kwargs = {}
118-
# TODO: this should be revisited as this should best be a model or manager method... and possibly streamlined
119-
query_kwargs = dict(
120-
type=p.type or "",
121-
namespace=p.namespace or "",
122-
name=p.name or "",
123-
version=p.version or "",
124-
qualifiers=p.qualifiers or {},
125-
subpath=p.subpath or "",
134+
def create_valid_vulnerability_reference(url, reference_id=None):
135+
"""
136+
Create and return a new validated VulnerabilityReference from a
137+
``url`` and ``reference_id``.
138+
Return None and log a warning if this is not a valid reference.
139+
"""
140+
reference = VulnerabilityReference(
141+
reference_id=reference_id,
142+
url=url,
126143
)
127144

128-
return models.Package.objects.get_or_create(**query_kwargs)
129-
145+
try:
146+
reference.full_clean()
147+
except ValidationError as e:
148+
logger.warning(f"Invalid vulnerability reference: {reference!r}: {e}")
149+
return
130150

131-
def _package_url_to_package(purl: PackageURL) -> models.Package:
132-
# FIXME: this is is likely creating a package from a purl?
133-
p = models.Package()
134-
p.set_package_url(purl)
135-
return p
151+
reference.save()
152+
return reference
136153

137154

138155
def get_or_create_vulnerability_and_aliases(vulnerability_id, alias_names, summary):
@@ -145,9 +162,9 @@ def get_or_create_vulnerability_and_aliases(vulnerability_id, alias_names, summa
145162
new_alias_names = set()
146163
for alias_name in alias_names:
147164
try:
148-
alias = models.Alias.objects.get(alias=alias_name)
165+
alias = Alias.objects.get(alias=alias_name)
149166
existing_vulns.add(alias.vulnerability)
150-
except models.Alias.DoesNotExist:
167+
except Alias.DoesNotExist:
151168
new_alias_names.add(alias_name)
152169

153170
# If given set of aliases point to different vulnerabilities in the
@@ -179,14 +196,14 @@ def get_or_create_vulnerability_and_aliases(vulnerability_id, alias_names, summa
179196
vulnerability = existing_alias_vuln
180197
elif vulnerability_id:
181198
try:
182-
vulnerability = models.Vulnerability.objects.get(vulnerability_id=vulnerability_id)
183-
except models.Vulnerability.DoesNotExist:
199+
vulnerability = Vulnerability.objects.get(vulnerability_id=vulnerability_id)
200+
except Vulnerability.DoesNotExist:
184201
logger.warn(
185202
f"Given vulnerability_id: {vulnerability_id} does not exist in the database"
186203
)
187204
return
188205
else:
189-
vulnerability = models.Vulnerability(summary=summary)
206+
vulnerability = Vulnerability(summary=summary)
190207
vulnerability.save()
191208

192209
if summary and summary != vulnerability.summary:
@@ -196,7 +213,7 @@ def get_or_create_vulnerability_and_aliases(vulnerability_id, alias_names, summa
196213
)
197214

198215
for alias_name in new_alias_names:
199-
alias = models.Alias(alias=alias_name, vulnerability=vulnerability)
216+
alias = Alias(alias=alias_name, vulnerability=vulnerability)
200217
alias.save()
201218
logger.info(f"New alias for {vulnerability!r}: {alias_name}")
202219

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from django.db import migrations
2+
from django.db.models import Count
3+
from django.db.models import Max
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('vulnerabilities', '0024_alter_all_models_to_add_ordering'),
10+
]
11+
12+
def remove_duplicate_reference_urls(apps, _):
13+
"""
14+
Find all duplicate references and remove all of them except for one.
15+
Any duplication will be reprocessed by reimports if needed to correct
16+
trhe relationships.
17+
"""
18+
19+
VulnerabilityReference = apps.get_model("vulnerabilities", "VulnerabilityReference")
20+
21+
duplicates = (
22+
VulnerabilityReference.objects.values("url")
23+
.order_by("url")
24+
.annotate(max_id=Max("id"), count_id=Count("id"))
25+
.filter(count_id__gt=1)
26+
)
27+
28+
for duplicate in duplicates:
29+
# Get all rows with the same url,
30+
# exclude the latest one
31+
# and delete rest of them
32+
(
33+
VulnerabilityReference.objects
34+
.filter(url=duplicate["url"])
35+
.exclude(id=duplicate["max_id"])
36+
.delete()
37+
)
38+
39+
operations = [
40+
migrations.RunPython(remove_duplicate_reference_urls, migrations.RunPython.noop),
41+
]
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 4.0.7 on 2022-09-09 12:34
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('vulnerabilities', '0025_remove_duplicate_reference_urls'),
10+
]
11+
12+
operations = [
13+
migrations.AlterUniqueTogether(
14+
name='vulnerabilityreference',
15+
unique_together=set(),
16+
),
17+
migrations.AlterField(
18+
model_name='vulnerabilityreference',
19+
name='url',
20+
field=models.URLField(help_text='URL to the vulnerability reference', max_length=1024, unique=True),
21+
),
22+
]

vulnerabilities/models.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,21 @@
1010
import hashlib
1111
import json
1212
import logging
13+
from contextlib import suppress
1314

1415
from django.conf import settings
16+
from django.core.exceptions import ValidationError
1517
from django.core.validators import MaxValueValidator
1618
from django.core.validators import MinValueValidator
1719
from django.db import models
20+
from django.db.models.functions import Length
21+
from django.db.models.functions import Trim
1822
from django.dispatch import receiver
1923
from django.urls import reverse
24+
from packageurl import PackageURL
2025
from packageurl.contrib.django.models import PackageURLMixin
26+
from packageurl.contrib.django.models import PackageURLQuerySet
27+
from packageurl.contrib.django.models import without_empty_values
2128
from rest_framework.authtoken.models import Token
2229

2330
from vulnerabilities.importer import AdvisoryData
@@ -29,6 +36,18 @@
2936

3037
logger = logging.getLogger(__name__)
3138

39+
models.CharField.register_lookup(Length)
40+
models.CharField.register_lookup(Trim)
41+
42+
43+
class BaseQuerySet(models.QuerySet):
44+
def get_or_none(self, *args, **kwargs):
45+
"""
46+
Returns a single object matching the given keyword arguments, `None` otherwise.
47+
"""
48+
with suppress(self.model.DoesNotExist, ValidationError):
49+
return self.get(*args, **kwargs)
50+
3251

3352
class Vulnerability(models.Model):
3453
"""
@@ -111,15 +130,21 @@ class VulnerabilityReference(models.Model):
111130
through="VulnerabilityRelatedReference",
112131
)
113132

114-
url = models.URLField(max_length=1024, help_text="URL to the vulnerability reference")
133+
url = models.URLField(
134+
max_length=1024,
135+
help_text="URL to the vulnerability reference",
136+
unique=True,
137+
)
138+
115139
reference_id = models.CharField(
116140
max_length=200,
117141
help_text="An optional reference ID, such as DSA-4465-1 when available",
118142
blank=True,
119143
)
120144

145+
objects = BaseQuerySet.as_manager()
146+
121147
class Meta:
122-
unique_together = ["url", "reference_id"]
123148
ordering = ["reference_id", "url"]
124149

125150
def __str__(self):
@@ -147,6 +172,33 @@ class Meta:
147172
ordering = ["vulnerability", "reference"]
148173

149174

175+
class PackageQuerySet(BaseQuerySet, PackageURLQuerySet):
176+
def get_or_create_from_purl(self, purl: PackageURL):
177+
"""
178+
Return an existing or new Package (created if neeed) given a
179+
``purl`` PackageURL.
180+
"""
181+
purl_fields = without_empty_values(purl.to_dict(encode=True))
182+
package, _ = Package.objects.get_or_create(**purl_fields)
183+
return package
184+
185+
def for_package_url_object(self, purl):
186+
"""
187+
Filter the QuerySet with the provided Package URL object or string. The
188+
``purl`` string is validated and transformed into filtering lookups. If
189+
this is a PackageURL object it is reused as-is.
190+
"""
191+
if isinstance(purl, PackageURL):
192+
lookups = without_empty_values(purl.to_dict(encode=True))
193+
return self.filter(**lookups)
194+
195+
elif isinstance(purl, str):
196+
return self.for_package_url(purl)
197+
198+
else:
199+
return self.none()
200+
201+
150202
class Package(PackageURLMixin):
151203
"""
152204
A software package with related vulnerabilities.
@@ -168,6 +220,8 @@ class Package(PackageURLMixin):
168220
to="Vulnerability", through="PackageRelatedVulnerability"
169221
)
170222

223+
objects = PackageQuerySet.as_manager()
224+
171225
@property
172226
def purl(self):
173227
return self.package_url

vulnerabilities/tests/test_fix_api.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ def setUp(self):
269269
for i in range(0, 10):
270270
ref, _ = VulnerabilityReference.objects.get_or_create(
271271
reference_id=f"cpe:/a:nginx:{i}",
272+
url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query=cpe:/a:nginx:{i}",
272273
)
273274
VulnerabilityRelatedReference.objects.create(
274275
reference=ref, vulnerability=self.vulnerability
@@ -356,7 +357,10 @@ def setUp(self):
356357
]
357358
vuln = Vulnerability.objects.create(summary="test")
358359
for cpe in self.exclusive_cpes:
359-
ref = VulnerabilityReference.objects.create(reference_id=cpe)
360+
ref = VulnerabilityReference.objects.create(
361+
reference_id=cpe,
362+
url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}",
363+
)
360364
VulnerabilityRelatedReference.objects.create(reference=ref, vulnerability=vuln)
361365
second_vuln = Vulnerability.objects.create(summary="test-A")
362366
self.non_exclusive_cpes = [
@@ -370,7 +374,10 @@ def setUp(self):
370374
]
371375
third_vuln = Vulnerability.objects.create(summary="test-B")
372376
for cpe in self.non_exclusive_cpes:
373-
ref = VulnerabilityReference.objects.create(reference_id=cpe)
377+
ref = VulnerabilityReference.objects.create(
378+
reference_id=cpe,
379+
url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}",
380+
)
374381
VulnerabilityRelatedReference.objects.create(reference=ref, vulnerability=second_vuln)
375382
VulnerabilityRelatedReference.objects.create(reference=ref, vulnerability=third_vuln)
376383

0 commit comments

Comments
 (0)