Skip to content

Commit 768bdd0

Browse files
authored
Merge pull request #1017 from TG1999/fast_bulk_search
Make bulk search fast
2 parents d19a78e + 860ebdf commit 768bdd0

File tree

7 files changed

+199
-28
lines changed

7 files changed

+199
-28
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@ Version v30.3.2
88

99
- We re-enabled support for the PostgreSQL security advisories importer.
1010
- We fixed the API key request form UI and made it consistent with the rest of the UI.
11-
11+
- We made bulk search faster by pre-computing
12+
``package_url`` and ``plain_package_url`` in the Package model.
13+
We also added two options to package bulk search:
14+
the ``purl_only`` option returns only the vulnerable
15+
purls without any extra details, and the ``plain_purl`` option
16+
matches purls while ignoring qualifiers and
17+
subpath, and returns them without qualifiers and subpath.
1218

1319
Version v30.3.1
1420
----------------

vulnerabilities/api.py

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -238,40 +238,62 @@ def bulk_search(self, request):
238238
"""
239239
Lookup for vulnerable packages using many Package URLs at once.
240240
"""
241-
response = []
241+
242242
purls = request.data.get("purls", []) or []
243+
purl_only = request.data.get("purl_only", False)
244+
plain_purl = request.data.get("plain_purl", False)
243245
if not purls or not isinstance(purls, list):
244246
return Response(
245247
status=400,
246-
data={"Error": "A non-empty 'purls' list of package URLs is required."},
248+
data={"Error": "A non-empty 'purls' list of PURLs is required."},
249+
)
250+
251+
if plain_purl:
252+
purl_objects = [PackageURL.from_string(purl) for purl in purls]
253+
plain_purl_objects = [
254+
PackageURL(
255+
type=purl.type,
256+
namespace=purl.namespace,
257+
name=purl.name,
258+
version=purl.version,
259+
)
260+
for purl in purl_objects
261+
]
262+
plain_purls = [str(purl) for purl in plain_purl_objects]
263+
264+
query = (
265+
Package.objects.filter(plain_package_url__in=plain_purls)
266+
.order_by("plain_package_url")
267+
.distinct("plain_package_url")
247268
)
248-
for purl in request.data["purls"]:
249-
try:
250-
purl_string = purl
251-
purl = PackageURL.from_string(purl)
252-
except ValueError:
253-
return Response(status=400, data={"Error": f"Invalid Package URL: {purl}"})
254-
lookups = get_purl_query_lookups(purl)
255-
purl_data = Package.objects.filter(**lookups)
256-
purl_response = {}
257-
if purl_data:
258-
purl_response = PackageSerializer(purl_data[0], context={"request": request}).data
259-
else:
260-
purl_response = purl.to_dict()
261-
purl_response["unresolved_vulnerabilities"] = []
262-
purl_response["resolved_vulnerabilities"] = []
263-
purl_response["purl"] = purl_string
264-
response.append(purl_response)
265-
266-
return Response(response)
269+
270+
if not purl_only:
271+
return Response(
272+
PackageSerializer(query, many=True, context={"request": request}).data
273+
)
274+
275+
# using order by and distinct because there will be
276+
# many fully qualified purl for a single plain purl
277+
vulnerable_purls = query.vulnerable().only("plain_package_url")
278+
vulnerable_purls = [str(package.plain_package_url) for package in vulnerable_purls]
279+
return Response(data=vulnerable_purls)
280+
281+
query = Package.objects.filter(package_url__in=purls).distinct()
282+
283+
if not purl_only:
284+
return Response(PackageSerializer(query, many=True, context={"request": request}).data)
285+
286+
vulnerable_purls = query.vulnerable().only("package_url")
287+
vulnerable_purls = [str(package.package_url) for package in vulnerable_purls]
288+
return Response(data=vulnerable_purls)
267289

268290
@action(detail=False, methods=["get"], throttle_scope="vulnerable_packages")
269291
def all(self, request):
270292
"""
271293
Return the Package URLs of all packages known to be vulnerable.
272294
"""
273-
vulnerable_packages = Package.objects.vulnerable().only(*PackageURL._fields).distinct()
274-
vulnerable_purls = [str(package.purl) for package in vulnerable_packages]
295+
vulnerable_packages = Package.objects.vulnerable().only("package_url").distinct()
296+
vulnerable_purls = [str(package.package_url) for package in vulnerable_packages]
275297
return Response(vulnerable_purls)
276298

277299

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 4.0.7 on 2022-11-28 12:58
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('vulnerabilities', '0033_alter_vulnerabilityseverity_scoring_system'),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name='package',
15+
name='package_url',
16+
field=models.CharField(blank=True, db_index=True, help_text='The Package URL for this package.', max_length=1000),
17+
),
18+
migrations.AddField(
19+
model_name='package',
20+
name='plain_package_url',
21+
field=models.CharField(blank=True, db_index=True, help_text='The Package URL for this package without qualifiers and subpath.', max_length=1000),
22+
),
23+
]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from django.db import migrations
2+
from packageurl import PackageURL
3+
4+
class Migration(migrations.Migration):
5+
6+
def save_purls(apps, schema_editor):
7+
Package = apps.get_model("vulnerabilities", "Package")
8+
updatables = []
9+
for package in Package.objects.all():
10+
purl = PackageURL(
11+
type=package.type,
12+
namespace=package.namespace,
13+
name=package.name,
14+
version=package.version,
15+
qualifiers=package.qualifiers,
16+
subpath=package.subpath,
17+
)
18+
plain_purl = PackageURL(
19+
type=package.type,
20+
namespace=package.namespace,
21+
name=package.name,
22+
version=package.version,
23+
)
24+
package.package_url = str(purl)
25+
package.plain_package_url = str(plain_purl)
26+
updatables.append(package)
27+
28+
updated = Package.objects.bulk_update(
29+
objs = updatables,
30+
fields=["package_url", "plain_package_url"],
31+
batch_size=500,
32+
)
33+
print(f"Migrated {updated} packages with package_url")
34+
35+
dependencies = [
36+
("vulnerabilities", "0034_package_package_url_package_plain_package_url"),
37+
]
38+
39+
operations = [
40+
migrations.RunPython(save_purls, reverse_code=migrations.RunPython.noop),
41+
]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 4.0.7 on 2022-11-28 13:00
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('vulnerabilities', '0035_add_package_url_to_packages'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='package',
15+
name='package_url',
16+
field=models.CharField(db_index=True, help_text='The Package URL for this package.', max_length=1000),
17+
),
18+
migrations.AlterField(
19+
model_name='package',
20+
name='plain_package_url',
21+
field=models.CharField(db_index=True, help_text='The Package URL for this package without qualifiers and subpath.', max_length=1000),
22+
),
23+
]

vulnerabilities/models.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,41 @@ class Package(PackageURLMixin):
531531
to="Vulnerability", through="PackageRelatedVulnerability"
532532
)
533533

534+
package_url = models.CharField(
535+
max_length=1000,
536+
null=False,
537+
help_text="The Package URL for this package.",
538+
db_index=True,
539+
)
540+
541+
plain_package_url = models.CharField(
542+
max_length=1000,
543+
null=False,
544+
help_text="The Package URL for this package without qualifiers and subpath.",
545+
db_index=True,
546+
)
547+
534548
objects = PackageQuerySet.as_manager()
535549

550+
def save(self, *args, **kwargs):
551+
purl_object = PackageURL(
552+
type=self.type,
553+
namespace=self.namespace,
554+
name=self.name,
555+
version=self.version,
556+
qualifiers=self.qualifiers,
557+
subpath=self.subpath,
558+
)
559+
plain_purl = PackageURL(
560+
type=self.type,
561+
namespace=self.namespace,
562+
name=self.name,
563+
version=self.version,
564+
)
565+
self.package_url = str(purl_object)
566+
self.plain_package_url = str(plain_purl)
567+
super().save(*args, **kwargs)
568+
536569
@property
537570
def purl(self):
538571
return self.package_url

vulnerabilities/tests/test_fix_api.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,19 @@ def setUp(self):
358358
attrs = {k: v for k, v in purl.to_dict().items() if v}
359359
Package.objects.create(**attrs)
360360

361+
vulnerable_packages = [
362+
"pkg:nginx/[email protected]?foo=bar",
363+
"pkg:nginx/[email protected]?foo=baz",
364+
]
365+
366+
vuln = Vulnerability.objects.create(summary="test")
367+
368+
for package in vulnerable_packages:
369+
purl = PackageURL.from_string(package)
370+
attrs = {k: v for k, v in purl.to_dict().items() if v}
371+
pkg = Package.objects.create(**attrs)
372+
PackageRelatedVulnerability.objects.create(package=pkg, vulnerability=vuln)
373+
361374
def test_bulk_api_response(self):
362375
request_body = {
363376
"purls": self.packages,
@@ -370,9 +383,7 @@ def test_bulk_api_response(self):
370383
assert len(response) == 13
371384

372385
def test_bulk_api_response_with_ignoring_qualifiers(self):
373-
request_body = {
374-
"purls": ["pkg:nginx/[email protected]?qualifiers=dev"],
375-
}
386+
request_body = {"purls": ["pkg:nginx/[email protected]?qualifiers=dev"], "plain_purl": True}
376387
response = self.csrf_client.post(
377388
"/api/packages/bulk_search",
378389
data=json.dumps(request_body),
@@ -382,16 +393,28 @@ def test_bulk_api_response_with_ignoring_qualifiers(self):
382393
assert response[0]["purl"] == "pkg:nginx/[email protected]"
383394

384395
def test_bulk_api_response_with_ignoring_subpath(self):
396+
request_body = {"purls": ["pkg:nginx/[email protected]#dev/subpath"], "plain_purl": True}
397+
response = self.csrf_client.post(
398+
"/api/packages/bulk_search",
399+
data=json.dumps(request_body),
400+
content_type="application/json",
401+
).json()
402+
assert len(response) == 1
403+
assert response[0]["purl"] == "pkg:nginx/[email protected]"
404+
405+
def test_bulk_api_with_purl_only_option(self):
385406
request_body = {
386407
"purls": ["pkg:nginx/[email protected]#dev/subpath"],
408+
"purl_only": True,
409+
"plain_purl": True,
387410
}
388411
response = self.csrf_client.post(
389412
"/api/packages/bulk_search",
390413
data=json.dumps(request_body),
391414
content_type="application/json",
392415
).json()
393416
assert len(response) == 1
394-
assert response[0]["purl"] == "pkg:nginx/[email protected]"
417+
assert response[0] == "pkg:nginx/[email protected]"
395418

396419

397420
class BulkSearchAPICPE(TestCase):

0 commit comments

Comments
 (0)