Skip to content

Commit 6c0e248

Browse files
authored
Merge pull request #1243 from TG1999/minimize_query_objects
Paginate default improver
2 parents ff95e09 + 021df24 commit 6c0e248

File tree

7 files changed

+72
-42
lines changed

7 files changed

+72
-42
lines changed

requirements.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ django-widget-tweaks==1.4.12
2828
djangorestframework==3.13.1
2929
doc8==0.11.1
3030
docker==5.0.3
31-
docker-compose==1.29.2
3231
dockerpty==0.4.1
3332
docopt==0.6.2
3433
docutils==0.17.1
@@ -79,12 +78,12 @@ pyrsistent==0.18.1
7978
pytest==7.1.1
8079
pytest-django==4.5.2
8180
python-dateutil==2.8.2
82-
python-dotenv==0.20.0
81+
python-dotenv==1.0.0
8382
pytz==2022.1
84-
PyYAML==5.4.1
83+
PyYAML==6.0.1
8584
requests==2.27.1
8685
restructuredtext-lint==1.4.0
87-
saneyaml==0.5.2
86+
saneyaml==0.6.0
8887
semantic-version==2.9.0
8988
six==1.16.0
9089
smmap==5.0.0

setup.cfg

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ install_requires =
7575

7676
# file and data formats
7777
binaryornot>=0.4.4
78-
saneyaml>=0.5.2
78+
saneyaml>=0.6.0
7979
beautifulsoup4>=4.9.3
8080
python-dateutil>=2.8.1
8181
toml>=0.10.2
@@ -91,6 +91,11 @@ install_requires =
9191
requests>=2.25.1
9292
fetchcode>=0.2.0
9393

94+
#vulntotal
95+
python-dotenv
96+
texttable
97+
98+
9499
[options.extras_require]
95100
dev =
96101
# Validation
@@ -107,7 +112,6 @@ dev =
107112
pytest-django>=4.5.2
108113
freezegun>=1.1.0
109114
# misc
110-
docker-compose
111115
ipython==8.10.0
112116
# used for testing
113117
commoncode

vulnerabilities/improve_runner.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,14 @@ def run(self) -> None:
4343
improver = self.improver_class()
4444
logger.info(f"Running improver: {improver.qualified_name}")
4545
for advisory in improver.interesting_advisories:
46-
inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data())
47-
process_inferences(
48-
inferences=inferences, advisory=advisory, improver_name=improver.qualified_name
49-
)
46+
logger.info(f"Processing advisory: {advisory!r}")
47+
try:
48+
inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data())
49+
process_inferences(
50+
inferences=inferences, advisory=advisory, improver_name=improver.qualified_name
51+
)
52+
except Exception as e:
53+
logger.info(f"Failed to process advisory: {advisory!r} with error {e!r}")
5054
logger.info("Finished improving using %s.", self.improver_class.qualified_name)
5155

5256

vulnerabilities/improvers/default.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ class DefaultImprover(Improver):
3636

3737
@property
3838
def interesting_advisories(self) -> QuerySet:
39-
return Advisory.objects.all()
39+
for advisory in Advisory.objects.all().paginated():
40+
yield advisory
4041

4142
def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
4243
if not advisory_data:
@@ -94,11 +95,11 @@ def get_exact_purls(affected_package: AffectedPackage) -> Tuple[List[PackageURL]
9495
>>> assert expected == got
9596
"""
9697

97-
vr = affected_package.affected_version_range
98-
# We need ``if c`` below because univers returns None as version
99-
# in case of vers:nginx/*
100-
# TODO: Revisit after https://github.com/nexB/univers/issues/33
10198
try:
99+
vr = affected_package.affected_version_range
100+
# We need ``if c`` below because univers returns None as version
101+
# in case of vers:nginx/*
102+
# TODO: Revisit after https://github.com/nexB/univers/issues/33
102103
affected_purls = []
103104
fixed_versions = []
104105
if vr:
@@ -120,5 +121,5 @@ def get_exact_purls(affected_package: AffectedPackage) -> Tuple[List[PackageURL]
120121
]
121122
return affected_purls, fixed_purls
122123
except Exception as e:
123-
logger.error(f"Failed to get exact purls for {affected_package} {e}")
124+
logger.error(f"Failed to get exact purls for: {affected_package!r} with error: {e!r}")
124125
return [], []

vulnerabilities/management/commands/import.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,20 @@ def add_arguments(self, parser):
2929
parser.add_argument("sources", nargs="*", help="Fully qualified importer name to run")
3030

3131
def handle(self, *args, **options):
32-
if options["list"]:
33-
self.list_sources()
34-
return
35-
36-
if options["all"]:
37-
self.import_data(importers=IMPORTERS_REGISTRY.values())
38-
return
39-
40-
sources = options["sources"]
41-
if not sources:
42-
raise CommandError('Please provide at least one importer to run or use "--all".')
43-
44-
self.import_data(validate_importers(sources))
32+
try:
33+
if options["list"]:
34+
self.list_sources()
35+
elif options["all"]:
36+
self.import_data(importers=IMPORTERS_REGISTRY.values())
37+
else:
38+
sources = options["sources"]
39+
if not sources:
40+
raise CommandError(
41+
'Please provide at least one importer to run or use "--all".'
42+
)
43+
self.import_data(validate_importers(sources))
44+
except KeyboardInterrupt:
45+
raise CommandError("Keyboard interrupt received. Stopping...")
4546

4647
def list_sources(self):
4748
self.stdout.write("Vulnerability data can be imported from the following importers:")

vulnerabilities/management/commands/improve.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,20 @@ def add_arguments(self, parser):
3131
parser.add_argument("sources", nargs="*", help="Fully qualified improver name to run")
3232

3333
def handle(self, *args, **options):
34-
if options["list"]:
35-
self.list_sources()
36-
return
37-
38-
if options["all"]:
39-
self.improve_data(IMPROVERS_REGISTRY.values())
40-
return
41-
42-
sources = options["sources"]
43-
if not sources:
44-
raise CommandError('Please provide at least one improver to run or use "--all".')
45-
46-
self.improve_data(validate_improvers(sources))
34+
try:
35+
if options["list"]:
36+
self.list_sources()
37+
elif options["all"]:
38+
self.improve_data(IMPROVERS_REGISTRY.values())
39+
else:
40+
sources = options["sources"]
41+
if not sources:
42+
raise CommandError(
43+
'Please provide at least one improver to run or use "--all".'
44+
)
45+
self.improve_data(validate_improvers(sources))
46+
except KeyboardInterrupt:
47+
raise CommandError("Keyboard interrupt received. Stopping...")
4748

4849
def list_sources(self):
4950
improvers = list(IMPROVERS_REGISTRY)

vulnerabilities/models.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from django.contrib.auth.models import UserManager
1818
from django.core import exceptions
1919
from django.core.exceptions import ValidationError
20+
from django.core.paginator import Paginator
2021
from django.core.validators import MaxValueValidator
2122
from django.core.validators import MinValueValidator
2223
from django.db import models
@@ -53,6 +54,20 @@ def get_or_none(self, *args, **kwargs):
5354
with suppress(self.model.DoesNotExist, ValidationError):
5455
return self.get(*args, **kwargs)
5556

57+
def paginated(self, per_page=5000):
58+
"""
59+
Iterate over a (large) QuerySet by chunks of ``per_page`` items.
60+
This technique is essential for preventing memory issues when iterating over a large QuerySet.
61+
See these links for inspiration:
62+
https://nextlinklabs.com/resources/insights/django-big-data-iteration
63+
https://stackoverflow.com/questions/4222176/why-is-iterating-through-a-large-django-queryset-consuming-massive-amounts-of-me/
64+
"""
65+
paginator = Paginator(self, per_page=per_page)
66+
for page_number in paginator.page_range:
67+
page = paginator.page(page_number)
68+
for object in page.object_list:
69+
yield object
70+
5671

5772
class VulnerabilityQuerySet(BaseQuerySet):
5873
def with_cpes(self):
@@ -770,6 +785,10 @@ def url(self):
770785
return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json"
771786

772787

788+
class AdvisoryQuerySet(BaseQuerySet):
789+
pass
790+
791+
773792
class Advisory(models.Model):
774793
"""
775794
An advisory represents data directly obtained from upstream transformed
@@ -809,6 +828,7 @@ class Advisory(models.Model):
809828
"module name importing the advisory. Eg:"
810829
"vulnerabilities.importers.nginx.NginxImporter",
811830
)
831+
objects = AdvisoryQuerySet.as_manager()
812832

813833
class Meta:
814834
unique_together = ["aliases", "unique_content_id", "date_published"]

0 commit comments

Comments
 (0)