Skip to content

Commit 021b568

Browse files
authored
Merge pull request #1587 from aboutcode-org/nvd-importer-pipeline
Migrate NVD importer to aboutcode pipeline
2 parents 1ea270a + 2c2dfff commit 021b568

File tree

7 files changed

+145
-33
lines changed

7 files changed

+145
-33
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from vulnerabilities.importers import github_osv
2323
from vulnerabilities.importers import istio
2424
from vulnerabilities.importers import mozilla
25-
from vulnerabilities.importers import nvd
2625
from vulnerabilities.importers import openssl
2726
from vulnerabilities.importers import oss_fuzz
2827
from vulnerabilities.importers import postgresql
@@ -41,10 +40,10 @@
4140
from vulnerabilities.pipelines import gitlab_importer
4241
from vulnerabilities.pipelines import nginx_importer
4342
from vulnerabilities.pipelines import npm_importer
43+
from vulnerabilities.pipelines import nvd_importer
4444
from vulnerabilities.pipelines import pypa_importer
4545

4646
IMPORTERS_REGISTRY = [
47-
nvd.NVDImporter,
4847
pysec.PyPIImporter,
4948
alpine_linux.AlpineImporter,
5049
openssl.OpensslImporter,
@@ -78,6 +77,7 @@
7877
nginx_importer.NginxImporterPipeline,
7978
gitlab_importer.GitLabImporterPipeline,
8079
github_importer.GitHubAPIImporterPipeline,
80+
nvd_importer.NVDImporterPipeline,
8181
]
8282

8383
IMPORTERS_REGISTRY = {

vulnerabilities/improvers/vulnerability_status.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@
1414
from django.db.models.query import QuerySet
1515

1616
from vulnerabilities.importer import AdvisoryData
17-
from vulnerabilities.importers.nvd import NVDImporter
1817
from vulnerabilities.improver import Improver
1918
from vulnerabilities.improver import Inference
2019
from vulnerabilities.models import Advisory
2120
from vulnerabilities.models import Alias
2221
from vulnerabilities.models import Vulnerability
2322
from vulnerabilities.models import VulnerabilityChangeLog
2423
from vulnerabilities.models import VulnerabilityStatusType
24+
from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline
2525
from vulnerabilities.utils import fetch_response
2626
from vulnerabilities.utils import get_item
2727

@@ -38,7 +38,7 @@ class VulnerabilityStatusImprover(Improver):
3838
@property
3939
def interesting_advisories(self) -> QuerySet:
4040
return (
41-
Advisory.objects.filter(Q(created_by=NVDImporter.qualified_name))
41+
Advisory.objects.filter(Q(created_by=NVDImporterPipeline.pipeline_id))
4242
.distinct("aliases")
4343
.paginated()
4444
)
vulnerabilities/migrations/0068_update_nvd_advisory_created_by.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Generated by Django 4.2.15 on 2024-09-27 19:38
2+
3+
from django.db import migrations
4+
5+
"""
6+
Update the created_by field on Advisory from the old qualified_name
7+
to the new pipeline_id.
8+
"""
9+
10+
11+
def update_created_by(apps, schema_editor):
    """
    Re-attribute NVD advisories to the new importer pipeline.

    Set ``created_by`` to the pipeline id on every Advisory whose
    ``created_by`` is the qualified name of the legacy NVD importer.
    ``schema_editor`` is accepted to satisfy the RunPython signature and is
    not used.
    """
    # Both identifiers are frozen as literals instead of being read from the
    # pipeline module: a migration has to keep doing exactly the same thing
    # even if that module is later renamed, removed or given another id.
    legacy_qualified_name = "vulnerabilities.importers.nvd.NVDImporter"
    pipeline_id = "nvd_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=legacy_qualified_name).update(created_by=pipeline_id)
18+
19+
20+
21+
def reverse_update_created_by(apps, schema_editor):
    """
    Undo the re-attribution of NVD advisories.

    Set ``created_by`` back to the qualified name of the legacy NVD importer
    on every Advisory whose ``created_by`` is the pipeline id.
    ``schema_editor`` is accepted to satisfy the RunPython signature and is
    not used.
    """
    # Both identifiers are frozen as literals instead of being read from the
    # pipeline module: a migration has to keep doing exactly the same thing
    # even if that module is later renamed, removed or given another id.
    legacy_qualified_name = "vulnerabilities.importers.nvd.NVDImporter"
    pipeline_id = "nvd_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=pipeline_id).update(created_by=legacy_qualified_name)
28+
29+
30+
class Migration(migrations.Migration):
    """Data migration that rewrites Advisory.created_by for NVD advisories."""

    # Runs after the equivalent created_by rewrite for GitHub advisories.
    dependencies = [("vulnerabilities", "0067_update_github_advisory_created_by")]

    operations = [
        migrations.RunPython(
            code=update_created_by,
            reverse_code=reverse_update_created_by,
        )
    ]

vulnerabilities/importers/nvd.py renamed to vulnerabilities/pipelines/nvd_importer.py

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,29 @@
99

1010
import gzip
1111
import json
12+
import logging
1213
from datetime import date
14+
from traceback import format_exc as traceback_format_exc
15+
from typing import Iterable
1316

1417
import attr
1518
import requests
1619
from dateutil import parser as dateparser
1720

1821
from vulnerabilities import severity_systems
1922
from vulnerabilities.importer import AdvisoryData
20-
from vulnerabilities.importer import Importer
2123
from vulnerabilities.importer import Reference
2224
from vulnerabilities.importer import VulnerabilitySeverity
25+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
2326
from vulnerabilities.utils import get_cwe_id
2427
from vulnerabilities.utils import get_item
2528

2629

27-
class NVDImporter(Importer):
30+
class NVDImporterPipeline(VulnerableCodeBaseImporterPipeline):
31+
"""Collect advisories from NVD."""
32+
33+
pipeline_id = "nvd_importer"
34+
2835
# See https://github.com/nexB/vulnerablecode/issues/665 for follow up
2936
spdx_license_expression = (
3037
"LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou"
@@ -61,19 +68,46 @@ class NVDImporter(Importer):
6168
"""
6269
importer_name = "NVD Importer"
6370

64-
def advisory_data(self):
65-
for _year, cve_data in fetch_cve_data_1_1():
71+
@classmethod
def steps(cls):
    """Return the pipeline steps as a tuple: collect and store advisories, then import new ones."""
    collect_step = cls.collect_and_store_advisories
    import_step = cls.import_new_advisories
    return collect_step, import_step
77+
78+
def advisories_count(self):
    """
    Return the ``totalResults`` value reported by the NVD CVE API 2.0.

    The count is best effort: when the request fails or the response cannot
    be decoded, the error is logged through ``self.log`` and 0 is returned.
    A response without a ``totalResults`` key also yields 0.
    """
    # Ask for a single result: only the total in the response envelope is used.
    url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1"

    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        data = response.json()
    # RequestException also covers connection errors and timeouts, not only
    # HTTP error statuses; ValueError covers a body that is not valid JSON.
    except (requests.RequestException, ValueError) as request_err:
        self.log(
            f"HTTP error occurred: {request_err} \n {traceback_format_exc()}",
            level=logging.ERROR,
        )
        return 0

    return data.get("totalResults", 0)
95+
96+
def collect_advisories(self) -> Iterable[AdvisoryData]:
    """
    Yield every item produced by ``to_advisories`` for each yearly batch of
    CVE data returned by ``fetch_cve_data_1_1``.
    """
    yearly_feeds = fetch_cve_data_1_1(logger=self.log)
    # The year of each batch is not needed here, only its CVE data.
    for _, yearly_cve_data in yearly_feeds:
        for advisory in to_advisories(cve_data=yearly_cve_data):
            yield advisory
6799

68100

69101
# Isolating network calls for simplicity of testing
70-
def fetch(url):
102+
def fetch(url, logger=None):
    """
    Download the gzip-compressed JSON document at ``url`` and return the
    decoded JSON data.

    ``logger``, when provided, is called with a message naming the URL before
    the download starts.

    Raise ``requests.HTTPError`` when the server answers with an error status.
    """
    if logger:
        logger(f"Fetching `{url}`")
    # Without a timeout a stalled connection would block the run forever.
    response = requests.get(url, timeout=60)
    # Fail on an error status here rather than handing an error page to gzip.
    response.raise_for_status()
    return json.loads(gzip.decompress(response.content))
74108

75109

76-
def fetch_cve_data_1_1(starting_year=2002):
110+
def fetch_cve_data_1_1(starting_year=2002, logger=None):
77111
"""
78112
Yield tuples of (year, lists of CVE mappings) from the NVD, one for each
79113
year since ``starting_year`` defaulting to 2002.
@@ -82,7 +116,7 @@ def fetch_cve_data_1_1(starting_year=2002):
82116
# NVD json feeds start from 2002.
83117
for year in range(starting_year, current_year + 1):
84118
download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
85-
yield year, fetch(url=download_url)
119+
yield year, fetch(url=download_url, logger=logger)
86120

87121

88122
def to_advisories(cve_data):

vulnerabilities/tests/test_nvd.py renamed to vulnerabilities/tests/pipelines/test_nvd_importer_pipeline.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,12 @@
88
#
99

1010
import json
11-
import os
11+
from pathlib import Path
1212

13-
from vulnerabilities.importers import nvd
13+
from vulnerabilities.pipelines import nvd_importer
1414
from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN
1515

16-
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
17-
TEST_DATA = os.path.join(BASE_DIR, "test_data/nvd/nvd_test.json")
18-
REJECTED_CVE = os.path.join(BASE_DIR, "test_data/nvd/rejected_nvd.json")
16+
TEST_DATA = Path(__file__).parent.parent / "test_data" / "nvd"
1917

2018

2119
def load_test_data(file):
@@ -37,10 +35,11 @@ def sorted_advisory_data(advisory_data):
3735

3836

3937
def test_to_advisories_skips_hardware(regen=REGEN):
40-
expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-expected.json")
38+
expected_file = TEST_DATA / "nvd-expected.json"
4139

42-
test_data = load_test_data(file=TEST_DATA)
43-
result = [data.to_dict() for data in nvd.to_advisories(test_data)]
40+
test_file = TEST_DATA / "nvd_test.json"
41+
test_data = load_test_data(file=test_file)
42+
result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)]
4443
result = sorted_advisory_data(result)
4544

4645
if regen:
@@ -56,10 +55,11 @@ def test_to_advisories_skips_hardware(regen=REGEN):
5655

5756

5857
def test_to_advisories_marks_rejected_cve(regen=REGEN):
59-
expected_file = os.path.join(BASE_DIR, "test_data/nvd/nvd-rejected-expected.json")
58+
expected_file = TEST_DATA / "nvd-rejected-expected.json"
6059

61-
test_data = load_test_data(file=REJECTED_CVE)
62-
result = [data.to_dict() for data in nvd.to_advisories(test_data)]
60+
test_file = TEST_DATA / "rejected_nvd.json"
61+
test_data = load_test_data(file=test_file)
62+
result = [data.to_dict() for data in nvd_importer.to_advisories(test_data)]
6363
result = sorted_advisory_data(result)
6464

6565
if regen:
@@ -168,14 +168,16 @@ def test_CveItem_cpes():
168168
"cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*",
169169
]
170170

171-
found_cpes = nvd.CveItem(cve_item=get_test_cve_item()).cpes
171+
found_cpes = nvd_importer.CveItem(cve_item=get_test_cve_item()).cpes
172172
assert found_cpes == expected_cpes
173173

174174

175175
def test_is_related_to_hardware():
176-
assert nvd.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*")
177-
assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*")
178-
assert not nvd.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*")
176+
assert nvd_importer.is_related_to_hardware("cpe:2.3:h:csilvers:gperftools:0.2:*:*:*:*:*:*:*")
177+
assert not nvd_importer.is_related_to_hardware(
178+
"cpe:2.3:a:csilvers:gperftools:0.1:*:*:*:*:*:*:*"
179+
)
180+
assert not nvd_importer.is_related_to_hardware("cpe:2.3:a:csilvers:gperftools:*:*:*:*:*:*:*:*")
179181

180182

181183
def test_CveItem_summary_with_single_summary():
@@ -186,7 +188,7 @@ def test_CveItem_summary_with_single_summary():
186188
"be allocated than expected."
187189
)
188190

189-
assert nvd.CveItem(cve_item=get_test_cve_item()).summary == expected_summary
191+
assert nvd_importer.CveItem(cve_item=get_test_cve_item()).summary == expected_summary
190192

191193

192194
def test_CveItem_reference_urls():
@@ -195,4 +197,4 @@ def test_CveItem_reference_urls():
195197
"http://kqueue.org/blog/2012/03/05/memory-allocator-security-revisited/",
196198
]
197199

198-
assert nvd.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls
200+
assert nvd_importer.CveItem(cve_item=get_test_cve_item()).reference_urls == expected_urls

vulnerabilities/tests/test_data_migrations.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,3 +802,42 @@ def test_removal_of_duped_purls(self):
802802
adv.filter(created_by="vulnerabilities.importers.github.GitHubAPIImporter").count() == 0
803803
)
804804
assert adv.filter(created_by="github_importer").count() == 1
805+
806+
807+
class TestUpdateNVDAdvisoryCreatedByField(TestMigrations):
    """
    Check that migrating from 0067 to 0068 rewrites ``created_by`` on an
    advisory of the legacy NVD importer to ``nvd_importer``.
    """

    app_name = "vulnerabilities"
    migrate_from = "0067_update_github_advisory_created_by"
    migrate_to = "0068_update_nvd_advisory_created_by"

    # Fixture used to populate the single advisory created before migrating.
    advisory_data1 = AdvisoryData(
        aliases=["CVE-2020-13371337"],
        summary="vulnerability description here",
        affected_packages=[
            AffectedPackage(
                package=PackageURL(type="pypi", name="foobar"),
                affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"),
            )
        ],
        references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")],
        date_published=timezone.now(),
        url="https://test.com",
    )

    def setUpBeforeMigration(self, apps):
        """Create one advisory attributed to the legacy NVD importer."""
        Advisory = apps.get_model("vulnerabilities", "Advisory")
        Advisory.objects.create(
            aliases=self.advisory_data1.aliases,
            summary=self.advisory_data1.summary,
            affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages],
            references=[ref.to_dict() for ref in self.advisory_data1.references],
            url=self.advisory_data1.url,
            created_by="vulnerabilities.importers.nvd.NVDImporter",
            date_collected=timezone.now(),
        )

    def test_removal_of_duped_purls(self):
        # NOTE(review): despite its name (identical to the GitHub migration
        # test just before this class), this test does not deal with
        # duplicated purls; it checks the created_by rewrite.
        Advisory = apps.get_model("vulnerabilities", "Advisory")
        adv = Advisory.objects.all()

        # No advisory keeps the legacy name; the one created is re-attributed.
        assert adv.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").count() == 0
        assert adv.filter(created_by="nvd_importer").count() == 1

vulnerabilities/tests/test_vulnerability_status_improver.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,12 @@
1313

1414
import pytest
1515

16-
from vulnerabilities.importers.nvd import NVDImporter
1716
from vulnerabilities.improvers.vulnerability_status import VulnerabilityStatusImprover
18-
from vulnerabilities.improvers.vulnerability_status import get_status_from_api
1917
from vulnerabilities.models import Advisory
2018
from vulnerabilities.models import Alias
2119
from vulnerabilities.models import Vulnerability
2220
from vulnerabilities.models import VulnerabilityStatusType
21+
from vulnerabilities.pipelines.nvd_importer import NVDImporterPipeline
2322

2423
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
2524

@@ -34,13 +33,13 @@
3433
def test_interesting_advisories():
3534
Advisory.objects.create(
3635
aliases=["CVE-1"],
37-
created_by=NVDImporter.qualified_name,
36+
created_by=NVDImporterPipeline.pipeline_id,
3837
summary="1",
3938
date_collected=datetime.now(),
4039
)
4140
Advisory.objects.create(
4241
aliases=["CVE-1"],
43-
created_by=NVDImporter.qualified_name,
42+
created_by=NVDImporterPipeline.pipeline_id,
4443
summary="2",
4544
date_collected=datetime.now(),
4645
)
@@ -55,7 +54,7 @@ def test_improver_end_to_end(mock_response):
5554
mock_response.return_value = response
5655
adv = Advisory.objects.create(
5756
aliases=["CVE-2023-35866"],
58-
created_by=NVDImporter.qualified_name,
57+
created_by=NVDImporterPipeline.pipeline_id,
5958
summary="1",
6059
date_collected=datetime.now(),
6160
)

0 commit comments

Comments
 (0)