Skip to content

Commit 590c91a

Browse files
authored
Merge pull request #1628 from aboutcode-org/1627-migrate-pysec
Migrate pysec importer to aboutcode pipeline
2 parents 45070e8 + a02e211 commit 590c91a

File tree

7 files changed

+158
-61
lines changed

7 files changed

+158
-61
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from vulnerabilities.importers import oss_fuzz
2727
from vulnerabilities.importers import postgresql
2828
from vulnerabilities.importers import project_kb_msr2019
29-
from vulnerabilities.importers import pysec
3029
from vulnerabilities.importers import redhat
3130
from vulnerabilities.importers import retiredotnet
3231
from vulnerabilities.importers import ruby
@@ -42,9 +41,9 @@
4241
from vulnerabilities.pipelines import npm_importer
4342
from vulnerabilities.pipelines import nvd_importer
4443
from vulnerabilities.pipelines import pypa_importer
44+
from vulnerabilities.pipelines import pysec_importer
4545

4646
IMPORTERS_REGISTRY = [
47-
pysec.PyPIImporter,
4847
alpine_linux.AlpineImporter,
4948
openssl.OpensslImporter,
5049
redhat.RedhatImporter,
@@ -78,6 +77,7 @@
7877
gitlab_importer.GitLabImporterPipeline,
7978
github_importer.GitHubAPIImporterPipeline,
8079
nvd_importer.NVDImporterPipeline,
80+
pysec_importer.PyPIImporterPipeline,
8181
]
8282

8383
IMPORTERS_REGISTRY = {

vulnerabilities/importers/pysec.py

Lines changed: 0 additions & 44 deletions
This file was deleted.
vulnerabilities/migrations/0074_update_pysec_advisory_created_by.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Generated by Django 4.2.16 on 2024-10-24 13:51
2+
3+
from django.db import migrations
4+
5+
"""
6+
Update the created_by field on Advisory from the old qualified_name
7+
to the new pipeline_id.
8+
"""
9+
10+
11+
def update_created_by(apps, schema_editor):
    """
    Forward data migration: relabel pysec advisories with the new pipeline id.

    Every Advisory row whose ``created_by`` is the legacy importer qualified
    name ``vulnerabilities.importers.pysec.PyPIImporter`` is updated to
    ``pysec_importer``.

    ``apps`` is the app registry handed in by ``migrations.RunPython`` and is
    used to look up the Advisory model; ``schema_editor`` is not used.
    """
    # Both identifiers are frozen as literals instead of being imported from
    # the pipeline module: a migration must keep producing the same result
    # even if that module is later renamed, removed, or its pipeline_id changes.
    old_created_by = "vulnerabilities.importers.pysec.PyPIImporter"
    new_created_by = "pysec_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=old_created_by).update(created_by=new_created_by)
18+
19+
20+
def reverse_update_created_by(apps, schema_editor):
    """
    Reverse data migration: restore the legacy ``created_by`` on pysec advisories.

    Every Advisory row whose ``created_by`` is ``pysec_importer`` is set back to
    the old importer qualified name
    ``vulnerabilities.importers.pysec.PyPIImporter``.

    ``apps`` is the app registry handed in by ``migrations.RunPython`` and is
    used to look up the Advisory model; ``schema_editor`` is not used.
    """
    # Literals are used on purpose (no import of the pipeline class) so the
    # migration stays reversible regardless of later changes to that module.
    new_created_by = "pysec_importer"
    old_created_by = "vulnerabilities.importers.pysec.PyPIImporter"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=new_created_by).update(created_by=old_created_by)
27+
28+
29+
class Migration(migrations.Migration):
    """Data-only migration: runs the created_by update, reversible via its counterpart."""

    # Must run after the 0073 migration of the same app.
    dependencies = [("vulnerabilities", "0073_delete_packagerelatedvulnerability")]

    operations = [
        migrations.RunPython(
            code=update_created_by,
            reverse_code=reverse_update_created_by,
        ),
    ]

vulnerabilities/pipelines/pypa_importer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
9-
import logging
9+
1010
from pathlib import Path
1111
from typing import Iterable
1212

vulnerabilities/pipelines/pysec_importer.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
import logging
11+
from io import BytesIO
12+
from typing import Iterable
13+
from zipfile import ZipFile
14+
15+
import requests
16+
17+
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
19+
20+
21+
class PyPIImporterPipeline(VulnerableCodeBaseImporterPipeline):
    """
    Collect advisories from PyPI.

    The zipped OSV data dump at ``url`` is downloaded once by ``fetch_zip`` and
    kept in memory as bytes in ``self.advisory_zip``; ``advisories_count`` and
    ``collect_advisories`` both read from that in-memory archive.
    """

    pipeline_id = "pysec_importer"

    license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
    url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip"
    spdx_license_expression = "CC-BY-4.0"
    importer_name = "PyPI Importer"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.fetch_zip,
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
        )

    def fetch_zip(self):
        """
        Download the advisory archive and store its raw bytes on the instance.

        Raises ``requests.HTTPError`` on a non-success HTTP status so that an
        error page is never mistaken for the zip archive.
        """
        self.log(f"Fetching `{self.url}`")
        # The timeout bounds connection setup and each wait for data, not the
        # total download time, so a stalled server cannot hang the pipeline.
        response = requests.get(self.url, timeout=60)
        response.raise_for_status()
        self.advisory_zip = response.content

    def advisories_count(self) -> int:
        """Return the number of archive members whose name starts with ``PYSEC-``."""
        with ZipFile(BytesIO(self.advisory_zip)) as zip_file:
            return sum(1 for name in zip_file.namelist() if name.startswith("PYSEC-"))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield AdvisoryData using a zipped data dump of OSV data"""
        # NOTE(review): imported here rather than at module level, presumably
        # to avoid a circular import; confirm before moving it.
        from vulnerabilities.importers.osv import parse_advisory_data

        with ZipFile(BytesIO(self.advisory_zip)) as zip_file:
            for file_name in zip_file.namelist():
                # Only PYSEC-* members are parsed; anything else is logged and skipped.
                if not file_name.startswith("PYSEC-"):
                    self.log(
                        f"Unsupported PyPI advisory data file: {file_name}",
                        level=logging.ERROR,
                    )
                    continue
                with zip_file.open(file_name) as f:
                    vul_info = json.load(f)
                # Every advisory is attributed to the archive URL, since the
                # dump is fetched as a single file.
                yield parse_advisory_data(
                    raw_data=vul_info,
                    supported_ecosystems=["pypi"],
                    advisory_url=self.url,
                )

vulnerabilities/tests/test_pysec.py renamed to vulnerabilities/tests/pipelines/test_pysec_importer_pipeline.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,52 +7,51 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99
import json
10-
import os
10+
from pathlib import Path
1111
from unittest import TestCase
1212

1313
from vulnerabilities.importers.osv import parse_advisory_data
1414
from vulnerabilities.tests.util_tests import VULNERABLECODE_REGEN_TEST_FIXTURES as REGEN
1515
from vulnerabilities.tests.util_tests import check_results_against_json
1616

17-
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
18-
TEST_DATA = os.path.join(BASE_DIR, "test_data/pysec")
17+
TEST_DATA = Path(__file__).parent.parent / "test_data" / "pysec"
1918

2019

2120
class TestPyPIImporter(TestCase):
2221
def test_to_advisories_with_summary(self):
23-
with open(os.path.join(TEST_DATA, "pysec-advisories_with_summary.json")) as f:
22+
with open(TEST_DATA / "pysec-advisories_with_summary.json") as f:
2423
mock_response = json.load(f)
2524
results = parse_advisory_data(mock_response, ["pypi"], "https://test.com").to_dict()
2625

27-
expected_file = os.path.join(TEST_DATA, "pysec-advisories_with_summary-expected.json")
26+
expected_file = TEST_DATA / "pysec-advisories_with_summary-expected.json"
2827
check_results_against_json(
2928
results=results,
3029
expected_file=expected_file,
3130
regen=REGEN,
3231
)
3332

3433
def test_to_advisories_without_summary(self):
35-
with open(os.path.join(TEST_DATA, "pysec-advisories_without_summary.json")) as f:
34+
with open(TEST_DATA / "pysec-advisories_without_summary.json") as f:
3635
mock_response = json.load(f)
3736

3837
results = parse_advisory_data(mock_response, ["pypi"], "https://test.com").to_dict()
3938

40-
expected_file = os.path.join(TEST_DATA, "pysec-advisories_without_summary-expected.json")
39+
expected_file = TEST_DATA / "pysec-advisories_without_summary-expected.json"
4140
check_results_against_json(
4241
results=results,
4342
expected_file=expected_file,
4443
regen=REGEN,
4544
)
4645

4746
def test_to_advisories_with_cwe(self):
48-
with open(os.path.join(TEST_DATA, "pysec-advisory_with_cwe.json")) as f:
47+
with open(TEST_DATA / "pysec-advisory_with_cwe.json") as f:
4948
mock_response = json.load(f)
5049

5150
results = parse_advisory_data(
5251
raw_data=mock_response, supported_ecosystems=["pypi"], advisory_url="https://tes.com"
5352
).to_dict()
5453

55-
expected_file = os.path.join(TEST_DATA, "pysec-advisories_with_cwe-expected.json")
54+
expected_file = TEST_DATA / "pysec-advisories_with_cwe-expected.json"
5655
check_results_against_json(
5756
results=results,
5857
expected_file=expected_file,

vulnerabilities/tests/test_data_migrations.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def setUpBeforeMigration(self, apps):
672672
date_collected=timezone.now(),
673673
)
674674

675-
def test_removal_of_duped_purls(self):
675+
def test_update_npm_pypa_created_by_field(self):
676676
Advisory = apps.get_model("vulnerabilities", "Advisory")
677677
adv = Advisory.objects.all()
678678

@@ -714,7 +714,7 @@ def setUpBeforeMigration(self, apps):
714714
date_collected=timezone.now(),
715715
)
716716

717-
def test_removal_of_duped_purls(self):
717+
def test_update_nginx_created_by_field(self):
718718
Advisory = apps.get_model("vulnerabilities", "Advisory")
719719
adv = Advisory.objects.all()
720720

@@ -753,7 +753,7 @@ def setUpBeforeMigration(self, apps):
753753
date_collected=timezone.now(),
754754
)
755755

756-
def test_removal_of_duped_purls(self):
756+
def test_update_gitlab_created_by_field(self):
757757
Advisory = apps.get_model("vulnerabilities", "Advisory")
758758
adv = Advisory.objects.all()
759759

@@ -794,7 +794,7 @@ def setUpBeforeMigration(self, apps):
794794
date_collected=timezone.now(),
795795
)
796796

797-
def test_removal_of_duped_purls(self):
797+
def test_update_github_created_by_field(self):
798798
Advisory = apps.get_model("vulnerabilities", "Advisory")
799799
adv = Advisory.objects.all()
800800

@@ -835,9 +835,48 @@ def setUpBeforeMigration(self, apps):
835835
date_collected=timezone.now(),
836836
)
837837

838-
def test_removal_of_duped_purls(self):
838+
def test_update_nvd_created_by_field(self):
839839
Advisory = apps.get_model("vulnerabilities", "Advisory")
840840
adv = Advisory.objects.all()
841841

842842
assert adv.filter(created_by="vulnerabilities.importers.nvd.NVDImporter").count() == 0
843843
assert adv.filter(created_by="nvd_importer").count() == 1
844+
845+
846+
class TestUpdatePysecAdvisoryCreatedByField(TestMigrations):
    """
    Check that migrating from 0073 to 0074 rewrites ``created_by`` on an
    advisory created by the old pysec importer to ``pysec_importer``.
    """

    app_name = "vulnerabilities"
    migrate_from = "0073_delete_packagerelatedvulnerability"
    migrate_to = "0074_update_pysec_advisory_created_by"

    # Sample advisory whose fields seed the pre-migration Advisory row.
    advisory_data1 = AdvisoryData(
        aliases=["CVE-2020-13371337"],
        summary="vulnerability description here",
        affected_packages=[
            AffectedPackage(
                package=PackageURL(type="pypi", name="foobar"),
                affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"),
            )
        ],
        references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")],
        date_published=timezone.now(),
        url="https://test.com",
    )

    def setUpBeforeMigration(self, apps):
        """Create one Advisory labelled with the legacy importer qualified name."""
        Advisory = apps.get_model("vulnerabilities", "Advisory")
        # The created row is only needed in the database, so it is not bound
        # to a local name.
        Advisory.objects.create(
            aliases=self.advisory_data1.aliases,
            summary=self.advisory_data1.summary,
            affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages],
            references=[ref.to_dict() for ref in self.advisory_data1.references],
            url=self.advisory_data1.url,
            created_by="vulnerabilities.importers.pysec.PyPIImporter",
            date_collected=timezone.now(),
        )

    def test_update_pysec_created_by_field(self):
        """After the migration no legacy label remains and one row has the new id."""
        Advisory = apps.get_model("vulnerabilities", "Advisory")
        adv = Advisory.objects.all()

        assert adv.filter(created_by="vulnerabilities.importers.pysec.PyPIImporter").count() == 0
        assert adv.filter(created_by="pysec_importer").count() == 1

0 commit comments

Comments
 (0)