Skip to content

Commit 094c2bf

Browse files
authored
Merge pull request #1574 from aboutcode-org/npm-importer-pipeline
Migrate Npm importer to aboutcode pipeline
2 parents 9c6c219 + 58e738c commit 094c2bf

21 files changed

+287
-65
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from vulnerabilities.importers import istio
2626
from vulnerabilities.importers import mozilla
2727
from vulnerabilities.importers import nginx
28-
from vulnerabilities.importers import npm
2928
from vulnerabilities.importers import nvd
3029
from vulnerabilities.importers import openssl
3130
from vulnerabilities.importers import oss_fuzz
@@ -40,13 +39,14 @@
4039
from vulnerabilities.importers import ubuntu_usn
4140
from vulnerabilities.importers import vulnrichment
4241
from vulnerabilities.importers import xen
42+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
43+
from vulnerabilities.pipelines import npm_importer
4344
from vulnerabilities.pipelines import pypa_importer
4445

4546
IMPORTERS_REGISTRY = [
4647
nvd.NVDImporter,
4748
github.GitHubAPIImporter,
4849
gitlab.GitLabAPIImporter,
49-
npm.NpmImporter,
5050
nginx.NginxImporter,
5151
pysec.PyPIImporter,
5252
alpine_linux.AlpineImporter,
@@ -77,6 +77,10 @@
7777
epss.EPSSImporter,
7878
vulnrichment.VulnrichImporter,
7979
pypa_importer.PyPaImporterPipeline,
80+
npm_importer.NpmImporterPipeline,
8081
]
8182

82-
IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
83+
IMPORTERS_REGISTRY = {
84+
x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x
85+
for x in IMPORTERS_REGISTRY
86+
}

vulnerabilities/improvers/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from vulnerabilities.improvers import valid_versions
1111
from vulnerabilities.improvers import vulnerability_kev
1212
from vulnerabilities.improvers import vulnerability_status
13+
from vulnerabilities.pipelines import VulnerableCodePipeline
1314
from vulnerabilities.pipelines import flag_ghost_packages
1415

1516
IMPROVERS_REGISTRY = [
@@ -34,4 +35,7 @@
3435
flag_ghost_packages.FlagGhostPackagePipeline,
3536
]
3637

37-
IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
38+
IMPROVERS_REGISTRY = {
39+
x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x
40+
for x in IMPROVERS_REGISTRY
41+
}

vulnerabilities/improvers/valid_versions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@
3737
from vulnerabilities.importers.gitlab import GitLabAPIImporter
3838
from vulnerabilities.importers.istio import IstioImporter
3939
from vulnerabilities.importers.nginx import NginxImporter
40-
from vulnerabilities.importers.npm import NpmImporter
4140
from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter
4241
from vulnerabilities.importers.ruby import RubyImporter
4342
from vulnerabilities.importers.ubuntu import UbuntuImporter
4443
from vulnerabilities.improver import MAX_CONFIDENCE
4544
from vulnerabilities.improver import Improver
4645
from vulnerabilities.improver import Inference
4746
from vulnerabilities.models import Advisory
47+
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
4848
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
4949
from vulnerabilities.utils import clean_nginx_git_tag
5050
from vulnerabilities.utils import get_affected_packages_by_patched_package
@@ -436,7 +436,7 @@ class GitHubBasicImprover(ValidVersionImprover):
436436

437437

438438
class NpmImprover(ValidVersionImprover):
439-
importer = NpmImporter
439+
importer = NpmImporterPipeline
440440
ignorable_versions = []
441441

442442

vulnerabilities/management/commands/import.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,15 @@ def import_data(self, importers):
5757
failed_importers = []
5858

5959
for importer in importers:
60-
self.stdout.write(f"Importing data using {importer.qualified_name}")
6160
if issubclass(importer, VulnerableCodeBaseImporterPipeline):
61+
self.stdout.write(f"Importing data using {importer.pipeline_id}")
6262
status, error = importer().execute()
6363
if status != 0:
6464
self.stdout.write(error)
65-
failed_importers.append(importer.qualified_name)
65+
failed_importers.append(importer.pipeline_id)
6666
continue
6767

68+
self.stdout.write(f"Importing data using {importer.qualified_name}")
6869
try:
6970
ImportRunner(importer).run()
7071
self.stdout.write(

vulnerabilities/management/commands/improve.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,15 @@ def improve_data(self, improvers):
5656
failed_improvers = []
5757

5858
for improver in improvers:
59-
self.stdout.write(f"Improving data using {improver.qualified_name}")
6059
if issubclass(improver, VulnerableCodePipeline):
60+
self.stdout.write(f"Improving data using {improver.pipeline_id}")
6161
status, error = improver().execute()
6262
if status != 0:
6363
self.stdout.write(error)
64-
failed_improvers.append(improver.qualified_name)
64+
failed_improvers.append(improver.pipeline_id)
6565
continue
6666

67+
self.stdout.write(f"Improving data using {improver.qualified_name}")
6768
try:
6869
ImproveRunner(improver_class=improver).run()
6970
self.stdout.write(
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Generated by Django 4.2.15 on 2024-09-12 12:56
2+
3+
from django.db import migrations
4+
5+
"""
6+
Update the created_by field on Advisory from the old qualified_name
7+
to the new pipeline_id.
8+
"""
9+
10+
11+
def update_created_by(apps, schema_editor):
12+
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
13+
from vulnerabilities.pipelines.pypa_importer import PyPaImporterPipeline
14+
15+
Advisory = apps.get_model("vulnerabilities", "Advisory")
16+
Advisory.objects.filter(created_by="vulnerabilities.importers.npm.NpmImporter").update(
17+
created_by=NpmImporterPipeline.pipeline_id
18+
)
19+
Advisory.objects.filter(created_by="vulnerabilities.importers.pypa.PyPaImporter").update(
20+
created_by=PyPaImporterPipeline.pipeline_id
21+
)
22+
23+
24+
25+
def reverse_update_created_by(apps, schema_editor):
26+
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
27+
from vulnerabilities.pipelines.pypa_importer import PyPaImporterPipeline
28+
29+
Advisory = apps.get_model("vulnerabilities", "Advisory")
30+
Advisory.objects.filter(created_by=NpmImporterPipeline.pipeline_id).update(
31+
created_by="vulnerabilities.importers.npm.NpmImporter"
32+
)
33+
Advisory.objects.filter(created_by=PyPaImporterPipeline.pipeline_id).update(
34+
created_by="vulnerabilities.importers.pypa.PyPaImporter"
35+
)
36+
37+
38+
class Migration(migrations.Migration):
39+
40+
dependencies = [
41+
("vulnerabilities", "0063_alter_packagechangelog_software_version_and_more"),
42+
]
43+
44+
operations = [
45+
migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by),
46+
]

vulnerabilities/pipelines/__init__.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828

2929
class VulnerableCodePipeline(BasePipeline):
30+
pipeline_id = None # Unique Pipeline ID
31+
3032
def log(self, message, level=logging.INFO):
3133
"""Log the given `message` to the current module logger and execution_log."""
3234
now_local = datetime.now(timezone.utc).astimezone()
@@ -36,11 +38,12 @@ def log(self, message, level=logging.INFO):
3638
self.append_to_log(message)
3739

3840
@classproperty
39-
def qualified_name(cls):
40-
"""
41-
Fully qualified name prefixed with the module name of the pipeline used in logging.
42-
"""
43-
return f"{cls.__module__}.{cls.__qualname__}"
41+
def pipeline_id(cls):
42+
"""Return unique pipeline_id set in cls.pipeline_id"""
43+
44+
if cls.pipeline_id is None or cls.pipeline_id == "":
45+
raise NotImplementedError("pipeline_id is not defined or is empty")
46+
return cls.pipeline_id
4447

4548

4649
class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
@@ -52,6 +55,7 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
5255
Also override the ``steps`` and ``advisory_confidence`` as needed.
5356
"""
5457

58+
pipeline_id = None # Unique pipeline ID; this should be the name of the pipeline module.
5559
license_url = None
5660
spdx_license_expression = None
5761
repo_url = None
@@ -89,7 +93,7 @@ def collect_and_store_advisories(self):
8993
for advisory in progress.iter(self.collect_advisories()):
9094
if _obj := insert_advisory(
9195
advisory=advisory,
92-
pipeline_name=self.qualified_name,
96+
pipeline_id=self.pipeline_id,
9397
logger=self.log,
9498
):
9599
collected_advisory_count += 1
@@ -98,7 +102,7 @@ def collect_and_store_advisories(self):
98102

99103
def import_new_advisories(self):
100104
new_advisories = Advisory.objects.filter(
101-
created_by=self.qualified_name,
105+
created_by=self.pipeline_id,
102106
date_imported__isnull=True,
103107
)
104108

@@ -119,7 +123,7 @@ def import_advisory(self, advisory: Advisory) -> int:
119123
try:
120124
import_advisory(
121125
advisory=advisory,
122-
pipeline_name=self.qualified_name,
126+
pipeline_id=self.pipeline_id,
123127
confidence=self.advisory_confidence,
124128
logger=self.log,
125129
)

vulnerabilities/pipelines/flag_ghost_packages.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
class FlagGhostPackagePipeline(VulnerableCodePipeline):
2424
"""Detect and flag packages that do not exist upstream."""
2525

26+
pipeline_id = "flag_ghost_packages"
27+
2628
@classmethod
2729
def steps(cls):
2830
return (cls.flag_ghost_packages,)

vulnerabilities/importers/npm.py renamed to vulnerabilities/pipelines/npm_importer.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,44 +11,58 @@
1111

1212
from pathlib import Path
1313
from typing import Iterable
14-
from typing import List
1514

1615
import pytz
1716
from dateutil.parser import parse
17+
from fetchcode.vcs import fetch_via_vcs
1818
from packageurl import PackageURL
1919
from univers.version_range import NpmVersionRange
2020

2121
from vulnerabilities.importer import AdvisoryData
2222
from vulnerabilities.importer import AffectedPackage
23-
from vulnerabilities.importer import Importer
2423
from vulnerabilities.importer import Reference
2524
from vulnerabilities.importer import VulnerabilitySeverity
25+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
2626
from vulnerabilities.severity_systems import CVSSV2
2727
from vulnerabilities.severity_systems import CVSSV3
2828
from vulnerabilities.utils import build_description
2929
from vulnerabilities.utils import load_json
3030

3131

32-
class NpmImporter(Importer):
32+
class NpmImporterPipeline(VulnerableCodeBaseImporterPipeline):
33+
"""Collect advisories from nodejs GitHub repository."""
34+
35+
pipeline_id = "npm_importer"
36+
3337
spdx_license_expression = "MIT"
3438
license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md"
3539
repo_url = "git+https://github.com/nodejs/security-wg"
3640
importer_name = "Npm Importer"
3741

38-
def advisory_data(self) -> Iterable[AdvisoryData]:
39-
try:
40-
self.clone(self.repo_url)
41-
path = Path(self.vcs_response.dest_dir)
42+
@classmethod
43+
def steps(cls):
44+
return (
45+
cls.clone,
46+
cls.collect_and_store_advisories,
47+
cls.import_new_advisories,
48+
cls.clean_downloads,
49+
)
50+
51+
def clone(self):
52+
self.log(f"Cloning `{self.repo_url}`")
53+
self.vcs_response = fetch_via_vcs(self.repo_url)
4254

43-
vuln = path / "vuln"
44-
npm_vulns = vuln / "npm"
45-
for file in npm_vulns.glob("*.json"):
46-
yield from self.to_advisory_data(file)
47-
finally:
48-
if self.vcs_response:
49-
self.vcs_response.delete()
55+
def advisories_count(self):
56+
vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
57+
return sum(1 for _ in vuln_directory.glob("*.json"))
5058

51-
def to_advisory_data(self, file: Path) -> List[AdvisoryData]:
59+
def collect_advisories(self) -> Iterable[AdvisoryData]:
60+
vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
61+
62+
for advisory in vuln_directory.glob("*.json"):
63+
yield from self.to_advisory_data(advisory)
64+
65+
def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
5266
data = load_json(file)
5367
id = data.get("id")
5468
description = data.get("overview") or ""
@@ -144,3 +158,8 @@ def get_affected_package(self, data, package_name):
144158
affected_version_range=affected_version_range,
145159
fixed_version=fixed_version,
146160
)
161+
162+
def clean_downloads(self):
163+
if self.vcs_response:
164+
self.log(f"Removing cloned repository")
165+
self.vcs_response.delete()

vulnerabilities/pipelines/pypa_importer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from fetchcode.vcs import fetch_via_vcs
1515

1616
from vulnerabilities.importer import AdvisoryData
17-
from vulnerabilities.importers.osv import parse_advisory_data
1817
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
1918
from vulnerabilities.utils import get_advisory_url
2019

@@ -24,6 +23,8 @@
2423
class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline):
2524
"""Collect advisories from PyPA GitHub repository."""
2625

26+
pipeline_id = "pypa_importer"
27+
2728
spdx_license_expression = "CC-BY-4.0"
2829
license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
2930
repo_url = "git+https://github.com/pypa/advisory-database"
@@ -47,9 +48,10 @@ def advisories_count(self):
4748
return sum(1 for _ in vulns_directory.rglob("*.yaml"))
4849

4950
def collect_advisories(self) -> Iterable[AdvisoryData]:
51+
from vulnerabilities.importers.osv import parse_advisory_data
52+
5053
base_directory = Path(self.vcs_response.dest_dir)
5154
vulns_directory = base_directory / "vulns"
52-
self.advisories_count = sum(1 for _ in vulns_directory.rglob("*.yaml"))
5355

5456
for advisory in vulns_directory.rglob("*.yaml"):
5557
advisory_url = get_advisory_url(

0 commit comments

Comments
 (0)