Skip to content

Commit f35640f

Browse files
authored
Merge pull request #1575 from aboutcode-org/nginx-importer-pipeline
Migrate Nginx importer to aboutcode pipeline
2 parents 3637b1c + 6773d76 commit f35640f

File tree

9 files changed

+154
-75
lines changed

9 files changed

+154
-75
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from vulnerabilities.importers import gitlab
2525
from vulnerabilities.importers import istio
2626
from vulnerabilities.importers import mozilla
27-
from vulnerabilities.importers import nginx
2827
from vulnerabilities.importers import nvd
2928
from vulnerabilities.importers import openssl
3029
from vulnerabilities.importers import oss_fuzz
@@ -40,14 +39,14 @@
4039
from vulnerabilities.importers import vulnrichment
4140
from vulnerabilities.importers import xen
4241
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
42+
from vulnerabilities.pipelines import nginx_importer
4343
from vulnerabilities.pipelines import npm_importer
4444
from vulnerabilities.pipelines import pypa_importer
4545

4646
IMPORTERS_REGISTRY = [
4747
nvd.NVDImporter,
4848
github.GitHubAPIImporter,
4949
gitlab.GitLabAPIImporter,
50-
nginx.NginxImporter,
5150
pysec.PyPIImporter,
5251
alpine_linux.AlpineImporter,
5352
openssl.OpensslImporter,
@@ -78,6 +77,7 @@
7877
vulnrichment.VulnrichImporter,
7978
pypa_importer.PyPaImporterPipeline,
8079
npm_importer.NpmImporterPipeline,
80+
nginx_importer.NginxImporterPipeline,
8181
]
8282

8383
IMPORTERS_REGISTRY = {

vulnerabilities/improvers/valid_versions.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@
3636
from vulnerabilities.importers.github_osv import GithubOSVImporter
3737
from vulnerabilities.importers.gitlab import GitLabAPIImporter
3838
from vulnerabilities.importers.istio import IstioImporter
39-
from vulnerabilities.importers.nginx import NginxImporter
4039
from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter
4140
from vulnerabilities.importers.ruby import RubyImporter
4241
from vulnerabilities.importers.ubuntu import UbuntuImporter
4342
from vulnerabilities.improver import MAX_CONFIDENCE
4443
from vulnerabilities.improver import Improver
4544
from vulnerabilities.improver import Inference
4645
from vulnerabilities.models import Advisory
46+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
47+
from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline
4748
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
4849
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
4950
from vulnerabilities.utils import clean_nginx_git_tag
@@ -63,6 +64,8 @@ class ValidVersionImprover(Improver):
6364

6465
@property
6566
def interesting_advisories(self) -> QuerySet:
67+
if issubclass(self.importer, VulnerableCodeBaseImporterPipeline):
68+
return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).paginated()
6669
return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated()
6770

6871
def get_package_versions(
@@ -220,7 +223,7 @@ class NginxBasicImprover(Improver):
220223

221224
@property
222225
def interesting_advisories(self) -> QuerySet:
223-
return Advisory.objects.filter(created_by=NginxImporter.qualified_name).paginated()
226+
return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).paginated()
224227

225228
def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
226229
all_versions = list(self.fetch_nginx_version_from_git_tags())
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Generated by Django 4.2.15 on 2024-09-23 13:06
2+
3+
from django.db import migrations
4+
5+
"""
6+
Update the created_by field on Advisory from the old qualified_name
7+
to the new pipeline_id.
8+
"""
9+
10+
11+
def update_created_by(apps, schema_editor):
12+
from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline
13+
14+
Advisory = apps.get_model("vulnerabilities", "Advisory")
15+
Advisory.objects.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").update(
16+
created_by=NginxImporterPipeline.pipeline_id
17+
)
18+
19+
20+
21+
def reverse_update_created_by(apps, schema_editor):
22+
from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline
23+
24+
Advisory = apps.get_model("vulnerabilities", "Advisory")
25+
Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).update(
26+
created_by="vulnerabilities.importers.nginx.NginxImporter"
27+
)
28+
29+
30+
class Migration(migrations.Migration):
31+
32+
dependencies = [
33+
("vulnerabilities", "0064_update_npm_pypa_advisory_created_by"),
34+
]
35+
36+
operations = [
37+
migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by),
38+
]

vulnerabilities/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1103,7 +1103,7 @@ class Advisory(models.Model):
11031103
max_length=100,
11041104
help_text="Fully qualified name of the importer prefixed with the"
11051105
"module name importing the advisory. Eg:"
1106-
"vulnerabilities.importers.nginx.NginxImporter",
1106+
"vulnerabilities.pipelines.nginx_importer.NginxImporterPipeline",
11071107
)
11081108
url = models.URLField(
11091109
blank=True,

vulnerabilities/importers/nginx.py renamed to vulnerabilities/pipelines/nginx_importer.py

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,58 +3,62 @@
33
# VulnerableCode is a trademark of nexB Inc.
44
# SPDX-License-Identifier: Apache-2.0
55
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6-
# See https://github.com/nexB/vulnerablecode for support or download.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10-
import logging
1110
from typing import Iterable
12-
from typing import List
1311
from typing import NamedTuple
1412

1513
import requests
1614
from bs4 import BeautifulSoup
17-
from django.db.models.query import QuerySet
1815
from packageurl import PackageURL
1916
from univers.version_range import NginxVersionRange
2017
from univers.versions import NginxVersion
2118

2219
from vulnerabilities.importer import AdvisoryData
2320
from vulnerabilities.importer import AffectedPackage
24-
from vulnerabilities.importer import Importer
2521
from vulnerabilities.importer import Reference
2622
from vulnerabilities.importer import VulnerabilitySeverity
23+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
2724
from vulnerabilities.severity_systems import GENERIC
2825

29-
logger = logging.getLogger(__name__)
3026

27+
class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline):
28+
"""Collect Nginx security advisories."""
3129

32-
class NginxImporter(Importer):
33-
34-
url = "https://nginx.org/en/security_advisories.html"
30+
pipeline_id = "nginx_importer"
3531

3632
spdx_license_expression = "BSD-2-Clause"
3733
license_url = "https://nginx.org/LICENSE"
34+
url = "https://nginx.org/en/security_advisories.html"
3835
importer_name = "Nginx Importer"
3936

40-
def advisory_data(self) -> Iterable[AdvisoryData]:
41-
text = self.fetch()
42-
yield from advisory_data_from_text(text)
37+
@classmethod
38+
def steps(cls):
39+
return (
40+
cls.fetch,
41+
cls.collect_and_store_advisories,
42+
cls.import_new_advisories,
43+
)
4344

4445
def fetch(self):
45-
return requests.get(self.url).content
46+
self.log(f"Fetch `{self.url}`")
47+
self.advisory_data = requests.get(self.url).text
4648

49+
def advisories_count(self):
50+
return self.advisory_data.count("<li><p>")
4751

48-
def advisory_data_from_text(text):
49-
"""
50-
Yield AdvisoryData from the ``text`` of the nginx security advisories HTML
51-
web page.
52-
"""
53-
soup = BeautifulSoup(text, features="lxml")
54-
vuln_list = soup.select("li p")
55-
for vuln_info in vuln_list:
56-
ngnix_adv = parse_advisory_data_from_paragraph(vuln_info)
57-
yield to_advisory_data(ngnix_adv)
52+
def collect_advisories(self) -> Iterable[AdvisoryData]:
53+
"""
54+
Yield AdvisoryData from the nginx security advisories HTML
55+
web page.
56+
"""
57+
soup = BeautifulSoup(self.advisory_data, features="lxml")
58+
vulnerability_list = soup.select("li p")
59+
for vulnerability_info in vulnerability_list:
60+
ngnix_advisory = parse_advisory_data_from_paragraph(vulnerability_info)
61+
yield to_advisory_data(ngnix_advisory)
5862

5963

6064
class NginxAdvisory(NamedTuple):
@@ -69,15 +73,15 @@ def to_dict(self):
6973
return self._asdict()
7074

7175

72-
def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
76+
def to_advisory_data(nginx_adv: NginxAdvisory) -> AdvisoryData:
7377
"""
7478
Return AdvisoryData from an NginxAdvisory tuple.
7579
"""
7680
package_name = "nginx"
7781
package_type = "nginx"
7882
qualifiers = {}
7983

80-
_, _, affected_version_range = ngnx_adv.vulnerable.partition(":")
84+
_, _, affected_version_range = nginx_adv.vulnerable.partition(":")
8185
if "nginx/Windows" in affected_version_range:
8286
qualifiers["os"] = "windows"
8387
affected_version_range = affected_version_range.replace("nginx/Windows", "")
@@ -87,7 +91,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
8791
affected_version_range = NginxVersionRange.from_native(affected_version_range)
8892

8993
affected_packages = []
90-
_, _, fixed_versions = ngnx_adv.not_vulnerable.partition(":")
94+
_, _, fixed_versions = nginx_adv.not_vulnerable.partition(":")
9195

9296
for fixed_version in fixed_versions.split(","):
9397
fixed_version = fixed_version.rstrip("+")
@@ -112,17 +116,17 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
112116
)
113117

114118
return AdvisoryData(
115-
aliases=ngnx_adv.aliases,
116-
summary=ngnx_adv.summary,
119+
aliases=nginx_adv.aliases,
120+
summary=nginx_adv.summary,
117121
affected_packages=affected_packages,
118-
references=ngnx_adv.references,
122+
references=nginx_adv.references,
119123
url="https://nginx.org/en/security_advisories.html",
120124
)
121125

122126

123-
def parse_advisory_data_from_paragraph(vuln_info):
127+
def parse_advisory_data_from_paragraph(vulnerability_info):
124128
"""
125-
Return an NginxAdvisory from a ``vuln_info`` bs4 paragraph.
129+
Return an NginxAdvisory from a ``vulnerability_info`` bs4 paragraph.
126130
127131
An advisory paragraph, without html markup, looks like this:
128132
@@ -145,7 +149,7 @@ def parse_advisory_data_from_paragraph(vuln_info):
145149

146150
# we iterate on the children to accumulate values in variables
147151
# FIXME: using an explicit xpath-like query could be simpler
148-
for child in vuln_info.children:
152+
for child in vulnerability_info.children:
149153
if is_first:
150154
summary = child
151155
is_first = False

vulnerabilities/pipelines/pypa_importer.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
1818
from vulnerabilities.utils import get_advisory_url
1919

20-
module_logger = logging.getLogger(__name__)
21-
2220

2321
class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline):
2422
"""Collect advisories from PyPA GitHub repository."""

0 commit comments

Comments
 (0)