33# VulnerableCode is a trademark of nexB Inc.
44# SPDX-License-Identifier: Apache-2.0
55# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6- # See https://github.com/nexB/vulnerablecode for support or download.
6+ # See https://github.com/aboutcode-org/vulnerablecode for support or download.
77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10- import logging
1110from typing import Iterable
12- from typing import List
1311from typing import NamedTuple
1412
1513import requests
1614from bs4 import BeautifulSoup
17- from django .db .models .query import QuerySet
1815from packageurl import PackageURL
1916from univers .version_range import NginxVersionRange
2017from univers .versions import NginxVersion
2118
2219from vulnerabilities .importer import AdvisoryData
2320from vulnerabilities .importer import AffectedPackage
24- from vulnerabilities .importer import Importer
2521from vulnerabilities .importer import Reference
2622from vulnerabilities .importer import VulnerabilitySeverity
23+ from vulnerabilities .pipelines import VulnerableCodeBaseImporterPipeline
2724from vulnerabilities .severity_systems import GENERIC
2825
29- logger = logging .getLogger (__name__ )
3026
class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline):
    """Collect Nginx security advisories."""

    pipeline_id = "nginx_importer"

    spdx_license_expression = "BSD-2-Clause"
    license_url = "https://nginx.org/LICENSE"
    url = "https://nginx.org/en/security_advisories.html"
    importer_name = "Nginx Importer"

    # Seconds to wait for the advisories page before giving up. Without a
    # timeout, ``requests`` can block forever on a stalled connection.
    request_timeout = 30

    @classmethod
    def steps(cls):
        """
        Return the ordered pipeline steps: fetch the advisories page, then
        run the collect/store and import steps.

        NOTE(review): ``collect_and_store_advisories`` and
        ``import_new_advisories`` are not defined in this class; they are
        presumably inherited from the base pipeline -- confirm.
        """
        return (
            cls.fetch,
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
        )

    def fetch(self):
        """
        Download the nginx security advisories page and keep its HTML text
        in ``self.advisory_data`` for the later steps.

        Raise ``requests.HTTPError`` on a non-success HTTP status so that an
        error page is never parsed as if it were an empty advisory list, and
        ``requests.Timeout`` if the server does not answer in time.
        """
        self.log(f"Fetch `{self.url}`")
        response = requests.get(self.url, timeout=self.request_timeout)
        response.raise_for_status()
        self.advisory_data = response.text

    def advisories_count(self):
        """
        Return the number of ``<li><p>`` occurrences in the fetched HTML text.

        This is a plain substring count on the raw page, not a parse: it can
        differ from the number of paragraphs matched by the ``li p`` selector
        used in ``collect_advisories`` if the markup is formatted differently.
        """
        return self.advisory_data.count("<li><p>")

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield AdvisoryData from the nginx security advisories HTML web page
        previously stored in ``self.advisory_data`` by ``fetch``.
        """
        soup = BeautifulSoup(self.advisory_data, features="lxml")
        # Each advisory is a paragraph nested in a list item.
        for vulnerability_info in soup.select("li p"):
            nginx_advisory = parse_advisory_data_from_paragraph(vulnerability_info)
            yield to_advisory_data(nginx_advisory)
5862
5963
6064class NginxAdvisory (NamedTuple ):
@@ -69,15 +73,15 @@ def to_dict(self):
6973 return self ._asdict ()
7074
7175
72- def to_advisory_data (ngnx_adv : NginxAdvisory ) -> AdvisoryData :
76+ def to_advisory_data (nginx_adv : NginxAdvisory ) -> AdvisoryData :
7377 """
7478 Return AdvisoryData from an NginxAdvisory tuple.
7579 """
7680 package_name = "nginx"
7781 package_type = "nginx"
7882 qualifiers = {}
7983
80- _ , _ , affected_version_range = ngnx_adv .vulnerable .partition (":" )
84+ _ , _ , affected_version_range = nginx_adv .vulnerable .partition (":" )
8185 if "nginx/Windows" in affected_version_range :
8286 qualifiers ["os" ] = "windows"
8387 affected_version_range = affected_version_range .replace ("nginx/Windows" , "" )
@@ -87,7 +91,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
8791 affected_version_range = NginxVersionRange .from_native (affected_version_range )
8892
8993 affected_packages = []
90- _ , _ , fixed_versions = ngnx_adv .not_vulnerable .partition (":" )
94+ _ , _ , fixed_versions = nginx_adv .not_vulnerable .partition (":" )
9195
9296 for fixed_version in fixed_versions .split ("," ):
9397 fixed_version = fixed_version .rstrip ("+" )
@@ -112,17 +116,17 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
112116 )
113117
114118 return AdvisoryData (
115- aliases = ngnx_adv .aliases ,
116- summary = ngnx_adv .summary ,
119+ aliases = nginx_adv .aliases ,
120+ summary = nginx_adv .summary ,
117121 affected_packages = affected_packages ,
118- references = ngnx_adv .references ,
122+ references = nginx_adv .references ,
119123 url = "https://nginx.org/en/security_advisories.html" ,
120124 )
121125
122126
123- def parse_advisory_data_from_paragraph (vuln_info ):
127+ def parse_advisory_data_from_paragraph (vulnerability_info ):
124128 """
125- Return an NginxAdvisory from a ``vuln_info `` bs4 paragraph.
129+ Return an NginxAdvisory from a ``vulnerability_info `` bs4 paragraph.
126130
127131 An advisory paragraph, without html markup, looks like this:
128132
@@ -145,7 +149,7 @@ def parse_advisory_data_from_paragraph(vuln_info):
145149
146150 # we iterate on the children to accumulate values in variables
147151 # FIXME: using an explicit xpath-like query could be simpler
148- for child in vuln_info .children :
152+ for child in vulnerability_info .children :
149153 if is_first :
150154 summary = child
151155 is_first = False
0 commit comments