Skip to content

Commit 0c04da4

Browse files
authored
Merge pull request #1042 from nexB/972-migrate-apache-kafka-importer
Modify apache_kafka.py and related tests for migration
2 parents 770d22f + e33e18d commit 0c04da4

File tree

10 files changed

+947
-169
lines changed

10 files changed

+947
-169
lines changed

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@ Release notes
22
=============
33

44

5+
Next Release
6+
------------
7+
8+
- We re-enabled support for the Apache Kafka vulnerability advisories importer.
9+
10+
511
Version v32.0.0rc2
612
--------------------
713

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from vulnerabilities.importers import alpine_linux
1111
from vulnerabilities.importers import apache_httpd
12+
from vulnerabilities.importers import apache_kafka
1213
from vulnerabilities.importers import apache_tomcat
1314
from vulnerabilities.importers import archlinux
1415
from vulnerabilities.importers import debian
@@ -63,6 +64,7 @@
6364
xen.XenImporter,
6465
ubuntu_usn.UbuntuUSNImporter,
6566
fireeye.FireyeImporter,
67+
apache_kafka.ApacheKafkaImporter,
6668
]
6769

6870
IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}

vulnerabilities/importers/apache_kafka.py

Lines changed: 144 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -7,120 +7,179 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10-
import asyncio
1110

11+
import pytz
1212
import requests
1313
from bs4 import BeautifulSoup
14+
from dateutil.parser import parse
1415
from packageurl import PackageURL
15-
from univers.version_range import VersionRange
16-
from univers.versions import MavenVersion
1716

1817
from vulnerabilities.importer import AdvisoryData
18+
from vulnerabilities.importer import AffectedPackage
1919
from vulnerabilities.importer import Importer
2020
from vulnerabilities.importer import Reference
21-
from vulnerabilities.package_managers import GitHubTagsAPI
22-
from vulnerabilities.utils import nearest_patched_package
2321

24-
GH_PAGE_URL = "https://raw.githubusercontent.com/apache/kafka-site/asf-site/cve-list.html"
25-
ASF_PAGE_URL = "https://kafka.apache.org/cve-list"
22+
# The entries below with `"action": "omit"` have no useful/reportable fixed or affected version data.
23+
# See https://kafka.apache.org/cve-list
24+
affected_version_range_mapping = {
25+
"CVE-2022-34917": {
26+
"action": "include",
27+
"2.8.0 - 2.8.1, 3.0.0 - 3.0.1, 3.1.0 - 3.1.1, 3.2.0 - 3.2.1": "affected",
28+
"2.8.2, 3.0.2, 3.1.2, 3.2.3": "fixed",
29+
"affected_version_range": "vers:apache/>=2.8.0|<=2.8.1|!=2.8.2|>=3.0.0|<=3.0.1|!=3.0.2|>=3.1.0|<=3.1.1|!=3.1.2|>=3.2.0|<=3.2.1|!=3.2.3",
30+
"Issue announced": "19 Sep 2022",
31+
},
32+
"CVE-2022-23302": {
33+
"action": "omit",
34+
},
35+
"CVE-2022-23305": {
36+
"action": "omit",
37+
},
38+
"CVE-2022-23307": {
39+
"action": "omit",
40+
},
41+
"CVE-2021-45046": {
42+
"action": "omit",
43+
},
44+
"CVE-2021-44228": {
45+
"action": "omit",
46+
},
47+
"CVE-2021-4104": {
48+
"action": "omit",
49+
},
50+
"CVE-2021-38153": {
51+
"action": "include",
52+
"2.0.0, 2.0.1, 2.1.0, 2.1.1, 2.2.0, 2.2.1, 2.2.2, 2.3.0, 2.3.1, 2.4.0, 2.4.1, 2.5.0, 2.5.1, 2.6.0, 2.6.1, 2.6.2, 2.7.0, 2.7.1, 2.8.0.": "affected",
53+
"2.6.3, 2.7.2, 2.8.1, 3.0.0 and later": "fixed",
54+
"affected_version_range": "vers:apache/2.0.0|2.0.1|2.1.0|2.1.1|2.2.0|2.2.1|2.2.2|2.3.0|2.3.1|2.4.0|2.4.1|2.5.0|2.5.1|2.6.0|2.6.1|2.6.2|!=2.6.3|2.7.0|2.7.1|!=2.7.2|2.8.0.|!=2.8.1|<3.0.0",
55+
"Issue announced": "21 Sep 2021",
56+
},
57+
"CVE-2019-12399": {
58+
"action": "include",
59+
"2.0.0, 2.0.1, 2.1.0, 2.1.1, 2.2.0, 2.2.1, 2.3.0": "affected",
60+
"2.2.2, 2.3.1 and later": "fixed",
61+
"affected_version_range": "vers:apache/2.0.0|2.0.1|2.1.0|2.1.1|2.2.0|2.2.1|!=2.2.2|2.3.0|<2.3.1",
62+
"Issue announced": "13 Jan 2020",
63+
},
64+
"CVE-2018-17196": {
65+
"action": "include",
66+
"0.11.0.0 to 2.1.0": "affected",
67+
"2.1.1 and later": "fixed",
68+
"affected_version_range": "vers:apache/>=0.11.0.0|<2.1.1",
69+
"Issue announced": "10 July 2019",
70+
},
71+
"CVE-2018-1288": {
72+
"action": "include",
73+
"0.9.0.0 to 0.9.0.1, 0.10.0.0 to 0.10.2.1, 0.11.0.0 to 0.11.0.2, 1.0.0": "affected",
74+
"0.10.2.2, 0.11.0.3, 1.0.1, 1.1.0": "fixed",
75+
"affected_version_range": "vers:apache/>=0.9.0.0|<=0.9.0.1|>=0.10.0.0|<=0.10.2.1|!=0.10.2.2|>=0.11.0.0|<=0.11.0.2|!=0.11.0.3|1.0.0|!=1.0.1|!=1.1.0",
76+
"Issue announced": "26 July 2018",
77+
},
78+
"CVE-2017-12610": {
79+
"action": "include",
80+
"0.10.0.0 to 0.10.2.1, 0.11.0.0 to 0.11.0.1": "affected",
81+
"0.10.2.2, 0.11.0.2, 1.0.0": "fixed",
82+
"affected_version_range": "vers:apache/>=0.10.0.0|<=0.10.2.1|!=0.10.2.2|>=0.11.0.0|<=0.11.0.1|!=0.11.0.2|!=1.0.0",
83+
"Issue announced": "26 July 2018",
84+
},
85+
}
2686

2787

2888
class ApacheKafkaImporter(Importer):
89+
90+
GH_PAGE_URL = "https://raw.githubusercontent.com/apache/kafka-site/asf-site/cve-list.html"
91+
ASF_PAGE_URL = "https://kafka.apache.org/cve-list"
92+
spdx_license_expression = "Apache-2.0"
93+
license_url = "https://www.apache.org/licenses/"
94+
2995
@staticmethod
30-
def fetch_advisory_page():
31-
page = requests.get(GH_PAGE_URL)
96+
def fetch_advisory_page(self):
97+
page = requests.get(self.GH_PAGE_URL)
3298
return page.content
3399

34-
def set_api(self):
35-
self.version_api = GitHubTagsAPI()
36-
asyncio.run(self.version_api.load_api(["apache/kafka"]))
100+
def advisory_data(self):
101+
advisory_page = self.fetch_advisory_page(self)
37102

38-
def updated_advisories(self):
39-
advisory_page = self.fetch_advisory_page()
40-
self.set_api()
41103
parsed_data = self.to_advisory(advisory_page)
42-
return self.batch_advisories(parsed_data)
104+
return parsed_data
43105

44106
def to_advisory(self, advisory_page):
45107
advisories = []
108+
46109
advisory_page = BeautifulSoup(advisory_page, features="lxml")
47110
cve_section_beginnings = advisory_page.find_all("h2")
48111
for cve_section_beginning in cve_section_beginnings:
49-
cve_id = cve_section_beginning.text.split("\n")[0]
112+
# The heading text sometimes includes extra text after the CVE id on the same line (a line break is not always present), so splitting on a newline is unreliable:
113+
# cve_id = cve_section_beginning.text.split("\n")[0]
114+
# The h2 `id` attribute is more reliable: it yields only the CVE id, with no trailing text.
115+
cve_id = cve_section_beginning.get("id")
116+
50117
cve_description_paragraph = cve_section_beginning.find_next_sibling("p")
118+
119+
description = str(cve_description_paragraph.get_text())
120+
description = " ".join(description.split())
121+
51122
cve_data_table = cve_section_beginning.find_next_sibling("table")
52123
cve_data_table_rows = cve_data_table.find_all("tr")
53124
affected_versions_row = cve_data_table_rows[0]
54125
fixed_versions_row = cve_data_table_rows[1]
55-
affected_version_ranges = to_version_ranges(
56-
affected_versions_row.find_all("td")[1].text
57-
)
58-
fixed_version_ranges = to_version_ranges(fixed_versions_row.find_all("td")[1].text)
59-
60-
fixed_packages = [
61-
PackageURL(type="apache", name="kafka", version=version)
62-
for version in self.version_api.get("apache/kafka").valid_versions
63-
if any(
64-
[
65-
MavenVersion(version) in version_range
66-
for version_range in fixed_version_ranges
67-
]
68-
)
69-
]
70-
71-
affected_packages = [
72-
PackageURL(type="apache", name="kafka", version=version)
73-
for version in self.version_api.get("apache/kafka").valid_versions
74-
if any(
75-
[
76-
MavenVersion(version) in version_range
77-
for version_range in affected_version_ranges
78-
]
79-
)
80-
]
81-
82-
advisories.append(
83-
AdvisoryData(
84-
vulnerability_id=cve_id,
85-
summary=cve_description_paragraph.text,
86-
affected_packages=nearest_patched_package(affected_packages, fixed_packages),
87-
references=[
88-
Reference(url=ASF_PAGE_URL),
89-
Reference(
90-
url=f"https://cve.mitre.org/cgi-bin/cvename.cgi?name={cve_id}",
91-
reference_id=cve_id,
92-
),
93-
],
126+
127+
# Split the version text on commas and strip surrounding whitespace from each entry
128+
affected_versions = affected_versions_row.find_all("td")[1].text
129+
130+
affected_versions_clean = [v.strip() for v in affected_versions.split(",")]
131+
affected_versions_clean = [v for v in affected_versions if v]
132+
133+
fixed_versions = fixed_versions_row.find_all("td")[1].text
134+
135+
fixed_versions_clean = [v.strip() for v in fixed_versions.split(",")]
136+
fixed_versions_clean = [v for v in fixed_versions if v]
137+
138+
# This throws a KeyError if the opening h2 tag `id` data changes or is not in the
139+
# hard-coded affected_version_range_mapping dictionary.
140+
cve_version_mapping = affected_version_range_mapping[cve_id]
141+
if cve_version_mapping["action"] == "include":
142+
# These two lookups (results not used elsewhere) exist only to raise a KeyError when the page's version text has changed or is missing from the mapping.
143+
check_affected_versions_key = cve_version_mapping[affected_versions]
144+
check_fixed_versions_key = cve_version_mapping[fixed_versions]
145+
146+
references = [
147+
Reference(
148+
url=self.ASF_PAGE_URL,
149+
reference_id=cve_id,
150+
),
151+
Reference(
152+
url=f"{self.ASF_PAGE_URL}#{cve_id}",
153+
reference_id=cve_id,
154+
),
155+
Reference(
156+
url=f"https://nvd.nist.gov/vuln/detail/{cve_id}",
157+
reference_id=cve_id,
158+
),
159+
]
160+
161+
affected_packages = []
162+
affected_package = AffectedPackage(
163+
package=PackageURL(
164+
name="kafka",
165+
type="apache",
166+
),
167+
affected_version_range=cve_version_mapping["affected_version_range"],
94168
)
95-
)
96-
return advisories
169+
affected_packages.append(affected_package)
97170

171+
date_published = parse(cve_version_mapping["Issue announced"]).replace(
172+
tzinfo=pytz.UTC
173+
)
98174

99-
def to_version_ranges(version_range_text):
100-
version_ranges = []
101-
range_expressions = version_range_text.split(",")
102-
for range_expression in range_expressions:
103-
if "to" in range_expression:
104-
# eg range_expression == "3.2.0 to 3.2.1"
105-
lower_bound, upper_bound = range_expression.split("to")
106-
lower_bound = f">={lower_bound}"
107-
upper_bound = f"<={upper_bound}"
108-
version_ranges.append(
109-
VersionRange.from_scheme_version_spec_string(
110-
"maven", f"{lower_bound},{upper_bound}"
175+
advisories.append(
176+
AdvisoryData(
177+
aliases=[cve_id],
178+
summary=description,
179+
affected_packages=affected_packages,
180+
references=references,
181+
date_published=date_published,
182+
)
111183
)
112-
)
113-
114-
elif "and later" in range_expression:
115-
# eg range_expression == "2.1.1 and later"
116-
range_expression = range_expression.replace("and later", "")
117-
version_ranges.append(
118-
VersionRange.from_scheme_version_spec_string("maven", f">={range_expression}")
119-
)
120-
121-
else:
122-
# eg range_expression == "3.0.0"
123-
version_ranges.append(
124-
VersionRange.from_scheme_version_spec_string("maven", range_expression)
125-
)
126-
return version_ranges
184+
185+
return advisories

vulnerabilities/tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ def no_rmtree(monkeypatch):
2525
# Step 2: Run test for importer only if it is activated (pytestmark = pytest.mark.skipif(...))
2626
# Step 3: Migrate all the tests
2727
collect_ignore = [
28-
"test_apache_kafka.py",
2928
"test_models.py",
3029
"test_package_managers.py",
3130
"test_ruby.py",

0 commit comments

Comments
 (0)