Skip to content

Commit 47fed93

Browse files
committed
Merge remote-tracking branch 'origin/main' into faster-vuln-search
2 parents 911341b + 8eaa86a commit 47fed93

27 files changed

+940
-704
lines changed

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,11 @@ toml==0.10.2
106106
tomli==2.0.1
107107
traitlets==5.1.1
108108
typing_extensions==4.1.1
109-
univers==30.7.0
109+
univers==30.9.0
110110
urllib3==1.26.9
111111
wcwidth==0.2.5
112112
websocket-client==0.59.0
113113
yarl==1.7.2
114114
zipp==3.8.0
115115
dateparser==1.1.1
116-
fetchcode==0.1.0
116+
fetchcode==0.2.0

setup.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ install_requires =
6565

6666
#essentials
6767
packageurl-python>=0.9.4
68-
univers>=30.3.1
68+
univers>=30.9.0
6969
license-expression>=21.6.14
7070

7171
# file and data formats
@@ -82,7 +82,7 @@ install_requires =
8282
# networking
8383
GitPython>=3.1.17
8484
requests>=2.25.1
85-
fetchcode>=0.1.0
85+
fetchcode>=0.2.0
8686

8787
[options.extras_require]
8888
dev =

vulnerabilities/importer.py

Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,16 @@
2222
from typing import Set
2323
from typing import Tuple
2424

25+
import pytz
26+
from dateutil import parser as dateparser
2527
from fetchcode.vcs import fetch_via_vcs
2628
from license_expression import Licensing
2729
from packageurl import PackageURL
30+
from univers.version_range import RANGE_CLASS_BY_SCHEMES
2831
from univers.version_range import VersionRange
2932
from univers.versions import Version
3033

34+
from vulnerabilities import severity_systems
3135
from vulnerabilities.oval_parser import OvalParser
3236
from vulnerabilities.severity_systems import SCORING_SYSTEMS
3337
from vulnerabilities.severity_systems import ScoringSystem
@@ -350,13 +354,13 @@ class OvalImporter(Importer):
350354
"""
351355

352356
@staticmethod
353-
def create_purl(pkg_name: str, pkg_version: str, pkg_data: Mapping) -> PackageURL:
357+
def create_purl(pkg_name: str, pkg_data: Mapping) -> PackageURL:
354358
"""
355359
Helper method for creating different purls for subclasses without them reimplementing
356360
get_data_from_xml_doc method
357361
Note: pkg_data must include 'type' of package
358362
"""
359-
return PackageURL(name=pkg_name, version=pkg_version, **pkg_data)
363+
return PackageURL(name=pkg_name, **pkg_data)
360364

361365
@staticmethod
362366
def _collect_pkgs(parsed_oval_data: Mapping) -> Set:
@@ -390,28 +394,17 @@ def advisory_data(self) -> List[AdvisoryData]:
390394
for metadata, oval_file in self._fetch():
391395
try:
392396
oval_data = self.get_data_from_xml_doc(oval_file, metadata)
393-
yield oval_data
397+
yield from oval_data
394398
except Exception:
395399
logger.error(
396400
f"Failed to get updated_advisories: {oval_file!r} "
397401
f"with {metadata!r}:\n" + traceback.format_exc()
398402
)
399403
continue
400404

401-
def set_api(self, all_pkgs: Iterable[str]):
402-
"""
403-
This method loads the self.pkg_manager_api with the specified packages.
404-
It fetches and caches all the versions of these packages and exposes
405-
them through self.pkg_manager_api.get(<package_name>). Example
406-
407-
>> self.set_api(['electron'])
408-
Assume 'electron' has only versions 1.0.0 and 1.2.0
409-
>> assert self.pkg_manager_api.get('electron') == {'1.0.0','1.2.0'}
410-
411-
"""
412-
raise NotImplementedError
413-
414-
def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> List[AdvisoryData]:
405+
def get_data_from_xml_doc(
406+
self, xml_doc: ET.ElementTree, pkg_metadata={}
407+
) -> Iterable[AdvisoryData]:
415408
"""
416409
The orchestration method of the OvalDataSource. This method breaks an
417410
OVAL xml ElementTree into a list of `Advisory`.
@@ -422,66 +415,58 @@ def get_data_from_xml_doc(self, xml_doc: ET.ElementTree, pkg_metadata={}) -> Lis
422415
Example value of pkg_metadata:
423416
{"type":"deb","qualifiers":{"distro":"buster"} }
424417
"""
425-
426-
all_adv = []
427-
oval_doc = OvalParser(self.translations, xml_doc)
428-
raw_data = oval_doc.get_data()
429-
all_pkgs = self._collect_pkgs(raw_data)
430-
self.set_api(all_pkgs)
418+
oval_parsed_data = OvalParser(self.translations, xml_doc)
419+
raw_data = oval_parsed_data.get_data()
420+
oval_doc = oval_parsed_data.oval_document
421+
timestamp = oval_doc.getGenerator().getTimestamp()
431422

432423
# convert definition_data to Advisory objects
433424
for definition_data in raw_data:
434425
# These fields are definition level, i.e common for all elements
435426
# connected/linked to an OvalDefinition
436427
vuln_id = definition_data["vuln_id"]
437428
description = definition_data["description"]
438-
references = [Reference(url=url) for url in definition_data["reference_urls"]]
429+
severities = (
430+
[
431+
VulnerabilitySeverity(
432+
system=severity_systems.GENERIC, value=definition_data.get("severity")
433+
)
434+
]
435+
if definition_data.get("severity")
436+
else []
437+
)
438+
references = [
439+
Reference(url=url, severities=severities)
440+
for url in definition_data["reference_urls"]
441+
]
439442
affected_packages = []
440443
for test_data in definition_data["test_data"]:
441444
for package_name in test_data["package_list"]:
442-
if package_name and len(package_name) >= 50:
443-
continue
444-
445-
affected_version_range = test_data["version_ranges"] or set()
446-
version_class = version_class_by_package_type[pkg_metadata["type"]]
447-
version_scheme = version_class.scheme
448-
449-
affected_version_range = VersionRange.from_scheme_version_spec_string(
450-
version_scheme, affected_version_range
451-
)
452-
all_versions = self.pkg_manager_api.get(package_name).valid_versions
453-
454-
# FIXME: what is this 50 DB limit? that's too small for versions
455-
# FIXME: we should not drop data this way
456-
# This filter is for filtering out long versions.
457-
# 50 is limit because that's what db permits atm.
458-
all_versions = [version for version in all_versions if len(version) < 50]
459-
if not all_versions:
460-
continue
461-
462-
affected_purls = []
463-
safe_purls = []
464-
for version in all_versions:
465-
purl = self.create_purl(
466-
pkg_name=package_name,
467-
pkg_version=version,
468-
pkg_data=pkg_metadata,
445+
affected_version_range = test_data["version_ranges"]
446+
vrc = RANGE_CLASS_BY_SCHEMES[pkg_metadata["type"]]
447+
if affected_version_range:
448+
try:
449+
affected_version_range = vrc.from_native(affected_version_range)
450+
except Exception as e:
451+
logger.error(
452+
f"Failed to parse version range {affected_version_range!r} "
453+
f"for package {package_name!r}:\n{e}"
454+
)
455+
continue
456+
if package_name:
457+
affected_packages.append(
458+
AffectedPackage(
459+
package=self.create_purl(package_name, pkg_metadata),
460+
affected_version_range=affected_version_range,
461+
)
469462
)
470-
if version_class(version) in affected_version_range:
471-
affected_purls.append(purl)
472-
else:
473-
safe_purls.append(purl)
474-
475-
affected_packages.extend(
476-
nearest_patched_package(affected_purls, safe_purls),
477-
)
478-
479-
all_adv.append(
480-
AdvisoryData(
481-
summary=description,
482-
affected_packages=affected_packages,
483-
vulnerability_id=vuln_id,
484-
references=references,
485-
)
463+
date_published = dateparser.parse(timestamp)
464+
if not date_published.tzinfo:
465+
date_published = date_published.replace(tzinfo=pytz.UTC)
466+
yield AdvisoryData(
467+
aliases=[vuln_id],
468+
summary=description,
469+
affected_packages=affected_packages,
470+
references=sorted(references),
471+
date_published=date_published,
486472
)
487-
return all_adv

vulnerabilities/importers/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
#
99

1010
from vulnerabilities.importers import alpine_linux
11+
from vulnerabilities.importers import archlinux
1112
from vulnerabilities.importers import debian
13+
from vulnerabilities.importers import debian_oval
1214
from vulnerabilities.importers import github
1315
from vulnerabilities.importers import gitlab
1416
from vulnerabilities.importers import nginx
@@ -17,6 +19,7 @@
1719
from vulnerabilities.importers import pypa
1820
from vulnerabilities.importers import pysec
1921
from vulnerabilities.importers import redhat
22+
from vulnerabilities.importers import ubuntu
2023

2124
IMPORTERS_REGISTRY = [
2225
nginx.NginxImporter,
@@ -29,6 +32,9 @@
2932
debian.DebianImporter,
3033
gitlab.GitLabAPIImporter,
3134
pypa.PyPaImporter,
35+
archlinux.ArchlinuxImporter,
36+
ubuntu.UbuntuImporter,
37+
debian_oval.DebianOvalImporter,
3238
]
3339

3440
IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}

vulnerabilities/importers/alpine_linux.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from vulnerabilities.references import WireSharkReference
3232
from vulnerabilities.references import XsaReference
3333
from vulnerabilities.references import ZbxReference
34+
from vulnerabilities.utils import fetch_response
3435
from vulnerabilities.utils import is_cve
3536

3637
LOGGER = logging.getLogger(__name__)
@@ -58,16 +59,6 @@ def advisory_data(self) -> Iterable[AdvisoryData]:
5859
yield from process_record(record)
5960

6061

61-
def fetch_response(url):
62-
"""
63-
Fetch and return `response` from the `url`
64-
"""
65-
response = requests.get(url)
66-
if response.status_code == 200:
67-
return response
68-
raise Exception(f"Failed to fetch data from {url!r} with status code: {response.status_code!r}")
69-
70-
7162
def fetch_advisory_directory_links(page_response_content: str) -> List[str]:
7263
"""
7364
Return a list of advisory directory links present in `page_response_content` html string

vulnerabilities/importers/archlinux.py

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,66 +6,64 @@
66
# See https://github.com/nexB/vulnerablecode for support or download.
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
9-
import dataclasses
10-
import json
9+
1110
from typing import Iterable
1211
from typing import List
1312
from typing import Mapping
14-
from typing import Set
1513
from urllib.request import urlopen
1614

1715
from packageurl import PackageURL
16+
from univers.version_range import ArchLinuxVersionRange
17+
from univers.versions import ArchLinuxVersion
1818

1919
from vulnerabilities import severity_systems
2020
from vulnerabilities.importer import AdvisoryData
21+
from vulnerabilities.importer import AffectedPackage
2122
from vulnerabilities.importer import Importer
2223
from vulnerabilities.importer import Reference
2324
from vulnerabilities.importer import VulnerabilitySeverity
24-
from vulnerabilities.utils import nearest_patched_package
25+
from vulnerabilities.utils import fetch_response
2526

2627

2728
class ArchlinuxImporter(Importer):
28-
def __enter__(self):
29-
self._api_response = self._fetch()
30-
31-
def updated_advisories(self) -> Set[AdvisoryData]:
32-
advisories = []
29+
url = "https://security.archlinux.org/json"
30+
spdx_license_expression = "MIT"
31+
license_url = "https://github.com/archlinux/arch-security-tracker/blob/master/LICENSE"
3332

34-
for record in self._api_response:
35-
advisories.extend(self._parse(record))
33+
def fetch(self) -> Iterable[Mapping]:
34+
response = fetch_response(self.url)
35+
return response.json()
3636

37-
return self.batch_advisories(advisories)
37+
def advisory_data(self) -> Iterable[AdvisoryData]:
38+
for record in self.fetch():
39+
yield from self.parse_advisory(record)
3840

39-
def _fetch(self) -> Iterable[Mapping]:
40-
with urlopen(self.config.archlinux_tracker_url) as response:
41-
return json.load(response)
42-
43-
def _parse(self, record) -> List[AdvisoryData]:
41+
def parse_advisory(self, record) -> List[AdvisoryData]:
4442
advisories = []
45-
46-
for cve_id in record["issues"]:
43+
aliases = record.get("issues") or []
44+
for alias in aliases:
4745
affected_packages = []
4846
for name in record["packages"]:
49-
impacted_purls, resolved_purls = [], []
50-
impacted_purls.append(
51-
PackageURL(
47+
summary = record.get("type") or ""
48+
if summary == "unknown":
49+
summary = ""
50+
affected = record.get("affected") or ""
51+
affected_version_range = (
52+
ArchLinuxVersionRange.from_versions([affected]) if affected else None
53+
)
54+
fixed = record.get("fixed") or ""
55+
fixed_version = ArchLinuxVersion(fixed) if fixed else None
56+
affected_packages = []
57+
affected_package = AffectedPackage(
58+
package=PackageURL(
5259
name=name,
53-
type="pacman",
60+
type="alpm",
5461
namespace="archlinux",
55-
version=record["affected"],
56-
)
62+
),
63+
affected_version_range=affected_version_range,
64+
fixed_version=fixed_version,
5765
)
58-
59-
if record["fixed"]:
60-
resolved_purls.append(
61-
PackageURL(
62-
name=name,
63-
type="pacman",
64-
namespace="archlinux",
65-
version=record["fixed"],
66-
)
67-
)
68-
affected_packages.extend(nearest_patched_package(impacted_purls, resolved_purls))
66+
affected_packages.append(affected_package)
6967

7068
references = []
7169
references.append(
@@ -89,9 +87,9 @@ def _parse(self, record) -> List[AdvisoryData]:
8987
)
9088

9189
advisories.append(
92-
Advisory(
93-
vulnerability_id=cve_id,
94-
summary="",
90+
AdvisoryData(
91+
aliases=[alias],
92+
summary=summary,
9593
affected_packages=affected_packages,
9694
references=references,
9795
)

vulnerabilities/importers/debian.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939

4040
class DebianImporter(Importer):
4141

42-
spdx_license_expression = "MIT"
42+
spdx_license_expression = "LicenseRef-scancode-other-permissive"
4343
license_url = "https://www.debian.org/license"
4444
notice = """
4545
From: Tushar Goel <[email protected]>

0 commit comments

Comments
 (0)