Skip to content

Commit 26d45ed

Browse files
authored
Merge pull request #1049 from TG1999/migrate/ubuntu_usn
Migrate ubuntu usn importer #1051
2 parents 80da375 + d588821 commit 26d45ed

File tree

6 files changed

+150
-99
lines changed

6 files changed

+150
-99
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from vulnerabilities.importers import retiredotnet
3232
from vulnerabilities.importers import suse_scores
3333
from vulnerabilities.importers import ubuntu
34+
from vulnerabilities.importers import ubuntu_usn
3435
from vulnerabilities.importers import xen
3536

3637
IMPORTERS_REGISTRY = [
@@ -59,6 +60,7 @@
5960
elixir_security.ElixirSecurityImporter,
6061
apache_tomcat.ApacheTomcatImporter,
6162
xen.XenImporter,
63+
ubuntu_usn.UbuntuUSNImporter,
6264
]
6365

6466
IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}

vulnerabilities/importers/ubuntu.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,48 @@
1919

2020

2121
class UbuntuImporter(OvalImporter):
22-
spdx_license_expression = "GPL"
23-
license_url = "https://ubuntu.com/legal/terms"
22+
spdx_license_expression = "LicenseRef-scancode-other-permissive"
23+
notice = """
24+
From: Seth Arnold <[email protected]>
25+
Date: Wed, Jan 25, 2023 at 2:02 AM
26+
Subject: Re: [ubuntu-hardened] Usage of Ubuntu Security Data in VulnerableCode
27+
To: Tushar Goel <[email protected]>
28+
29+
30+
31+
On Wed, Jan 11, 2023 at 06:27:38PM +0530, Tushar Goel wrote:
32+
> We would like to integrate the Ubuntu usn data[1][2] and
33+
> Ubuntu security data (OVAL format)[3] in vulnerablecode[4]
34+
> which is a FOSS db of FOSS vulnerability data. We were not
35+
> able to know under which license this security data comes.
36+
> We would be grateful to have your acknowledgement over usage of
37+
> the ubuntu security data in vulnerablecode and have
38+
> some kind of licensing declaration from your side.
39+
40+
Hello Tushar, we do not have an explicit license on this data.
41+
42+
We share our data with the intention that others will use it. Please
43+
feel free to use it for the general furtherance of security.
44+
45+
Much of the data that's contained within our databases is sourced from
46+
third parties, who also shared their data with the intention that others
47+
will use it. I'm not sure what it would look like to try to put a license
48+
on data that is crowd-sourced from thousands of contributors. (If you were
49+
to start such a project today, it'd probably be one of the first things to
50+
formalize. But when CVE was started two decades ago, the primary goal was
51+
sharing knowledge and simplifying the vulnerability remediation process,
52+
and licensing the data was, as far as I can remember, not considered.
53+
Sharing was the goal.)
54+
55+
I will ask that vulnerablecode 'be nice' to our infrastructure that
56+
hosts the databases -- some automated uses of our infrastructure by
57+
vulnerability scanner tools has lead to significant load and engineering
58+
effort. In general, please prefer a small handful of systems updating
59+
mirrors roughly twice a day rather than thousands of hosts pulling
60+
data hourly.
61+
62+
Thanks
63+
"""
2464

2565
def __init__(self, *args, **kwargs):
2666
super().__init__(*args, **kwargs)

vulnerabilities/importers/ubuntu_usn.py

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,55 +15,81 @@
1515
from vulnerabilities.importer import AdvisoryData
1616
from vulnerabilities.importer import Importer
1717
from vulnerabilities.importer import Reference
18-
from vulnerabilities.utils import create_etag
1918
from vulnerabilities.utils import is_cve
2019

2120

2221
class UbuntuUSNImporter(Importer):
23-
def updated_advisories(self):
24-
advisories = []
25-
if create_etag(data_src=self, url=self.config.db_url, etag_key="etag"):
26-
advisories.extend(self.to_advisories(fetch(self.config.db_url)))
27-
28-
return self.batch_advisories(advisories)
29-
30-
def create_etag(self, url):
31-
etag = requests.head(url).headers.get("etag")
32-
if not etag:
33-
return True
34-
35-
elif url in self.config.etags:
36-
if self.config.etags[url] == etag:
37-
return False
38-
39-
self.config.etags[url] = etag
40-
return True
22+
db_url = "https://usn.ubuntu.com/usn-db/database-all.json.bz2"
23+
spdx_license_expression = "LicenseRef-scancode-other-permissive"
24+
notice = """
25+
From: Seth Arnold <[email protected]>
26+
Date: Wed, Jan 25, 2023 at 2:02 AM
27+
Subject: Re: [ubuntu-hardened] Usage of Ubuntu Security Data in VulnerableCode
28+
To: Tushar Goel <[email protected]>
29+
30+
31+
32+
On Wed, Jan 11, 2023 at 06:27:38PM +0530, Tushar Goel wrote:
33+
> We would like to integrate the Ubuntu usn data[1][2] and
34+
> Ubuntu security data (OVAL format)[3] in vulnerablecode[4]
35+
> which is a FOSS db of FOSS vulnerability data. We were not
36+
> able to know under which license this security data comes.
37+
> We would be grateful to have your acknowledgement over usage of
38+
> the ubuntu security data in vulnerablecode and have
39+
> some kind of licensing declaration from your side.
40+
41+
Hello Tushar, we do not have an explicit license on this data.
42+
43+
We share our data with the intention that others will use it. Please
44+
feel free to use it for the general furtherance of security.
45+
46+
Much of the data that's contained within our databases is sourced from
47+
third parties, who also shared their data with the intention that others
48+
will use it. I'm not sure what it would look like to try to put a license
49+
on data that is crowd-sourced from thousands of contributors. (If you were
50+
to start such a project today, it'd probably be one of the first things to
51+
formalize. But when CVE was started two decades ago, the primary goal was
52+
sharing knowledge and simplifying the vulnerability remediation process,
53+
and licensing the data was, as far as I can remember, not considered.
54+
Sharing was the goal.)
55+
56+
I will ask that vulnerablecode 'be nice' to our infrastructure that
57+
hosts the databases -- some automated uses of our infrastructure by
58+
vulnerability scanner tools has lead to significant load and engineering
59+
effort. In general, please prefer a small handful of systems updating
60+
mirrors roughly twice a day rather than thousands of hosts pulling
61+
data hourly.
62+
63+
Thanks
64+
"""
65+
66+
def advisory_data(self):
67+
usn_db = fetch(self.db_url)
68+
yield from self.to_advisories(usn_db=usn_db)
4169

4270
@staticmethod
4371
def to_advisories(usn_db):
44-
advisories = []
4572
for usn in usn_db:
46-
reference = get_usn_references(usn_db[usn]["id"])
47-
for cve in usn_db[usn].get("cves", [""]):
73+
usn_data = usn_db[usn]
74+
references = get_usn_references(usn_data.get("id"))
75+
for cve in usn_data.get("cves", []):
4876
# The db sometimes contains entries like
4977
# {'cves': ['python-pgsql vulnerabilities', 'CVE-2006-2313', 'CVE-2006-2314']}
5078
# This `if` filters entries like 'python-pgsql vulnerabilities'
5179
if not is_cve(cve):
52-
cve = ""
80+
continue
5381

54-
advisories.append(
55-
AdvisoryData(
56-
vulnerability_id=cve,
57-
summary="",
58-
references=[reference],
59-
)
82+
yield AdvisoryData(
83+
aliases=[cve],
84+
summary="",
85+
references=references,
6086
)
6187

62-
return advisories
63-
6488

6589
def get_usn_references(usn_id):
66-
return Reference(reference_id="USN-" + usn_id, url="https://usn.ubuntu.com/{}/".format(usn_id))
90+
if not usn_id:
91+
return []
92+
return [Reference(reference_id=f"USN-{usn_id}", url=f"https://usn.ubuntu.com/{usn_id}/")]
6793

6894

6995
def fetch(url):

vulnerabilities/tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,5 @@ def no_rmtree(monkeypatch):
3333
"test_rust.py",
3434
"test_suse_backports.py",
3535
"test_suse.py",
36-
"test_ubuntu_usn.py",
3736
"test_upstream.py",
3837
]

vulnerabilities/tests/test_data/ubuntu_usn_db/ubuntu-usn-expected.json

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
[
2+
{
3+
"aliases": [
4+
"CVE-2009-0698"
5+
],
6+
"summary": "",
7+
"affected_packages": [],
8+
"references": [
9+
{
10+
"reference_id": "USN-763-1",
11+
"url": "https://usn.ubuntu.com/763-1/",
12+
"severities": []
13+
}
14+
],
15+
"date_published": null,
16+
"weaknesses": []
17+
},
18+
{
19+
"aliases": [
20+
"CVE-2009-1274"
21+
],
22+
"summary": "",
23+
"affected_packages": [],
24+
"references": [
25+
{
26+
"reference_id": "USN-763-1",
27+
"url": "https://usn.ubuntu.com/763-1/",
28+
"severities": []
29+
}
30+
],
31+
"date_published": null,
32+
"weaknesses": []
33+
}
34+
]

vulnerabilities/tests/test_ubuntu_usn.py

Lines changed: 14 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -17,71 +17,21 @@
1717

1818
from packageurl import PackageURL
1919

20-
import vulnerabilities.importers.ubuntu_usn as ubuntu_usn
2120
from vulnerabilities.importer import AdvisoryData
2221
from vulnerabilities.importer import Reference
22+
from vulnerabilities.importers.ubuntu_usn import UbuntuUSNImporter
23+
from vulnerabilities.tests import util_tests
2324

2425
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25-
TEST_DATA = os.path.join(BASE_DIR, "test_data/", "ubuntu_usn_db", "database-all.json.bz2")
26-
27-
28-
class TestUbuntuUSNImporter(TestCase):
29-
@classmethod
30-
def setUpClass(cls):
31-
data_src_cfg = {"etags": {}, "db_url": "http://exampledb.com"}
32-
cls.data_src = ubuntu_usn.UbuntuUSNImporter(batch_size=1, config=data_src_cfg)
33-
with open(TEST_DATA, "rb") as f:
34-
cls.raw_data = f.read()
35-
cls.db = json.loads(bz2.decompress(cls.raw_data))
36-
37-
def test_get_usn_references(self):
38-
39-
eg_usn = "435-1"
40-
expected_references = Reference(
41-
reference_id="USN-435-1", url="https://usn.ubuntu.com/435-1/"
42-
)
43-
44-
found_references = ubuntu_usn.get_usn_references(eg_usn)
45-
assert found_references == expected_references
46-
47-
def test_fetch(self):
48-
49-
mock_response = MagicMock()
50-
mock_response.content = self.raw_data
51-
with patch("vulnerabilities.importers.ubuntu_usn.requests.get", return_value=mock_response):
52-
assert ubuntu_usn.fetch("www.db.com") == self.db
53-
54-
def test_to_advisories(self):
55-
56-
expected_advisories = [
57-
Advisory(
58-
summary="",
59-
references=[
60-
Reference(url="https://usn.ubuntu.com/763-1/", reference_id="USN-763-1")
61-
],
62-
vulnerability_id="CVE-2009-0698",
63-
),
64-
Advisory(
65-
summary="",
66-
references=[
67-
Reference(url="https://usn.ubuntu.com/763-1/", reference_id="USN-763-1")
68-
],
69-
vulnerability_id="CVE-2009-1274",
70-
),
71-
]
72-
found_advisories = self.data_src.to_advisories(self.db)
73-
74-
found_advisories = list(map(Advisory.normalized, found_advisories))
75-
expected_advisories = list(map(Advisory.normalized, expected_advisories))
76-
assert sorted(found_advisories) == sorted(expected_advisories)
77-
78-
def test_create_etag(self):
79-
assert self.data_src.config.etags == {}
80-
81-
mock_response = MagicMock()
82-
mock_response.headers = {"etag": "2131151243&2191"}
83-
84-
with patch("vulnerabilities.importers.ubuntu.requests.head", return_value=mock_response):
85-
assert self.data_src.create_etag("https://example.org")
86-
assert self.data_src.config.etags == {"https://example.org": "2131151243&2191"}
87-
assert not self.data_src.create_etag("https://example.org")
26+
TEST_DIR = os.path.join(BASE_DIR, "test_data", "ubuntu_usn_db")
27+
28+
29+
def test_ubuntu_usn():
30+
database = os.path.join(TEST_DIR, "database-all.json.bz2")
31+
with open(database, "rb") as f:
32+
raw_data = f.read()
33+
db = json.loads(bz2.decompress(raw_data))
34+
advisories = UbuntuUSNImporter().to_advisories(db)
35+
expected_file = os.path.join(TEST_DIR, f"ubuntu-usn-expected.json")
36+
result = [data.to_dict() for data in list(advisories)]
37+
util_tests.check_results_against_json(result, expected_file)

0 commit comments

Comments
 (0)