Skip to content

Commit e2fc548

Browse files
John Andersen authored
cvedb: Store NVD JSON files gzip compressed (#637)
Signed-off-by: John Andersen <[email protected]>
1 parent 5c963e7 commit e2fc548

File tree

3 files changed, with 13 additions (+13) and 12 deletions (-12)

3 files changed

+13
-12
lines changed

cve_bin_tool/cvedb.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def cache_update(cachedir, url, sha, chunk_size=16 * 1024, logger=LOGGER):
9191
"""
9292
Update the cache for a single year of NVD data.
9393
"""
94-
filename = url.split("/")[-1].replace(".gz", "")
94+
filename = url.split("/")[-1]
9595
# Ensure we only write to files within the cachedir
9696
filepath = os.path.abspath(os.path.join(cachedir, filename))
9797
if not filepath.startswith(os.path.abspath(cachedir)):
@@ -101,7 +101,7 @@ def cache_update(cachedir, url, sha, chunk_size=16 * 1024, logger=LOGGER):
101101
# Validate the sha and write out
102102
sha = sha.upper()
103103
calculate = hashlib.sha256()
104-
with open(filepath, "rb") as handle:
104+
with gzip.open(filepath, "rb") as handle:
105105
chunk = handle.read(chunk_size)
106106
while chunk:
107107
calculate.update(chunk)
@@ -127,7 +127,7 @@ def cache_update(cachedir, url, sha, chunk_size=16 * 1024, logger=LOGGER):
127127
sha = sha.upper()
128128
calculate = hashlib.sha256()
129129
# Copy the contents while updating the sha
130-
with open(filepath, "wb") as filepath_handle:
130+
with gzip.open(filepath, "wb") as filepath_handle:
131131
chunk = jsondata_fileobj.read(chunk_size)
132132
while chunk:
133133
calculate.update(chunk)
@@ -149,7 +149,7 @@ class CVEDB(object):
149149
CACHEDIR = DISK_LOCATION_DEFAULT
150150
FEED = "https://nvd.nist.gov/vuln/data-feeds"
151151
LOGGER = LOGGER.getChild("CVEDB")
152-
NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json"
152+
NVDCVE_FILENAME_TEMPLATE = "nvdcve-1.1-{}.json.gz"
153153
META_REGEX = re.compile(r"https:\/\/.*\/json\/.*-[0-9]*\.[0-9]*-[0-9]*\.meta")
154154
RANGE_UNSET = ""
155155

@@ -558,7 +558,7 @@ def year(self, year):
558558
if not os.path.isfile(filename):
559559
raise CVEDataForYearNotInCache(year)
560560
# Open the file and load the JSON data, log the number of CVEs loaded
561-
with open(filename, "rb") as fileobj:
561+
with gzip.open(filename, "rb") as fileobj:
562562
cves_for_year = json.load(fileobj)
563563
self.LOGGER.debug(
564564
f'Year {year} has {len(cves_for_year["CVE_Items"])} CVEs in dataset'
@@ -571,9 +571,9 @@ def years(self):
571571
"""
572572
return sorted(
573573
[
574-
int(filename.split(".")[-2].split("-")[-1])
574+
int(filename.split(".")[-3].split("-")[-1])
575575
for filename in glob.glob(
576-
os.path.join(self.cachedir, "nvdcve-1.1-*.json")
576+
os.path.join(self.cachedir, "nvdcve-1.1-*.json.gz")
577577
)
578578
]
579579
)

test/test_cvedb.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,6 @@ def test_04_verify_false(self):
4646
with self.cvedb:
4747
self.assertTrue(
4848
os.path.isfile(
49-
os.path.join(self.cvedb.cachedir, "nvdcve-1.1-2015.json")
49+
os.path.join(self.cvedb.cachedir, "nvdcve-1.1-2015.json.gz")
5050
)
5151
)

test/test_json.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77
import hashlib
88
import json
9+
import gzip
910
import os
1011
import unittest
1112
import datetime
@@ -24,7 +25,7 @@
2425
NVD_SCHEMA = "https://scap.nist.gov/schema/nvd/feed/1.1/nvd_cve_feed_json_1.1.schema"
2526

2627
# NVD feeds from "https://nvd.nist.gov/vuln/data-feeds#JSON_FEED" but stored locally
27-
NVD_FILE_TEMPLATE = "nvdcve-1.1-{}.json"
28+
NVD_FILE_TEMPLATE = "nvdcve-1.1-{}.json.gz"
2829

2930

3031
class TestJSON(unittest.TestCase):
@@ -39,11 +40,11 @@ def test_json_validation(self):
3940
years = list(range(2002, datetime.datetime.now().year + 1))
4041
# Open the latest nvd file on disk
4142
for year in years:
42-
with open(
43-
os.path.join(DISK_LOCATION_DEFAULT, f"nvdcve-1.1-{year}.json"), "rb",
43+
with gzip.open(
44+
os.path.join(DISK_LOCATION_DEFAULT, f"nvdcve-1.1-{year}.json.gz"), "rb",
4445
) as json_file:
4546
nvd_json = json.loads(json_file.read())
46-
print(f"Loaded json for year {year}: nvdcve-1.1-{year}.json")
47+
print(f"Loaded json for year {year}: nvdcve-1.1-{year}.json.gz")
4748

4849
# Validate -- will raise a ValidationError if not valid
4950
try:

0 commit comments

Comments
 (0)