Skip to content

Commit 72a59f5

Browse files
authored
feat: add rate limit for nvd downloads (from @param211) (#1230)
* Add rate limit for NVD downloads * Add license information * Fix merge conflicts * fix: move license to top of file (per recommendation from our licensing team) Co-authored-by: param211 <[email protected]> fixes #1081
1 parent c546b8c commit 72a59f5

File tree

3 files changed

+82
-13
lines changed

3 files changed

+82
-13
lines changed

.github/workflows/pythonapp.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ jobs:
9494
matrix:
9595
os: [ubuntu-latest]
9696
python: [3.6, 3.7]
97-
timeout-minutes: 10
97+
timeout-minutes: 20
9898
env:
9999
ACTIONS: 1
100100
LONG_TESTS: 0
@@ -232,7 +232,7 @@ jobs:
232232
windows_tests:
233233
name: Windows py3.8
234234
runs-on: windows-latest
235-
timeout-minutes: 10
235+
timeout-minutes: 20
236236
env:
237237
ACTIONS: 1
238238
LONG_TESTS: 0

cve_bin_tool/async_utils.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,32 @@
11
# Copyright (C) 2021 Intel Corporation
22
# SPDX-License-Identifier: GPL-3.0-or-later
33

4+
# This file also includes the RateLimiter function with the following license:
5+
#
6+
# Copyright 2018 Quentin Pradet
7+
#
8+
# Permission is hereby granted, free of charge, to any person obtaining a
9+
# copy of this software and associated documentation files (the "Software"), to
10+
# deal in the Software without restriction, including without limitation the
11+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12+
# sell copies of the Software, and to permit persons to whom the Software is
13+
# furnished to do so, subject to the following conditions:
14+
15+
# The above copyright notice and this permission notice shall be included in
16+
# all copies or substantial portions of the Software.
17+
18+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24+
# SOFTWARE.
25+
426
# pylint: disable=too-many-arguments
27+
528
""" Utility classes for the CVE Binary Tool """
29+
630
import asyncio
731
import glob
832
import gzip
@@ -12,6 +36,7 @@
1236
import subprocess
1337
import sys
1438
import tempfile
39+
import time
1540
from functools import partial, wraps
1641

1742
from cve_bin_tool.util import inpath
@@ -203,6 +228,46 @@ class GzipFile(FileIO):
203228
_open = async_wrap(gzip.GzipFile)
204229

205230

231+
class RateLimiter:
    """Token-bucket rate limiter wrapping an HTTP client's ``get()`` calls.

    Every call to :meth:`get` consumes one token from a single bucket shared
    by all requests made through this wrapper.  The bucket starts full with
    ``MAX_TOKENS`` tokens and is refilled at ``RATE`` tokens per second, so
    callers may burst up to ``MAX_TOKENS`` requests and are then throttled
    to roughly ``RATE`` requests per second.

    Only ``get()`` and ``close()`` are forwarded to the wrapped client.

    https://quentin.pradet.me/blog/how-do-you-rate-limit-calls-with-aiohttp.html
    This class is not thread-safe.

    Copyright 2018 Quentin Pradet
    See license at top of file.
    """

    # Tokens added to the bucket per second.
    RATE = 10
    # Bucket capacity, i.e. the largest burst allowed without waiting.
    MAX_TOKENS = 10

    def __init__(self, client):
        """Wrap *client*, an object exposing ``get()`` and an awaitable ``close()``."""
        self.client = client
        self.tokens = self.MAX_TOKENS
        self.updated_at = time.monotonic()

    async def get(self, *args, **kwargs):
        """Wait for a token, then forward the call to ``client.get()``.

        The value returned by ``client.get()`` is passed back as-is (it is
        not awaited here), so callers have to await this coroutine first and
        then use the result, e.g. ``async with await limiter.get(url) as r:``.
        """
        await self.wait_for_token()
        return self.client.get(*args, **kwargs)

    async def wait_for_token(self):
        """Block (cooperatively) until a token is available and consume it."""
        while self.tokens < 1:
            self.add_new_tokens()
            # Only sleep when the refill was not enough; sleeping
            # unconditionally would delay the caller by 0.1 s even though a
            # token has just become available.
            if self.tokens >= 1:
                break
            await asyncio.sleep(0.1)
        # No await between the check above and this decrement, so another
        # coroutine on the same loop cannot take the token in between.
        self.tokens -= 1

    def add_new_tokens(self):
        """Credit the tokens accrued since the last refill, capped at MAX_TOKENS.

        The bucket and its timestamp are only updated once the accrued amount
        brings the balance to at least one whole token; until then elapsed
        time keeps accumulating against the old ``updated_at``.
        """
        now = time.monotonic()
        time_since_update = now - self.updated_at
        new_tokens = time_since_update * self.RATE
        if self.tokens + new_tokens >= 1:
            self.tokens = min(self.tokens + new_tokens, self.MAX_TOKENS)
            self.updated_at = now

    async def close(self):
        """Close the wrapped client."""
        await self.client.close()
269+
270+
206271
aio_rmdir = async_wrap(shutil.rmtree)
207272
aio_rmfile = async_wrap(os.remove)
208273
aio_unpack_archive = async_wrap(shutil.unpack_archive)

cve_bin_tool/cvedb.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from bs4 import BeautifulSoup
2121
from rich.progress import track
2222

23-
from cve_bin_tool.async_utils import FileIO, GzipFile, run_coroutine
23+
from cve_bin_tool.async_utils import FileIO, GzipFile, RateLimiter, run_coroutine
2424
from cve_bin_tool.error_handler import (
2525
AttemptedToWriteOutsideCachedir,
2626
CVEDataForCurlVersionNotInCache,
@@ -103,7 +103,7 @@ def get_db_update_date(self):
103103
return os.path.getmtime(self.dbpath)
104104

105105
async def getmeta(self, session, meta_url):
106-
async with session.get(meta_url) as response:
106+
async with await session.get(meta_url) as response:
107107
response.raise_for_status()
108108
return (
109109
meta_url.replace(".meta", ".json.gz"),
@@ -117,7 +117,7 @@ async def getmeta(self, session, meta_url):
117117
)
118118

119119
async def nist_scrape(self, session):
120-
async with session.get(self.feed) as response:
120+
async with await session.get(self.feed) as response:
121121
response.raise_for_status()
122122
page = await response.text()
123123
json_meta_links = self.META_REGEX.findall(page)
@@ -161,8 +161,7 @@ async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
161161
self.LOGGER.debug(f"Correct SHA for {filename}")
162162
return
163163
self.LOGGER.debug(f"Updating CVE cache for {filename}")
164-
165-
async with session.get(url) as response:
164+
async with await session.get(url) as response:
166165
# Raise better error message on ratelimit by NVD
167166
if response.status == 403:
168167
with ErrorHandler(mode=self.error_mode, logger=self.LOGGER):
@@ -187,7 +186,7 @@ async def cache_update(self, session, url, sha, chunk_size=16 * 1024):
187186
@staticmethod
188187
async def get_curl_versions(session):
189188
regex = re.compile(r"vuln-(\d+.\d+.\d+)\.html")
190-
async with session.get(
189+
async with await session.get(
191190
"https://curl.haxx.se/docs/vulnerabilities.html"
192191
) as response:
193192
response.raise_for_status()
@@ -196,7 +195,7 @@ async def get_curl_versions(session):
196195
return [match.group(1) for match in matches]
197196

198197
async def download_curl_version(self, session, version):
199-
async with session.get(
198+
async with await session.get(
200199
f"https://curl.haxx.se/docs/vuln-{version}.html"
201200
) as response:
202201
response.raise_for_status()
@@ -233,13 +232,16 @@ async def refresh(self):
233232
check_latest_version()
234233
if not self.session:
235234
connector = aiohttp.TCPConnector(limit_per_host=19)
236-
self.session = aiohttp.ClientSession(connector=connector, trust_env=True)
235+
self.session = RateLimiter(
236+
aiohttp.ClientSession(connector=connector, trust_env=True)
237+
)
237238
self.LOGGER.info("Downloading CVE data...")
238239
nvd_metadata, curl_metadata = await asyncio.gather(
239-
self.nist_scrape(self.session), self.get_curl_versions(self.session)
240+
asyncio.ensure_future(self.nist_scrape(self.session)),
241+
asyncio.ensure_future(self.get_curl_versions(self.session)),
240242
)
241243
tasks = [
242-
self.cache_update(self.session, url, meta["sha256"])
244+
asyncio.ensure_future(self.cache_update(self.session, url, meta["sha256"]))
243245
for url, meta in nvd_metadata.items()
244246
if meta is not None
245247
]
@@ -251,7 +253,9 @@ async def refresh(self):
251253
tasks.append(
252254
asyncio.gather(
253255
*(
254-
self.download_curl_version(self.session, version)
256+
asyncio.ensure_future(
257+
self.download_curl_version(self.session, version)
258+
)
255259
for version in curl_metadata
256260
)
257261
)

0 commit comments

Comments
 (0)