diff --git a/scripts/1-fetch/github_fetch.py b/scripts/1-fetch/github_fetch.py index fc1412e6..ef585d16 100755 --- a/scripts/1-fetch/github_fetch.py +++ b/scripts/1-fetch/github_fetch.py @@ -32,15 +32,6 @@ # Constants FILE1_COUNT = os.path.join(PATHS["data_phase"], "github_1_count.csv") GH_TOKEN = os.getenv("GH_TOKEN") -GITHUB_RETRY_STATUS_FORCELIST = [ - 408, # Request Timeout - 422, # Unprocessable Content (Validation failed, or endpoint spammed) - 429, # Too Many Requests - 500, # Internal Server Error - 502, # Bad Gateway - 503, # Service Unavailable - 504, # Gateway Timeout -] # Also see: https://en.wikipedia.org/wiki/Public-domain-equivalent_license GITHUB_TOOLS = [ {"TOOL_IDENTIFIER": "BSD Zero Clause License", "SPDX_IDENTIFIER": "0BSD"}, @@ -93,11 +84,14 @@ def get_requests_session(): max_retries = Retry( total=5, backoff_factor=10, - status_forcelist=GITHUB_RETRY_STATUS_FORCELIST, + status_forcelist=shared.STATUS_FORCELIST, ) session = requests.Session() session.mount("https://", HTTPAdapter(max_retries=max_retries)) - headers = {"accept": "application/vnd.github+json"} + headers = { + "accept": "application/vnd.github+json", + "User-Agent": shared.USER_AGENT, + } if GH_TOKEN: headers["authorization"] = f"Bearer {GH_TOKEN}" session.headers.update(headers)