Skip to content

Commit 036a1bb

Browse files
author
=
committed
Merge branch 'main' of https://github.com/creativecommons/quantifying into museums
2 parents 2f45937 + d5f457e commit 036a1bb

File tree

4 files changed

+110
-6
lines changed

4 files changed

+110
-6
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Run dev/test_scripts_help.sh on every push (or on manual dispatch) to check
# that each script can display its help message.
name: Test scripts' help

on:
  push:
  workflow_dispatch:

# The job only reads the repository, so restrict the GITHUB_TOKEN to
# read-only access to contents instead of relying on the default scope.
permissions:
  contents: read

jobs:
  job:
    runs-on: ubuntu-latest

    steps:

      # https://github.com/actions/setup-python
      - name: Install Python 3.11
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install pipenv
        run: |
          pip install --upgrade pip
          pip install pipenv

      # https://github.com/actions/checkout
      - name: Checkout quantifying
        uses: actions/checkout@v4

      # --system installs the locked dependencies into the runner's Python so
      # the test script can be run without "pipenv run"
      - name: Install Python dependencies
        run: |
          pipenv sync --system

      - name: Test scripts' help
        run: |
          ./dev/test_scripts_help.sh

dev/test_scripts_help.sh

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env bash
#
# Ensure each script can display its help message, as a check of basic
# execution.
#
# This script must be run from within the pipenv shell or a properly configured
# environment. For example:
#
# 1. Using pipenv run
#        pipenv run ./dev/test_scripts_help.sh
#
# 2. Using pipenv shell
#        pipenv shell
#        ./dev/test_scripts_help.sh
#
# 3. A properly configured environment
#    (see .github/workflows/test_scripts_help.yml)
#
#### SETUP ####################################################################

set -o errexit
set -o errtrace
set -o nounset

# On any unhandled error, report the failing command and exit with its status.
# The values are passed as printf arguments (not embedded in the format
# string) so that "%" or backslashes in the command text are printed literally.
# shellcheck disable=SC2154
trap '_es=${?};
    printf "%s: line %s: \"%s\" exited with a status of %s\n" \
        "${0}" "${LINENO}" "${BASH_COMMAND}" "${_es}";
    exit "${_es}"' ERR

# Absolute, symlink-resolved path of the repository root (the parent of the
# directory containing this script). dirname is used instead of "${0%/*}" so
# that an invocation without a slash in $0 (e.g. "bash test_scripts_help.sh")
# resolves to the current directory rather than failing.
DIR_REPO="$(cd -P -- "$(dirname -- "${0}")/.." && pwd -P)"
# Exit status of this script; overwritten when a script fails its help check
EXIT_STATUS=0
32+
33+
#### FUNCTIONS ################################################################
34+
35+
test_help() {
    # Run every stage script with --help and report the outcome of each.
    #
    # The scripts are the *.py files found under directories matching
    # scripts/?-* (relative to the current directory). Each one is executed
    # directly with its output discarded, and one line per script is printed,
    # prefixed with PASS or FAIL so the two outcomes can be told apart.
    #
    # Globals:
    #   EXIT_STATUS  set to the exit status of the most recent failing script
    local _es _script
    # Read the sorted paths line by line on a dedicated file descriptor (3):
    # paths are neither word-split nor glob-expanded, and the scripts under
    # test keep the caller's stdin.
    while IFS= read -r _script <&3
    do
        _es=0
        # "|| _es=${?}" records a failure without aborting the loop when the
        # caller has errexit enabled
        ./"${_script}" --help &>/dev/null || _es=${?}
        if (( _es == 0 ))
        then
            echo "PASS: ${_script}"
        else
            echo "FAIL (exit ${_es}): ${_script}"
            EXIT_STATUS=${_es}
        fi
    done 3< <(find scripts/?-* -type f -name '*.py' | sort)
}
50+
51+
#### MAIN #####################################################################
52+
53+
# Work from the repository root so the relative scripts/ paths used by
# test_help resolve, then report and propagate the collected exit status.
cd "${DIR_REPO}"
test_help
printf 'exit status: %s\n' "${EXIT_STATUS}"
exit "${EXIT_STATUS}"

scripts/1-fetch/arxiv_fetch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
# Constants
3636
# API Configuration
37-
BASE_URL = "http://export.arxiv.org/api/query?"
37+
BASE_URL = "https://export.arxiv.org/api/query?"
3838
DEFAULT_FETCH_LIMIT = 800 # Default total papers to fetch
3939

4040
# CSV Headers

scripts/shared.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
import logging
33
import os
44
import sys
5+
from collections import OrderedDict
56
from datetime import datetime, timezone
67

78
# Third-party
89
from git import InvalidGitRepositoryError, NoSuchPathError, Repo
910
from pandas import PeriodIndex
1011
from requests import Session
11-
from requests.adapters import HTTPAdapter, Retry
12+
from requests.adapters import HTTPAdapter
13+
from urllib3.util import Retry
1214

1315
# Constants
1416
STATUS_FORCELIST = [
@@ -33,14 +35,26 @@ def __init__(self, message, exit_code=None):
3335
super().__init__(self.message)
3436

3537

36-
def get_session(accept_header=None):
37-
"""Create a reusable HTTP session with retry logic."""
38-
session = Session()
38+
def get_session(accept_header=None, session=None):
39+
"""
40+
Create or configure a reusable HTTPS session with retry logic and
41+
appropriate headers.
42+
"""
43+
if session is None:
44+
session = Session()
45+
46+
# Purge default and custom session connection adapters
47+
# (With only a https:// adapter, below, unencrypted requests will fail.)
48+
session.adapters = OrderedDict()
3949

50+
# Try again after 0s, 6s, 12s, 24s, 48s (total 90s) for the specified HTTP
51+
# error codes (STATUS_FORCELIST)
4052
retry_strategy = Retry(
4153
total=5,
42-
backoff_factor=10,
54+
backoff_factor=3,
4355
status_forcelist=STATUS_FORCELIST,
56+
allowed_methods=["GET", "POST"],
57+
raise_on_status=False,
4458
)
4559
session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
4660

0 commit comments

Comments
 (0)