Skip to content

Commit cfb6073

Browse files
committed
Merge branch 'main' into license-classify-modeling
2 parents 04b7d23 + ddb3958 commit cfb6073

File tree

154 files changed

+26796
-3178
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

154 files changed

+26796
-3178
lines changed

.github/workflows/1-fetch.yml

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
# Fetch Data workflow
#
# Runs the 1-fetch scripts (Google Custom Search, then GitHub) on the cron
# schedule below and whenever it is started manually (workflow_dispatch).
name: Fetch Data

on:
  schedule:
    # Normal schedule
    # # at 01:15 on all days in first month of each quarter
    # - cron: '15 1 * 1,4,7,10 *'
    # # at 01:15 on days 1-14 in second month of each quarter
    # - cron: '15 1 1-14 2,5,8,11 *'
    # Temp schedule
    # at 01:15 on all days in all months
    - cron: '15 1 * * *'

  workflow_dispatch:

jobs:
  fetch:
    runs-on: ubuntu-latest

    steps:

      # CC Technology team members:
      #   See cc-quantifying-bot GitHub entry in Bitwarden for information on
      #   BOT_ secrets
      #
      # The secrets are passed through env and referenced as quoted shell
      # variables so their values are never spliced into the script text
      # (a value containing quotes, `$` or backticks would otherwise be
      # re-interpreted by the shell).
      #
      # --global is used because this step runs before the repository is
      # checked out.
      - name: Configure git
        run: |
          git config --global init.defaultBranch main
          git config --global user.name "$BOT_NAME"
          git config --global user.email "$BOT_EMAIL"
        env:
          BOT_NAME: ${{ secrets.BOT_NAME }}
          BOT_EMAIL: ${{ secrets.BOT_EMAIL }}

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Default fetch-depth is 1, however that value results in errors
          # when GitPython attempts to push changes:
          #     "failed to push some refs"
          fetch-depth: 0
          token: ${{ secrets.BOT_TOKEN }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float
          python-version: '3.11'

      - name: Install Python dependencies
        run: |
          pip install --upgrade pip pipenv

      - name: Sync Python modules
        run: |
          pipenv sync --system

      # CC Technology team members:
      #   See cc-quantifying-bot Google Workspace entry in Bitwarden for
      #   information on GCS_ secrets
      - name: Fetch from Google Custom Search (GCS)
        run: |
          ./scripts/1-fetch/gcs_fetch.py \
            --limit=100 --enable-save --enable-git
        env:
          GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
          GCS_CX: ${{ secrets.GCS_CX }}

      - name: Fetch from GitHub
        run: |
          ./scripts/1-fetch/github_fetch.py \
            --enable-save --enable-git
        env:
          GH_TOKEN: ${{ secrets.BOT_TOKEN }}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Process Data workflow
#
# Prepares a Python 3.11 environment (pipenv sync --system) on the cron
# schedule below and on manual dispatch. The final step is still a
# placeholder: no process script is invoked yet.
name: Process Data

on:
  schedule:
    # at 02:15 on days 15-28 in second month of each quarter
    - cron: '15 2 15-28 2,5,8,11 *'
  workflow_dispatch:

jobs:
  process:
    runs-on: ubuntu-latest

    # CC Technology team members:
    #   See cc-quantifying-bot GitHub entry in Bitwarden for information on
    #   BOT_ secrets

    steps:
      # NOTE(review): the Fetch Data workflow checks out with fetch-depth: 0
      # because the default depth caused "failed to push some refs" errors
      # when GitPython pushed. If the process script will push commits, the
      # same setting is presumably needed here - TODO confirm.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.BOT_TOKEN }}

      # The secrets are passed through env and referenced as quoted shell
      # variables so their values are never spliced into the script text.
      - name: Configure git
        run: |
          git config user.name "$BOT_NAME"
          git config user.email "$BOT_EMAIL"
        env:
          BOT_NAME: ${{ secrets.BOT_NAME }}
          BOT_EMAIL: ${{ secrets.BOT_EMAIL }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install pipenv
          pipenv sync --system

      # Appending to the file named by GITHUB_ENV exports the variable to
      # all later steps of this job. The path is quoted so the redirect
      # target is not subject to word splitting.
      - name: Set PYTHONPATH
        run: echo "PYTHONPATH=./scripts" >> "$GITHUB_ENV"

      - name: Run process script
        run: |
          # ADD SCRIPT INVOCATION HERE

# vim: ft=yaml
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Generate Report workflow
#
# Prepares a Python 3.11 environment (pipenv sync --system) on the cron
# schedule below and on manual dispatch. The final step is still a
# placeholder: no report script is invoked yet.
name: Generate Report

on:
  schedule:
    # at 03:15 on all days in third month of each quarter
    - cron: '15 3 * 3,6,9,12 *'
  workflow_dispatch:

jobs:
  generate-report:
    runs-on: ubuntu-latest

    # CC Technology team members:
    #   See cc-quantifying-bot GitHub entry in Bitwarden for information on
    #   BOT_ secrets

    steps:
      # NOTE(review): the Fetch Data workflow checks out with fetch-depth: 0
      # because the default depth caused "failed to push some refs" errors
      # when GitPython pushed. If the report script will push commits, the
      # same setting is presumably needed here - TODO confirm.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.BOT_TOKEN }}

      # The secrets are passed through env and referenced as quoted shell
      # variables so their values are never spliced into the script text.
      - name: Configure git
        run: |
          git config user.name "$BOT_NAME"
          git config user.email "$BOT_EMAIL"
        env:
          BOT_NAME: ${{ secrets.BOT_NAME }}
          BOT_EMAIL: ${{ secrets.BOT_EMAIL }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install pipenv
          pipenv sync --system

      # Appending to the file named by GITHUB_ENV exports the variable to
      # all later steps of this job. The path is quoted so the redirect
      # target is not subject to word splitting.
      - name: Set PYTHONPATH
        run: echo "PYTHONPATH=./scripts" >> "$GITHUB_ENV"

      - name: Run report script
        run: |
          # ADD SCRIPT INVOCATION HERE

# vim: ft=yaml

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,6 @@ Thumbs.db
144144
# secrets
145145
.env
146146
query_secrets.py
147+
148+
# backup files
149+
*.bak

Pipfile

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,22 @@ name = "pypi"
55

66
[packages]
77
flickrapi = "*"
8-
internetarchive = "*"
8+
GitPython = "*"
9+
google-api-python-client = "*"
10+
h11 = ">=0.16.0" # Ensure dependency is secure
11+
internetarchive = ">=5.5.1"
912
jupyterlab = ">=3.6.7"
1013
matplotlib = "*"
1114
numpy = "*"
1215
pandas = "*"
1316
plotly = "*"
17+
pillow = ">=11.3.0" # Ensure dependency is secure
1418
Pyarrow = "*"
19+
Pygments = "*"
1520
python-dotenv = "*"
1621
requests = ">=2.31.0"
1722
seaborn = "*"
18-
urllib3 = ">=1.26.18"
23+
urllib3 = ">=2.5.0"
1924
wordcloud = "*"
2025

2126
[dev-packages]
@@ -27,3 +32,9 @@ pre-commit = "*"
2732

2833
[requires]
2934
python_version = "3.11"
35+
36+
[scripts]
37+
gcs_fetched = "./scripts/1-fetch/gcs_fetched.py"
38+
flickr_fetched = "./scripts/1-fetch/flickr_fetched.py"
39+
gcs_processed = "./scripts/2-process/gcs_processed.py"
40+
gcs_reports = "./scripts/3-report/gcs_reports.py"

0 commit comments

Comments
 (0)