Skip to content

Commit 9e2aca0

Browse files
authored
Merge pull request #123 from creativecommons/gsoc2024-dev-1
Automating Quantifying the Commons (Google Summer of Code 2024)
2 parents dfca3e0 + 199d562 commit 9e2aca0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+8307
-685
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Workflow for the fetch phase: installs the locked dependencies and runs the
# enabled script(s) under scripts/1-fetch/.
name: Fetch Data

on:
  schedule:
    # Minute 15 of hours 1, 5, 9, 13, 17, 21 and 23, on days 1-20 of the
    # first month of each quarter (January, April, July, October).
    # GitHub Actions evaluates cron schedules in UTC.
    - cron: '15 1,5,9,13,17,21,23 1-20 1,4,7,10 *'
  # Allow the workflow to be started manually as well
  workflow_dispatch:

jobs:
  fetch:
    runs-on: ubuntu-latest

    # Credentials are read from repository secrets and exposed to the steps
    # as environment variables
    env:
      GCS_DEVELOPER_KEY: ${{ secrets.GCS_DEVELOPER_KEY }}
      GCS_CX: ${{ secrets.GCS_CX }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install pipenv
          pipenv sync --system

      - name: Set PYTHONPATH
        # Written to GITHUB_ENV so the variable is visible to later steps
        run: echo "PYTHONPATH=./scripts" >> "$GITHUB_ENV"

      - name: Run fetch script
        # Literal block scalar: every line is passed to the shell as its own
        # line, so a disabled script can be enabled by removing its leading
        # "# " without the commands being folded together or the YAML
        # becoming invalid
        run: |
          # python scripts/1-fetch/deviantart_fetched.py
          # python scripts/1-fetch/flickr_fetched.py
          # python scripts/1-fetch/github_fetched.py
          # python scripts/1-fetch/internetarchive_fetched.py
          # python scripts/1-fetch/metmuseum_fetched.py
          # python scripts/1-fetch/vimeo_fetched.py
          # python scripts/1-fetch/wikicommons_fetched.py
          # python scripts/1-fetch/wikipedia_fetched.py
          # python scripts/1-fetch/youtube_fetched.py
          python scripts/1-fetch/gcs_fetched.py
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
# Workflow for the process phase: installs the locked dependencies and runs
# the enabled script(s) under scripts/2-process/.
name: Process Data

on:
  schedule:
    # Minute 15 of hours 1, 5, 9, 13, 17, 21 and 23, on days 1-20 of the
    # second month of each quarter (February, May, August, November).
    # GitHub Actions evaluates cron schedules in UTC.
    - cron: '15 1,5,9,13,17,21,23 1-20 2,5,8,11 *'
  # Allow the workflow to be started manually as well
  workflow_dispatch:

jobs:
  process:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install pipenv
          pipenv sync --system

      - name: Set PYTHONPATH
        # Written to GITHUB_ENV so the variable is visible to later steps
        run: echo "PYTHONPATH=./scripts" >> "$GITHUB_ENV"

      - name: Run process script
        # Literal block scalar: every line is passed to the shell as its own
        # line, so a disabled script can be enabled by removing its leading
        # "# " without the commands being folded together or the YAML
        # becoming invalid
        run: |
          # python scripts/2-process/deviantart_processed.py
          # python scripts/2-process/flickr_processed.py
          # python scripts/2-process/github_processed.py
          # python scripts/2-process/internetarchive_processed.py
          # python scripts/2-process/metmuseum_processed.py
          # python scripts/2-process/vimeo_processed.py
          # python scripts/2-process/wikicommons_processed.py
          # python scripts/2-process/wikipedia_processed.py
          # python scripts/2-process/youtube_processed.py
          python scripts/2-process/gcs_processed.py
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
# Workflow for the report phase: installs the locked dependencies and runs
# the enabled script(s) under scripts/3-report/.
name: Generate Report

on:
  schedule:
    # Minute 15 of hours 1, 5, 9, 13, 17, 21 and 23, on days 1-20 of the
    # third month of each quarter (March, June, September, December).
    # GitHub Actions evaluates cron schedules in UTC.
    - cron: '15 1,5,9,13,17,21,23 1-20 3,6,9,12 *'
  # Allow the workflow to be started manually as well
  workflow_dispatch:

jobs:
  generate-report:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install --upgrade pip
          pip install pipenv
          pipenv sync --system

      - name: Set PYTHONPATH
        # Written to GITHUB_ENV so the variable is visible to later steps
        run: echo "PYTHONPATH=./scripts" >> "$GITHUB_ENV"

      - name: Run report script
        # Literal block scalar: every line is passed to the shell as its own
        # line, so a disabled script can be enabled by removing its leading
        # "# " without the commands being folded together or the YAML
        # becoming invalid
        run: |
          # python scripts/3-report/deviantart_reports.py
          # python scripts/3-report/flickr_reports.py
          # python scripts/3-report/github_reports.py
          # python scripts/3-report/internetarchive_reports.py
          # python scripts/3-report/metmuseum_reports.py
          # python scripts/3-report/vimeo_reports.py
          # python scripts/3-report/wikicommons_reports.py
          # python scripts/3-report/wikipedia_reports.py
          # python scripts/3-report/youtube_reports.py
          python scripts/3-report/gcs_reports.py

Pipfile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ name = "pypi"
55

66
[packages]
77
flickrapi = "*"
8+
GitPython = "*"
9+
google-api-python-client = "*"
810
internetarchive = "*"
911
jupyterlab = ">=3.6.7"
1012
matplotlib = "*"
@@ -27,3 +29,9 @@ pre-commit = "*"
2729

2830
[requires]
2931
python_version = "3.11"
32+
33+
[scripts]
34+
gcs_fetched = "./scripts/1-fetch/gcs_fetched.py"
35+
flickr_fetched = "./scripts/1-fetch/flickr_fetched.py"
36+
gcs_processed = "./scripts/2-process/gcs_processed.py"
37+
gcs_reports = "./scripts/3-report/gcs_reports.py"

Pipfile.lock

Lines changed: 805 additions & 649 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,49 @@ See [`CONTRIBUTING.md`][org-contrib].
3939
[org-contrib]: https://github.com/creativecommons/.github/blob/main/CONTRIBUTING.md
4040

4141

42+
### Project structure
43+
44+
Please note that in the directory tree below, all instances of `fetch`,
45+
`process`, and `report` refer to the three phases of data gathering,
46+
processing, and report generation.
47+
48+
```
49+
Quantifying/
50+
├── .github/
51+
│ ├── workflows/
52+
│ │ ├── fetch.yml
53+
│ │ ├── process.yml
54+
│ │ ├── report.yml
55+
│ │ └── static_analysis.yml
56+
├── data/ # Data generated by script runs
57+
│ ├── 20XXQX/
58+
│ │ ├── 1-fetch/
59+
│ │ ├── 2-process/
60+
│ │ ├── 3-report/
61+
│ │ │ └── README.md # All generated reports are displayed in the README
62+
│ └── ...
63+
├── dev/
64+
├── pre-automation/ # All Quantifying work prior to adding automation system
65+
├── scripts/ # Run scripts for all phases
66+
│ ├── 1-fetch/
67+
│ ├── 2-process/
68+
│ ├── 3-report/
69+
│ └── shared.py
70+
├── .cc-metadata.yml
71+
├── .flake8 # Python tool configuration
72+
├── .gitignore
73+
├── .pre-commit-config.yaml # Static analysis configuration
74+
├── LICENSE
75+
├── Pipfile # Specifies the project's dependencies and Python version
76+
├── Pipfile.lock
77+
├── README.md
78+
├── env.example
79+
├── history.md
80+
├── pyproject.toml # Python tools configuration
81+
└── sources.md
82+
```
83+
84+
4285
## Development
4386

4487

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
LICENSE TYPE, No Priori, United States, Canada, India, United Kingdom, Australia, Japan, English, Spanish, French, Arabic, Chinese (Simplified), Indonesian
2+
https://creativecommons.org/licenses/nc/2.0,57000000,43700000,39400,11500,157000,31100,29300,54600000,1160000,2720000,2960,68100000,101000
3+
https://creativecommons.org/licenses/sa/1.0,108000000,91000000,34400,19100,158000,43100,25100,104000000,268000,332000,28500,106000000,223000
4+
https://creativecommons.org/licenses/nd/1.0,222000000,259000000,76700,63400,519000,165000,79600,215000000,492000,383000,69500,222000000,2480000
5+
https://creativecommons.org/licenses/by-sa/3.0,108000000,88800000,64700,47600,254000,151000,145000,93500000,5010000,3980000,90500,114000000,112000
6+
https://creativecommons.org/licenses/sampling/1.0,170000000,151000000,26500,23600,263000,56800,34300,170000000,87500,21500,1630,171000000,209000
7+
https://creativecommons.org/licenses/nc-sa/1.0,24700000,17600000,12600,3800,75300,11600,13400,22600000,67700,272000,1200,24700000,42400
8+
https://creativecommons.org/licenses/nd-nc/2.0,56900000,43700000,43500,11100,156000,36500,28300,54200000,237000,549000,2660,67600000,11500
9+
https://creativecommons.org/licenses/by-nd-nc/1.0,16000000,10400000,13500,2900,64700,9990,21300,15500000,63400,28800,1920,16000000,27600
10+
https://creativecommons.org/licenses/sa/1.0,108000000,91500000,23000,17200,158000,38200,21700,104000000,270000,337000,28600,108000000,224000
11+
https://creativecommons.org/licenses/nd-nc/1.0,57200000,45500000,23100,8400,123000,21400,30000,56100000,135000,249000,3340,57200000,10200
12+
https://creativecommons.org/publicdomain/zero/1.0,32700000,30000000,20200,18200,48600,12100,32200,30900000,131000,93400,16100,32700000,10500
13+
https://creativecommons.org/licenses/nc-sa/2.0,25000000,17700000,19700,4640,80700,14700,11900,23400000,368000,1340000,1490,25000000,36100
14+
https://creativecommons.org/licenses/sa/1.0,108000000,91400000,23000,17100,159000,38200,21600,104000000,270000,337000,28600,108000000,224000
15+
https://creativecommons.org/licenses/by-sa/1.0,102000000,85900000,21900,15800,150000,36100,21200,97400000,267000,330000,28300,102000000,219000
16+
https://creativecommons.org/licenses/by-nd/2.5,49500000,35500000,45100,8640,133000,24600,23600,47800000,367000,37900,2200,49500000,11200
17+
https://creativecommons.org/licenses/by-nd-nc/1.0,15800000,10300000,15600,2940,66800,10200,21400,15300000,62400,29000,1980,15800000,27500
18+
https://creativecommons.org/licenses/nd/1.0,220000000,194000000,89900,56500,424000,147000,88400,213000000,490000,297000,69500,220000000,2570000
19+
https://creativecommons.org/licenses/by-nc-sa/4.0,62900000,44700000,49900,32900,163000,32900,79400,58900000,1020000,3610000,11500,62900000,102000
20+
https://creativecommons.org/licenses/sampling+/1.0,169000000,151000000,27300,22900,274000,57300,34700,168000000,123000,62800,1310,169000000,200000
21+
https://creativecommons.org/licenses/by-nc-sa/2.5,31600000,22800000,20100,5400,83100,15200,10400,29900000,258000,250000,1570,31600000,7290
22+
https://creativecommons.org/licenses/nc-sa/1.0,24100000,17500000,10400,3820,58800,11800,10600,23300000,67500,280000,1200,24100000,42400
54.9 KB
Loading
56 KB
Loading
81.1 KB
Loading

0 commit comments

Comments
 (0)