# Display name of this workflow in the GitHub Actions UI.
name: Update CKAN Metadata

# Triggers: weekly schedule, manual dispatch, and pushes to main that touch
# the workflow sources or the files the Docker image is built from.
on:
  schedule:
    # Run every Sunday at 4 AM UTC
    - cron: '0 4 * * 0'
  workflow_dispatch:  # Allow manual triggering
  push:
    branches:
      - main
    paths:
      - 'sites-workflow/**'
      - 'Dockerfile'
      - 'docker-entrypoint.sh'
      - 'requirements.txt'

# Workflow-level variables: repository and tag of the image that is built,
# pushed, and then run by the jobs in this workflow.
env:
  DOCKER_IMAGE: dathere/ckan-metadata-workflow
  DOCKER_TAG: latest

jobs:
  # Builds the multi-platform workflow image and pushes it to Docker Hub.
  build-and-push:
    runs-on: ubuntu-latest
    outputs:
      # Digest of the pushed image, consumed by run-workflow so that it runs
      # exactly the image produced by this job.
      image-digest: ${{ steps.build.outputs.digest }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Build and push Docker image
        id: build
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ${{ env.DOCKER_IMAGE }}:${{ env.DOCKER_TAG }}
          platforms: linux/amd64,linux/arm64
          # Reuse layers between runs through the GitHub Actions cache.
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # Runs the freshly built image; data/ and logs/ are bind-mounted from the
  # runner workspace so they can be uploaded as artifacts afterwards.
  run-workflow:
    needs: build-and-push
    runs-on: ubuntu-latest

    steps:
      - name: Run CKAN metadata workflow
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so the secret's characters are never parsed by the shell.
          CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}
          # Pin to the digest built above rather than the mutable tag.
          IMAGE_REF: ${{ env.DOCKER_IMAGE }}@${{ needs.build-and-push.outputs.image-digest }}
        # NOTE(review): the previous version of this workflow also exported
        # GITHUB_TOKEN (from secrets.GH_METADATA_TOKEN); confirm the container
        # does not need it.
        run: |
          docker run --rm \
            -e CKAN_API_KEY \
            -v "${{ github.workspace }}/data:/app/data" \
            -v "${{ github.workspace }}/logs:/app/logs" \
            "$IMAGE_REF"

      # Logs are uploaded even when the run step fails, to aid debugging.
      - name: Upload workflow logs
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: workflow-logs-${{ github.run_number }}
          path: logs/
          retention-days: 30

      # Generated data is only uploaded when every previous step succeeded.
      - name: Upload generated data
        uses: actions/upload-artifact@v4
        if: success()
        with:
          name: generated-data-${{ github.run_number }}
          path: data/
          retention-days: 7
# 0 commit comments