Skip to content

Commit b9ea9d4

Browse files
committed
workflow file
1 parent b478cc0 commit b9ea9d4

File tree

1 file changed

+61
-131
lines changed

1 file changed

+61
-131
lines changed
Lines changed: 61 additions & 131 deletions
Original file line number | Diff line number | Diff line change
@@ -1,149 +1,79 @@
1-
name: Update CKAN Extensions Metadata
1+
name: Update CKAN Metadata
2+
2 3
on:
3 4
schedule:
4-
# Runs every Sunday at 2:00 AM UTC
5-
- cron: '0 2 * * 0'
6-
workflow_dispatch:
7-
# Allows manual triggering from GitHub UI
5+
# Run every Sunday at 4 AM UTC
6+
- cron: '0 4 * * 0'
7+
workflow_dispatch: # Allow manual triggering
8+
push:
9+
branches:
10+
- main
11+
paths:
12+
- 'sites-workflow/**'
13+
- 'Dockerfile'
14+
- 'docker-entrypoint.sh'
15+
- 'requirements.txt'
8 16

9 17
env:
10-
GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
11-
CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}
18+
DOCKER_IMAGE: dathere/ckan-metadata-workflow
19+
DOCKER_TAG: latest
12 20

13 21
jobs:
14-
update-metadata:
22+
build-and-push:
15 23
runs-on: ubuntu-latest
24+
outputs:
25+
image-digest: ${{ steps.build.outputs.digest }}
16 26

17 27
steps:
18 28
- name: Checkout repository
19 29
uses: actions/checkout@v4
20-
21-
- name: Set up Python
22-
uses: actions/setup-python@v4
30+
31+
- name: Set up Docker Buildx
32+
uses: docker/setup-buildx-action@v3
33+
34+
- name: Login to Docker Hub
35+
uses: docker/login-action@v3
23 36
with:
24-
python-version: '3.9'
25-
cache: 'pip'
26-
27-
- name: Install dependencies
28-
run: |
29-
python -m pip install --upgrade pip
30-
pip install -r requirements.txt
31-
32-
- name: Step 1 - Extract GitHub URLs from CKAN
33-
run: |
34-
echo "=== STEP 1: Extracting GitHub URLs from CKAN ==="
35-
python 1getURL.py
36-
echo "✓ URL extraction completed"
37-
38-
# Check if output file was created
39-
if [ -f "url_list.csv" ]; then
40-
echo "✓ url_list.csv created successfully"
41-
wc -l url_list.csv
42-
else
43-
echo "✗ url_list.csv not found"
44-
exit 1
45-
fi
46-
47-
- name: Step 2 - Fetch Dynamic Metadata from GitHub
48-
run: |
49-
echo "=== STEP 2: Fetching dynamic metadata from GitHub ==="
50-
python 2refresh.py
51-
echo "✓ Metadata extraction completed"
52-
53-
# Check if output file was created
54-
if [ -f "dynamic_metadata_update.csv" ]; then
55-
echo "✓ dynamic_metadata_update.csv created successfully"
56-
wc -l dynamic_metadata_update.csv
57-
else
58-
echo "✗ dynamic_metadata_update.csv not found"
59-
exit 1
60-
fi
61-
62-
- name: Step 3 - Update CKAN Catalog with Metadata
63-
run: |
64-
echo "=== STEP 3: Updating CKAN catalog with metadata ==="
65-
python 3updateCatalog.py
66-
echo "✓ CKAN catalog update completed"
67-
68-
- name: Step 3.1 - Download Existing CSV from CKAN
69-
run: |
70-
echo "=== STEP 3.1: Downloading existing CSV from CKAN ==="
71-
python 31download.py existing_metadata.csv
72-
echo "✓ CSV download completed"
73-
74-
# Check if download was successful
75-
if [ -f "existing_metadata.csv" ]; then
76-
echo "✓ existing_metadata.csv downloaded successfully"
77-
wc -l existing_metadata.csv
78-
else
79-
echo "✗ existing_metadata.csv not found"
80-
exit 1
81-
fi
82-
83-
- name: Step 3.2 - Merge CSVs
84-
run: |
85-
echo "=== STEP 3.2: Merging existing and new CSV data ==="
86-
python 32merger.py existing_metadata.csv dynamic_metadata_update.csv dynamic_metadata_update.csv
87-
echo "✓ CSV merge completed"
88-
89-
# Check if merge was successful (32merger.py overwrites dynamic_metadata_update.csv)
90-
if [ -f "dynamic_metadata_update.csv" ]; then
91-
echo "✓ dynamic_metadata_update.csv merged successfully"
92-
wc -l dynamic_metadata_update.csv
93-
else
94-
echo "✗ dynamic_metadata_update.csv not found after merge"
95-
exit 1
96-
fi
97-
98-
- name: Step 3.3 - Delete Old Resource from CKAN
99-
run: |
100-
echo "=== STEP 3.3: Deleting old resource from CKAN ==="
101-
python 33delete.py
102-
echo "✓ Old resource deletion completed"
37+
username: ${{ secrets.DOCKER_USERNAME }}
38+
password: ${{ secrets.DOCKER_PASSWORD }}
39+
40+
- name: Build and push Docker image
41+
id: build
42+
uses: docker/build-push-action@v5
43+
with:
44+
context: .
45+
file: ./Dockerfile
46+
push: true
47+
tags: ${{ env.DOCKER_IMAGE }}:${{ env.DOCKER_TAG }}
48+
platforms: linux/amd64,linux/arm64
49+
cache-from: type=gha
50+
cache-to: type=gha,mode=max
51+
52+
run-workflow:
53+
needs: build-and-push
54+
runs-on: ubuntu-latest
103 55

104-
- name: Step 4 - Upload New CSV to CKAN Dataset
56+
steps:
57+
- name: Run CKAN metadata workflow
105 58
run: |
106-
echo "=== STEP 4: Uploading merged CSV to CKAN dataset ==="
107-
# The upload script will use dynamic_metadata_update.csv (which is now merged)
108-
python 4uploadDataset.py
109-
echo "✓ CSV upload completed"
110-
111-
- name: Upload artifacts on failure
112-
if: failure()
59+
docker run --rm \
60+
-e CKAN_API_KEY="${{ secrets.CKAN_API_KEY }}" \
61+
-v ${{ github.workspace }}/data:/app/data \
62+
-v ${{ github.workspace }}/logs:/app/logs \
63+
${{ env.DOCKER_IMAGE }}:${{ env.DOCKER_TAG }}
64+
65+
- name: Upload workflow logs
113 66
uses: actions/upload-artifact@v4
67+
if: always()
114 68
with:
115-
name: debug-files
116-
path: |
117-
*.csv
118-
*.log
119-
retention-days: 7
120-
121-
- name: Upload generated CSVs as artifacts
122-
if: success()
69+
name: workflow-logs-${{ github.run_number }}
70+
path: logs/
71+
retention-days: 30
72+
73+
- name: Upload generated data
123 74
uses: actions/upload-artifact@v4
75+
if: success()
124 76
with:
125-
name: metadata-csv-files
126-
path: |
127-
dynamic_metadata_update.csv
128-
existing_metadata.csv
129-
retention-days: 30
130-
131-
- name: Workflow Summary
132-
if: always()
133-
run: |
134-
echo "=== WORKFLOW SUMMARY ==="
135-
echo "Status: ${{ job.status }}"
136-
echo "Timestamp: $(date -u)"
137-
138-
# Show file sizes if they exist
139-
for file in url_list.csv dynamic_metadata_update.csv existing_metadata.csv; do
140-
if [ -f "$file" ]; then
141-
echo "$file: $(wc -l < "$file") lines, $(du -h "$file" | cut -f1)"
142-
fi
143-
done
144-
145-
# Show any log files
146-
if ls *.log >/dev/null 2>&1; then
147-
echo "Log files created:"
148-
ls -la *.log
149-
fi
77+
name: generated-data-${{ github.run_number }}
78+
path: data/
79+
retention-days: 7

0 commit comments

Comments
 (0)