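# Update CKAN Extensions Metadata #43
#
# Workflow definition for "Update CKAN Extensions Metadata".
# (Page-navigation text and repeated titles left over from the web view
# were folded into this comment so they no longer sit in the file as bare text.)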

name: Update CKAN Extensions Metadata

# Pipeline overview (taken from the step names and log messages below):
#   1. 1getURL.py        -> must leave url_list.csv behind
#   2. 2refresh.py       -> must leave dynamic_metadata_update.csv behind
#   3. 3updateCatalog.py -> catalog update
#      31download.py     -> writes existing_metadata.csv
#      32merger.py       -> merges both CSVs back into dynamic_metadata_update.csv
#      33delete.py       -> removes the old CKAN resource
#   4. 4uploadDataset.py -> uploads the merged CSV
# Any failing command aborts the job; the artifact and summary steps at the
# end still run according to their own `if:` conditions.

on:
  schedule:
    # Runs every Sunday at 2:00 AM UTC
    - cron: '0 2 * * 0'
  # Allows manual triggering from GitHub UI
  workflow_dispatch:

# Least privilege for the automatic workflow token: only actions/checkout
# needs it, and only to read the repository. The scripts receive a separate
# token through the GITHUB_TOKEN environment variable defined under `env`.
permissions:
  contents: read

# Never let a manual dispatch overlap the scheduled run: the job deletes and
# re-uploads a CKAN resource, so two concurrent runs could interleave those
# steps. A queued run waits instead of cancelling the one in progress.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false

env:
  # Populated from the GH_METADATA_TOKEN secret, not the automatic token.
  # NOTE(review): presumably read by the Python scripts for GitHub API
  # calls -- confirm against the scripts.
  GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
  CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}

# The run scripts use POSIX test/if syntax, so pin the shell explicitly rather
# than relying on whatever the self-hosted runner defaults to. GitHub invokes
# an explicit `bash` with `-eo pipefail`, so any failing command fails the step.
defaults:
  run:
    shell: bash

jobs:
  update-metadata:
    runs-on: self-hosted
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        # v5 keeps the same inputs as v4 and runs on the Node 20 runtime
        # already required by the other v4-generation actions in this job.
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string and is never read as a float.
          python-version: '3.9'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Step 1 - Extract GitHub URLs from CKAN
        run: |
          echo "=== STEP 1: Extracting GitHub URLs from CKAN ==="
          python 1getURL.py
          echo "✓ URL extraction completed"
          # Check if output file was created
          if [ -f "url_list.csv" ]; then
            echo "✓ url_list.csv created successfully"
            wc -l url_list.csv
          else
            echo "✗ url_list.csv not found"
            exit 1
          fi

      - name: Step 2 - Fetch Dynamic Metadata from GitHub
        run: |
          echo "=== STEP 2: Fetching dynamic metadata from GitHub ==="
          python 2refresh.py
          echo "✓ Metadata extraction completed"
          # Check if output file was created
          if [ -f "dynamic_metadata_update.csv" ]; then
            echo "✓ dynamic_metadata_update.csv created successfully"
            wc -l dynamic_metadata_update.csv
          else
            echo "✗ dynamic_metadata_update.csv not found"
            exit 1
          fi

      - name: Step 3 - Update CKAN Catalog with Metadata
        run: |
          echo "=== STEP 3: Updating CKAN catalog with metadata ==="
          python 3updateCatalog.py
          echo "✓ CKAN catalog update completed"

      - name: Step 3.1 - Download Existing CSV from CKAN
        run: |
          echo "=== STEP 3.1: Downloading existing CSV from CKAN ==="
          python 31download.py existing_metadata.csv
          echo "✓ CSV download completed"
          # Check if download was successful
          if [ -f "existing_metadata.csv" ]; then
            echo "✓ existing_metadata.csv downloaded successfully"
            wc -l existing_metadata.csv
          else
            echo "✗ existing_metadata.csv not found"
            exit 1
          fi

      - name: Step 3.2 - Merge CSVs
        run: |
          echo "=== STEP 3.2: Merging existing and new CSV data ==="
          python 32merger.py existing_metadata.csv dynamic_metadata_update.csv dynamic_metadata_update.csv
          echo "✓ CSV merge completed"
          # Check if merge was successful (32merger.py overwrites dynamic_metadata_update.csv)
          if [ -f "dynamic_metadata_update.csv" ]; then
            echo "✓ dynamic_metadata_update.csv merged successfully"
            wc -l dynamic_metadata_update.csv
          else
            echo "✗ dynamic_metadata_update.csv not found after merge"
            exit 1
          fi

      # NOTE(review): the old resource is deleted before the replacement is
      # uploaded in Step 4. If the upload fails, the CSV artifacts saved by
      # the failure step below are the only copy produced by this run.
      - name: Step 3.3 - Delete Old Resource from CKAN
        run: |
          echo "=== STEP 3.3: Deleting old resource from CKAN ==="
          python 33delete.py
          echo "✓ Old resource deletion completed"

      - name: Step 4 - Upload New CSV to CKAN Dataset
        run: |
          echo "=== STEP 4: Uploading merged CSV to CKAN dataset ==="
          # The upload script will use dynamic_metadata_update.csv (which is now merged)
          python 4uploadDataset.py
          echo "✓ CSV upload completed"

      # Runs only when an earlier step failed: keep every CSV and log file
      # found in the workspace for a week to help with debugging.
      - name: Upload artifacts on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: debug-files
          path: |
            *.csv
            *.log
          retention-days: 7

      # Runs only when all earlier steps succeeded: keep the merged CSV and
      # the downloaded CSV for 30 days.
      - name: Upload generated CSVs as artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: metadata-csv-files
          path: |
            dynamic_metadata_update.csv
            existing_metadata.csv
          retention-days: 30

      # Always runs, whatever the job outcome, and prints the job status plus
      # the size of each pipeline file that exists.
      - name: Workflow Summary
        if: always()
        run: |
          echo "=== WORKFLOW SUMMARY ==="
          echo "Status: ${{ job.status }}"
          echo "Timestamp: $(date -u)"
          # Show file sizes if they exist
          for file in url_list.csv dynamic_metadata_update.csv existing_metadata.csv; do
            if [ -f "$file" ]; then
              echo "$file: $(wc -l < "$file") lines, $(du -h "$file" | cut -f1)"
            fi
          done
          # Show any log files
          if ls *.log >/dev/null 2>&1; then
            echo "Log files created:"
            ls -la *.log
          fi