Update CKAN Extensions Metadata #43
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# Scheduled pipeline that runs a sequence of numbered Python scripts
# (1getURL.py -> 2refresh.py -> 3updateCatalog.py -> 31download.py ->
# 32merger.py -> 33delete.py -> 4uploadDataset.py) and keeps the CSV files
# they produce as build artifacts. What each script does internally is not
# visible from this file; the comments below describe only what the workflow
# itself checks.
name: Update CKAN Extensions Metadata

on:
  schedule:
    # Runs every Sunday at 2:00 AM UTC
    - cron: '0 2 * * 0'
  # Allows manual triggering from GitHub UI
  workflow_dispatch:

# Least privilege for the automatically provisioned token: the only built-in
# need is reading the repository for checkout. The scripts receive their own
# credentials through the `env` block below, which this setting does not touch.
permissions:
  contents: read

# Never let a scheduled run and a manual run execute at the same time: both
# would use the same self-hosted runner and hit the same CKAN resource in the
# delete/upload steps. A second run waits instead of cancelling the first.
concurrency:
  group: update-ckan-extensions-metadata
  cancel-in-progress: false

# Credentials exported to every step; the Python scripts are presumably the
# consumers (verify against the scripts).
env:
  GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
  CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}

jobs:
  update-metadata:
    runs-on: self-hosted
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is a string, not a float.
          python-version: '3.9'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # The step fails unless url_list.csv exists after the script has run.
      - name: Step 1 - Extract GitHub URLs from CKAN
        run: |
          echo "=== STEP 1: Extracting GitHub URLs from CKAN ==="
          python 1getURL.py
          echo "✓ URL extraction completed"
          # Check if output file was created
          if [ -f "url_list.csv" ]; then
          echo "✓ url_list.csv created successfully"
          wc -l url_list.csv
          else
          echo "✗ url_list.csv not found"
          exit 1
          fi

      # The step fails unless dynamic_metadata_update.csv exists afterwards.
      - name: Step 2 - Fetch Dynamic Metadata from GitHub
        run: |
          echo "=== STEP 2: Fetching dynamic metadata from GitHub ==="
          python 2refresh.py
          echo "✓ Metadata extraction completed"
          # Check if output file was created
          if [ -f "dynamic_metadata_update.csv" ]; then
          echo "✓ dynamic_metadata_update.csv created successfully"
          wc -l dynamic_metadata_update.csv
          else
          echo "✗ dynamic_metadata_update.csv not found"
          exit 1
          fi

      - name: Step 3 - Update CKAN Catalog with Metadata
        run: |
          echo "=== STEP 3: Updating CKAN catalog with metadata ==="
          python 3updateCatalog.py
          echo "✓ CKAN catalog update completed"

      # The target file name is passed as the script's only argument and the
      # step fails unless that file exists afterwards.
      - name: Step 3.1 - Download Existing CSV from CKAN
        run: |
          echo "=== STEP 3.1: Downloading existing CSV from CKAN ==="
          python 31download.py existing_metadata.csv
          echo "✓ CSV download completed"
          # Check if download was successful
          if [ -f "existing_metadata.csv" ]; then
          echo "✓ existing_metadata.csv downloaded successfully"
          wc -l existing_metadata.csv
          else
          echo "✗ existing_metadata.csv not found"
          exit 1
          fi

      # NOTE(review): dynamic_metadata_update.csv is passed twice, so it looks
      # like it is both an input and the output of the merge - confirm that
      # 32merger.py reads its inputs completely before it opens the output.
      - name: Step 3.2 - Merge CSVs
        run: |
          echo "=== STEP 3.2: Merging existing and new CSV data ==="
          python 32merger.py existing_metadata.csv dynamic_metadata_update.csv dynamic_metadata_update.csv
          echo "✓ CSV merge completed"
          # Check if merge was successful (32merger.py overwrites dynamic_metadata_update.csv)
          if [ -f "dynamic_metadata_update.csv" ]; then
          echo "✓ dynamic_metadata_update.csv merged successfully"
          wc -l dynamic_metadata_update.csv
          else
          echo "✗ dynamic_metadata_update.csv not found after merge"
          exit 1
          fi

      # NOTE(review): the old resource is deleted before the new one is
      # uploaded in Step 4; if Step 4 fails, the dataset is presumably left
      # without the resource until the next successful run - confirm this is
      # acceptable.
      - name: Step 3.3 - Delete Old Resource from CKAN
        run: |
          echo "=== STEP 3.3: Deleting old resource from CKAN ==="
          python 33delete.py
          echo "✓ Old resource deletion completed"

      - name: Step 4 - Upload New CSV to CKAN Dataset
        run: |
          echo "=== STEP 4: Uploading merged CSV to CKAN dataset ==="
          # The upload script will use dynamic_metadata_update.csv (which is now merged)
          python 4uploadDataset.py
          echo "✓ CSV upload completed"

      # Keep whatever CSV/log files exist when any earlier step failed, so the
      # run can be debugged afterwards.
      - name: Upload artifacts on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: debug-files
          path: |
            *.csv
            *.log
          retention-days: 7

      - name: Upload generated CSVs as artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: metadata-csv-files
          path: |
            dynamic_metadata_update.csv
            existing_metadata.csv
          retention-days: 30

      # Runs regardless of the outcome of the previous steps and prints the
      # job status plus line counts and sizes of the files that exist.
      - name: Workflow Summary
        if: always()
        run: |
          echo "=== WORKFLOW SUMMARY ==="
          echo "Status: ${{ job.status }}"
          echo "Timestamp: $(date -u)"
          # Show file sizes if they exist
          for file in url_list.csv dynamic_metadata_update.csv existing_metadata.csv; do
          if [ -f "$file" ]; then
          echo "$file: $(wc -l < "$file") lines, $(du -h "$file" | cut -f1)"
          fi
          done
          # Show any log files
          if ls *.log >/dev/null 2>&1; then
          echo "Log files created:"
          ls -la *.log
          fi