---
# Datapump CKAN Sites Timeseries Metadata
#
# Weekly pipeline that runs the scripts in sites-workflow/ in order:
#   1. 1getSitesURL.py         (step then requires site_urls.csv to exist)
#   2. 2CKANActionAPI.py       (step then requires ckan_stats.csv to exist)
#   3. 3updateSitesCatalog.py
#   4. datapump.py ckan_stats.csv
# Generated CSVs are uploaded as artifacts on success; CSVs and logs are
# uploaded as debug artifacts on failure.
#
# NOTE(review): the page this file was captured from warned about hidden or
# bidirectional Unicode text. The only non-ASCII characters visible here are
# the check/cross marks in the echo strings - confirm nothing else is hidden
# in the committed file.
name: Datapump CKAN Sites Timeseries Metadata

on:
  schedule:
    # Runs every Sunday at 3:00 AM UTC (1 hour after extensions workflow)
    - cron: '0 3 * * 0'
  # Allows manual triggering from GitHub UI
  workflow_dispatch:

# Least privilege for the automatic token: this workflow only checks out the
# repository with it. The scripts receive their own credentials from secrets.
permissions:
  contents: read

# A manual dispatch must not overlap the scheduled run, because Step 4 appends
# data. Later runs wait for the running one instead of cancelling it mid-append.
concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false

jobs:
  update-sites-metadata:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
          # Key the pip cache on the one requirements file this job installs.
          cache-dependency-path: sites-workflow/requirements.txt

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r sites-workflow/requirements.txt

      # Secrets are scoped to the script steps only, so third-party actions
      # and `pip install` never see them.
      # NOTE(review): which script needs which secret is not visible from this
      # file, so every script step receives both - narrow further if possible.
      - name: Step 1 - Extract Sites URLs from CKAN
        working-directory: sites-workflow
        env:
          GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
          CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}
        run: |
          echo "=== STEP 1: Extracting Sites URLs from CKAN ==="
          python 1getSitesURL.py
          echo "✓ Sites URL extraction completed"
          # Check if output file was created
          if [ -f "site_urls.csv" ]; then
            echo "✓ site_urls.csv created successfully"
            wc -l site_urls.csv
          else
            echo "✗ site_urls.csv not found"
            exit 1
          fi

      - name: Step 2 - Fetch Sites Data via CKAN Action API
        working-directory: sites-workflow
        env:
          GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
          CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}
        run: |
          echo "=== STEP 2: Fetching sites data via CKAN Action API ==="
          python 2CKANActionAPI.py
          echo "✓ Sites data extraction completed"
          # Check if output file was created
          if [ -f "ckan_stats.csv" ]; then
            echo "✓ ckan_stats.csv created successfully"
            wc -l ckan_stats.csv
          else
            echo "✗ ckan_stats.csv not found"
            exit 1
          fi

      - name: Step 3 - Update CKAN Sites Catalog
        working-directory: sites-workflow
        env:
          GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
          CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}
        run: |
          echo "=== STEP 3: Updating CKAN sites catalog ==="
          python 3updateSitesCatalog.py
          echo "✓ CKAN sites catalog update completed"

      - name: Step 4 - Append Time-Series Data to CKAN
        working-directory: sites-workflow
        env:
          GITHUB_TOKEN: ${{ secrets.GH_METADATA_TOKEN }}
          CKAN_API_KEY: ${{ secrets.CKAN_API_KEY }}
        run: |
          echo "=== STEP 4: Appending time-series data to CKAN ==="
          python datapump.py ckan_stats.csv
          echo "✓ Time-series data append completed"

      # Keep whatever CSVs/logs exist so a failed run can be diagnosed.
      - name: Upload artifacts on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: sites-debug-files
          path: |
            sites-workflow/*.csv
            sites-workflow/*.log
          retention-days: 7

      - name: Upload generated Sites CSVs as artifacts
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: sites-metadata-csv-files
          path: |
            sites-workflow/site_urls.csv
            sites-workflow/ckan_stats.csv
            sites-workflow/dynamic_metadata_update.csv
            sites-workflow/existing_sites_metadata.csv
          retention-days: 30

      - name: Workflow Summary
        if: always()
        env:
          # Passed through the environment rather than spliced into the script.
          JOB_STATUS: ${{ job.status }}
        run: |
          echo "=== SITES WORKFLOW SUMMARY ==="
          echo "Status: ${JOB_STATUS}"
          echo "Timestamp: $(date -u)"
          # This step also runs when checkout failed, so the directory may be
          # missing; report that instead of failing with a second error.
          if [ ! -d sites-workflow ]; then
            echo "sites-workflow directory not found; nothing to report"
            exit 0
          fi
          # Show file sizes if they exist
          cd sites-workflow
          for file in site_urls.csv ckan_stats.csv dynamic_metadata_update.csv existing_sites_metadata.csv; do
            if [ -f "$file" ]; then
              echo "$file: $(wc -l < "$file") lines, $(du -h "$file" | cut -f1)"
            fi
          done
          # Show any log files
          if ls *.log >/dev/null 2>&1; then
            echo "Log files created:"
            ls -la *.log
          fi