# Run Pipelines #351
# Note: the page this file was copied from warned that it may contain hidden or
# bidirectional Unicode text that can be interpreted differently from what is
# displayed. Review it in an editor that reveals hidden Unicode characters.
# Runs the data pipelines either on the nightly schedule or on demand.
name: Run Pipelines

on:
  schedule:
    # Every day at 23:00 UTC
    - cron: '0 23 * * *'

  # Manual trigger. All inputs are optional.
  workflow_dispatch:
    inputs:
      # --- force flags -------------------------------------------------------
      force_ingest:
        description: 'Force reingest of data'
        required: false
        type: boolean
        default: false
      force_process:
        description: 'Force reprocessing/re-export of data'
        required: false
        type: boolean
        default: false
      force_summaries:
        description: 'Force regeneration of summaries'
        required: false
        type: boolean
        default: false

      # --- optional extra work -----------------------------------------------
      run_untracked_repos:
        description: 'Run untracked repos discovery'
        required: false
        type: boolean
        default: false

      # --- summary selection (comma-separated lists) -------------------------
      summary_types_to_run:
        description: 'Comma-separated list of summary types to run (repository,overall,contributors)'
        type: string
        default: 'repository,overall,contributors'
      summary_intervals_to_run:
        description: 'Comma-separated list of intervals to run (daily,weekly,monthly)'
        type: string
        default: 'daily,weekly,monthly'

      # --- optional date window ----------------------------------------------
      startDate:
        description: 'Start date for data processing (format: YYYY-MM-DD)'
        required: false
        type: string
      endDate:
        description: 'End date for data processing (format: YYYY-MM-DD)'
        required: false
        type: string
# Environment shared by every job in this workflow.
env:
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
  # Branch and local directory used by the pipeline-data / restore-db actions
  PIPELINE_DATA_BRANCH: '_data'
  DATA_DIR: 'data'

  # ---------------------------------------------------------------------------
  # FORK CONFIGURATION
  # ---------------------------------------------------------------------------
  # The pipeline configuration is read from a JSON file.
  # Lookup priority: PIPELINE_CONFIG_FILE, then config/config.json.
  #
  # Forks can pick either approach:
  #   1. Copy config.example.json to config/config.json; this workflow file
  #      stays untouched.
  #   2. Add a config under your own name (e.g. config/myorg.json) and point
  #      PIPELINE_CONFIG_FILE at it.
  #
  # Both approaches avoid merge conflicts when syncing with upstream.
  #
  # NOTE(review): the expression below falls back to 'config/example.json'
  # when the PIPELINE_CONFIG_FILE secret is unset, so this variable is never
  # empty. Confirm the loader still reaches config/config.json in that case,
  # and that the example file name here matches the one named in option 1.
  # ---------------------------------------------------------------------------
  PIPELINE_CONFIG_FILE: ${{ secrets.PIPELINE_CONFIG_FILE || 'config/example.json' }}
# Two jobs run in sequence: ingest-export refreshes the data branch, then
# generate-summaries builds summaries and JSON exports on top of it.
jobs:
  # ---------------------------------------------------------------------------
  # Job 1: ingest -> process -> export, then commit results to the data branch
  # ---------------------------------------------------------------------------
  ingest-export:
    name: Ingest/Export Pipeline
    runs-on: ubuntu-latest
    timeout-minutes: 60
    permissions:
      contents: write  # Needed for pushing to branches
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      # Same flags as the generate-summaries job so both jobs install exactly
      # what the committed lockfile pins.
      - name: Install dependencies
        run: bun install --frozen-lockfile

      # Build the optional CLI flags once and publish them via GITHUB_ENV for
      # the later steps of THIS job (GITHUB_ENV is not shared between jobs).
      # Dispatch inputs are passed through env instead of being interpolated
      # into the script, so their content is never parsed as shell code.
      - name: Set conditional variables
        id: set-vars
        env:
          DISPATCH_START_DATE: ${{ github.event.inputs.startDate }}
          DISPATCH_END_DATE: ${{ github.event.inputs.endDate }}
          DISPATCH_FORCE_INGEST: ${{ github.event.inputs.force_ingest }}
          DISPATCH_FORCE_PROCESS: ${{ github.event.inputs.force_process }}
        run: |
          START_DATE_ARG=""
          END_DATE_ARG=""
          FORCE_INGEST_ARG=""
          FORCE_PROCESS_ARG=""

          # The inputs are documented as YYYY-MM-DD. Rejecting anything else
          # keeps stray characters out of GITHUB_ENV and out of the command
          # lines that later expand these variables.
          require_iso_date() {
            if [[ ! "$2" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
              echo "::error::Input '$1' must use the YYYY-MM-DD format"
              exit 1
            fi
          }

          # Flags are only honoured for manual runs; scheduled runs use defaults
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            if [ -n "$DISPATCH_START_DATE" ]; then
              require_iso_date startDate "$DISPATCH_START_DATE"
              START_DATE_ARG=" -a $DISPATCH_START_DATE"
            fi
            if [ -n "$DISPATCH_END_DATE" ]; then
              require_iso_date endDate "$DISPATCH_END_DATE"
              END_DATE_ARG=" -b $DISPATCH_END_DATE"
            fi
            if [ "$DISPATCH_FORCE_INGEST" = "true" ]; then
              FORCE_INGEST_ARG=" -f"
            fi
            if [ "$DISPATCH_FORCE_PROCESS" = "true" ]; then
              FORCE_PROCESS_ARG=" -f"
            fi
          fi

          # Each value carries its own leading space so it can be appended
          # directly to a command in the steps below.
          {
            echo "start_date_arg=$START_DATE_ARG"
            echo "end_date_arg=$END_DATE_ARG"
            echo "force_ingest_arg=$FORCE_INGEST_ARG"
            echo "force_process_arg=$FORCE_PROCESS_ARG"
          } >> "$GITHUB_ENV"

      # Set up pipeline-data branch worktree
      - name: Setup pipeline-data branch
        uses: ./.github/actions/pipeline-data
        with:
          operation: setup
          branch_name: ${{ env.PIPELINE_DATA_BRANCH }}
          data_dir: ${{ env.DATA_DIR }}

      # Restore database from pipeline-data branch
      - name: Restore database
        uses: ./.github/actions/restore-db
        with:
          operation: restore
          dump_dir: ${{ env.DATA_DIR }}/dump
          db_path: ${{ env.DATA_DIR }}/db.sqlite

      - name: Run ingest pipeline
        run: bun run pipeline ingest${{ env.force_ingest_arg }}${{ env.start_date_arg }}${{ env.end_date_arg }}

      # Ingest untracked repos weekly (on Sundays) or on manual request
      - name: Run ingest-untracked pipeline
        if: github.event.inputs.run_untracked_repos == 'true' || (github.event_name == 'schedule')
        env:
          DISPATCH_RUN_UNTRACKED: ${{ github.event.inputs.run_untracked_repos }}
        run: |
          DAY_OF_WEEK=$(date +%u)
          # Run on Sundays for scheduled runs, or always for manual dispatch with flag
          if [ "$DISPATCH_RUN_UNTRACKED" = "true" ] || [ "$DAY_OF_WEEK" = "7" ]; then
            if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
              # Manual run: fail on error (no fallback)
              bun run pipeline ingest-untracked
            else
              # Scheduled run: log warning and continue
              bun run pipeline ingest-untracked || echo "::warning::Untracked repos ingestion failed (scheduled run, continuing)"
            fi
          fi

      - name: Run process pipeline
        run: bun run pipeline process${{ env.force_process_arg }}

      # Export everything missing + overwrite last 2 days to ensure overlap
      - name: Run export pipeline
        run: |
          bun run pipeline export${{ env.start_date_arg }}${{ env.end_date_arg }}${{ env.force_process_arg }}
          bun run pipeline export --days 2 -f

      # Dump SQLite database to diffable files before updating pipeline-data branch
      - name: Dump SQLite database
        uses: ./.github/actions/restore-db
        with:
          operation: dump
          db_path: ${{ env.DATA_DIR }}/db.sqlite
          dump_dir: ${{ env.DATA_DIR }}/dump

      # Update pipeline-data branch with new data
      # NOTE(review): "$(date ...)" reaches the action as literal text;
      # presumably the action expands it in a shell - confirm.
      - name: Update pipeline-data branch
        uses: ./.github/actions/pipeline-data
        with:
          operation: update
          data_dir: ${{ env.DATA_DIR }}
          commit_message: "Ingest/export run: $(date -u +'%Y-%m-%d %H:%M')"
          branch_name: ${{ env.PIPELINE_DATA_BRANCH }}

      # Cleanup worktree (always runs)
      - name: Cleanup
        if: always()
        uses: ./.github/actions/pipeline-data
        with:
          operation: cleanup
          data_dir: ${{ env.DATA_DIR }}
          branch_name: ${{ env.PIPELINE_DATA_BRANCH }}

  # ---------------------------------------------------------------------------
  # Job 2: generate summaries and JSON exports, then commit to the data branch
  # ---------------------------------------------------------------------------
  generate-summaries:
    name: Generate Summaries
    needs: ingest-export
    runs-on: ubuntu-latest
    timeout-minutes: 30
    permissions:
      contents: write  # Needed for pushing to branches
    # Skip summary generation if all summary types are disabled in a manual run
    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.summary_types_to_run != '' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2
        with:
          bun-version: latest

      - name: Install dependencies
        run: bun install --frozen-lockfile

      # Set up pipeline-data branch worktree
      - name: Setup pipeline-data branch
        uses: ./.github/actions/pipeline-data
        with:
          operation: setup
          branch_name: ${{ env.PIPELINE_DATA_BRANCH }}
          data_dir: ${{ env.DATA_DIR }}

      # Restore database from pipeline-data branch
      - name: Restore database
        uses: ./.github/actions/restore-db
        with:
          operation: restore
          dump_dir: ${{ env.DATA_DIR }}/dump
          db_path: ${{ env.DATA_DIR }}/db.sqlite

      # Import any markdown summaries that exist but are missing from database
      - name: Import missing summaries from markdown files
        run: |
          bun run pipeline import-summaries --from-github --interval month
          bun run pipeline import-summaries --from-github --interval week
          bun run pipeline import-summaries --from-github --interval day

      # Determine which intervals and types to run.
      # Dispatch inputs arrive through env so they are never parsed as shell.
      # NOTE(review): the schedule fires at 23:00 UTC and this job waits for
      # ingest-export, so `date` may already report the following day here -
      # confirm the weekday mapping below is the intended one.
      - name: Set run conditions
        id: conditions
        env:
          SUMMARY_TYPES: ${{ github.event.inputs.summary_types_to_run }}
          SUMMARY_INTERVALS: ${{ github.event.inputs.summary_intervals_to_run }}
        run: |
          DAY_OF_WEEK=$(date +%u) # 1=Mon, 7=Sun
          # Scheduled runs
          if [ "$GITHUB_EVENT_NAME" = "schedule" ]; then
            echo "RUN_DAILY=true" >> "$GITHUB_OUTPUT"
            if [ "$DAY_OF_WEEK" = "3" ] || [ "$DAY_OF_WEEK" = "6" ]; then
              echo "RUN_WEEKLY=true" >> "$GITHUB_OUTPUT"
            fi
            if [ "$DAY_OF_WEEK" = "7" ]; then
              echo "RUN_MONTHLY=true" >> "$GITHUB_OUTPUT"
              echo "RUN_CONTRIBUTORS=true" >> "$GITHUB_OUTPUT"
            fi
            echo "RUN_REPOSITORIES=true" >> "$GITHUB_OUTPUT"
            echo "RUN_OVERALL=true" >> "$GITHUB_OUTPUT"
          # Manual runs
          else
            if [[ "$SUMMARY_INTERVALS" == *'daily'* ]]; then echo "RUN_DAILY=true"; else echo "RUN_DAILY=false"; fi >> "$GITHUB_OUTPUT"
            if [[ "$SUMMARY_INTERVALS" == *'weekly'* ]]; then echo "RUN_WEEKLY=true"; else echo "RUN_WEEKLY=false"; fi >> "$GITHUB_OUTPUT"
            if [[ "$SUMMARY_INTERVALS" == *'monthly'* ]]; then echo "RUN_MONTHLY=true"; else echo "RUN_MONTHLY=false"; fi >> "$GITHUB_OUTPUT"
            if [[ "$SUMMARY_TYPES" == *'contributors'* ]]; then echo "RUN_CONTRIBUTORS=true"; else echo "RUN_CONTRIBUTORS=false"; fi >> "$GITHUB_OUTPUT"
            if [[ "$SUMMARY_TYPES" == *'repository'* ]]; then echo "RUN_REPOSITORIES=true"; else echo "RUN_REPOSITORIES=false"; fi >> "$GITHUB_OUTPUT"
            if [[ "$SUMMARY_TYPES" == *'overall'* ]]; then echo "RUN_OVERALL=true"; else echo "RUN_OVERALL=false"; fi >> "$GITHUB_OUTPUT"
          fi

      # Repository summaries must run BEFORE overall summaries
      # because overall summaries aggregate repository summaries
      - name: Run Repository Summaries
        if: steps.conditions.outputs.RUN_REPOSITORIES == 'true'
        uses: ./.github/actions/run-summary
        with:
          summary-type: "repository"
          daily: ${{ steps.conditions.outputs.RUN_DAILY }}
          weekly: ${{ steps.conditions.outputs.RUN_WEEKLY }}
          monthly: ${{ steps.conditions.outputs.RUN_MONTHLY }}
          force: ${{ github.event.inputs.force_summaries }}
          start-date: ${{ github.event.inputs.startDate }}
          end-date: ${{ github.event.inputs.endDate }}

      - name: Run Contributor Summaries
        if: steps.conditions.outputs.RUN_CONTRIBUTORS == 'true'
        uses: ./.github/actions/run-summary
        with:
          summary-type: "contributors"
          daily: ${{ steps.conditions.outputs.RUN_DAILY }}
          weekly: ${{ steps.conditions.outputs.RUN_WEEKLY }}
          monthly: ${{ steps.conditions.outputs.RUN_MONTHLY }}
          force: ${{ github.event.inputs.force_summaries }}
          start-date: ${{ github.event.inputs.startDate }}
          end-date: ${{ github.event.inputs.endDate }}

      - name: Run Overall Summaries
        if: steps.conditions.outputs.RUN_OVERALL == 'true'
        uses: ./.github/actions/run-summary
        with:
          summary-type: "overall"
          daily: ${{ steps.conditions.outputs.RUN_DAILY }}
          weekly: ${{ steps.conditions.outputs.RUN_WEEKLY }}
          monthly: ${{ steps.conditions.outputs.RUN_MONTHLY }}
          force: ${{ github.event.inputs.force_summaries }}
          start-date: ${{ github.event.inputs.startDate }}
          end-date: ${{ github.event.inputs.endDate }}

      # Force regenerate steps also need correct order: repo -> contributor -> overall
      - name: Force regenerate recent repository summaries
        if: github.event_name == 'schedule' && steps.conditions.outputs.RUN_REPOSITORIES == 'true'
        uses: ./.github/actions/run-summary
        with:
          summary-type: "repository"
          daily: "true"
          force: "true"
          days: 1

      - name: Force regenerate recent contributor summaries
        if: github.event_name == 'schedule' && steps.conditions.outputs.RUN_CONTRIBUTORS == 'true'
        uses: ./.github/actions/run-summary
        with:
          summary-type: "contributors"
          daily: "true"
          force: "true"
          days: 1

      # RUN_OVERALL is always 'true' on scheduled runs; the guard is spelled
      # out only to mirror the two steps above.
      - name: Force regenerate recent overall summaries
        if: github.event_name == 'schedule' && steps.conditions.outputs.RUN_OVERALL == 'true'
        uses: ./.github/actions/run-summary
        with:
          summary-type: "overall"
          daily: "true"
          force: "true"
          days: 1

      # Re-export stats files to include contributor summaries.
      # The date flags are rebuilt here from the dispatch inputs: variables
      # written to GITHUB_ENV by the ingest-export job are not visible in this
      # job, so referencing them here would always expand to nothing.
      - name: Re-export stats with contributor summaries
        env:
          DISPATCH_START_DATE: ${{ github.event.inputs.startDate }}
          DISPATCH_END_DATE: ${{ github.event.inputs.endDate }}
        run: |
          EXPORT_ARGS=()
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            # Manual run: honour the optional date window
            if [ -n "$DISPATCH_START_DATE" ]; then
              EXPORT_ARGS+=(-a "$DISPATCH_START_DATE")
            fi
            if [ -n "$DISPATCH_END_DATE" ]; then
              EXPORT_ARGS+=(-b "$DISPATCH_END_DATE")
            fi
          elif [ "$GITHUB_EVENT_NAME" = "schedule" ]; then
            # Scheduled run: only refresh the last two days
            EXPORT_ARGS+=(--days 2)
          fi
          bun run pipeline export "${EXPORT_ARGS[@]}" -f

      # Determine site URL for API exports.
      # The repository name and owner come from the runner's default
      # environment variables, so this step does not depend on the event
      # payload carrying a repository object.
      - name: Determine site URL
        id: site-config
        run: |
          REPO_NAME="${GITHUB_REPOSITORY#*/}"
          OWNER="$GITHUB_REPOSITORY_OWNER"
          # Auto-detect site URL based on repo type
          if [[ "$REPO_NAME" == *.github.io ]]; then
            echo "site_url=https://${OWNER}.github.io" >> "$GITHUB_OUTPUT"
          else
            echo "site_url=https://${OWNER}.github.io/${REPO_NAME}" >> "$GITHUB_OUTPUT"
          fi

      # Export JSON API files
      - name: Export leaderboard JSON files
        run: bun run pipeline export-leaderboard
        env:
          SITE_URL: ${{ steps.site-config.outputs.site_url }}

      - name: Export summaries to JSON API
        run: bun run pipeline export-summaries
        env:
          SITE_URL: ${{ steps.site-config.outputs.site_url }}

      # Dump SQLite database to diffable files
      - name: Dump SQLite database
        uses: ./.github/actions/restore-db
        with:
          operation: dump
          db_path: ${{ env.DATA_DIR }}/db.sqlite
          dump_dir: ${{ env.DATA_DIR }}/dump

      # Update pipeline-data branch with new summaries
      - name: Update pipeline-data branch with summaries
        uses: ./.github/actions/pipeline-data
        with:
          operation: update
          data_dir: ${{ env.DATA_DIR }}
          commit_message: "Summary generation run: $(date -u +'%Y-%m-%d %H:%M')"
          branch_name: ${{ env.PIPELINE_DATA_BRANCH }}

      # Cleanup worktree (always runs)
      - name: Cleanup
        if: always()
        uses: ./.github/actions/pipeline-data
        with:
          operation: cleanup
          data_dir: ${{ env.DATA_DIR }}
          branch_name: ${{ env.PIPELINE_DATA_BRANCH }}