# Scrape and Publish Latest Statement #550
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# On every trigger, check whether today's UTC date is listed in
# release_calendar.txt. If it is, run scrape.py, commit whatever changed to
# this repository, and copy communications.csv to the Hugging Face dataset
# repo. If it is not, every step after the check is skipped.
name: Scrape and Publish Latest Statement

on:
  push:
  workflow_dispatch:
  schedule:
    - cron: '0 8 * * *'  # daily 8:00 UTC

# One active run per workflow and ref; a newer run cancels the one in flight.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Write access is needed because the job pushes a commit to this repository.
permissions:
  contents: write

jobs:
  scheduled:
    runs-on: ubuntu-latest
    steps:
      - name: Check out this repo
        uses: actions/checkout@v4

      # Gate for all later steps: sets the `run_scraper` output to 'true' only
      # when today's UTC date appears as a whole line in release_calendar.txt.
      - name: Check if today is a scrape day (T+1 after FOMC release)
        id: check
        run: |
          TODAY=$(date -u +%Y-%m-%d)
          if [ ! -f release_calendar.txt ]; then
            # Surface a missing calendar instead of silently never scraping.
            echo "run_scraper=false" >> "$GITHUB_OUTPUT"
            echo "::warning::release_calendar.txt not found; skipping scrape."
            RUN_LABEL=No
          elif grep -qxF -- "$TODAY" release_calendar.txt; then
            echo "run_scraper=true" >> "$GITHUB_OUTPUT"
            echo "::notice::Today ($TODAY) is a release calendar T+1 day; will run scraper."
            RUN_LABEL=Yes
          else
            echo "run_scraper=false" >> "$GITHUB_OUTPUT"
            echo "::notice::Today ($TODAY) is not in release_calendar.txt; skipping scrape."
            RUN_LABEL=No
          fi
          echo "## Scrape decision" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Date:** $TODAY" >> "$GITHUB_STEP_SUMMARY"
          echo "- **Run scraper:** $RUN_LABEL" >> "$GITHUB_STEP_SUMMARY"

      - name: Set up Python
        if: steps.check.outputs.run_scraper == 'true'
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'
          cache: 'pip'

      - name: Install dependencies
        if: steps.check.outputs.run_scraper == 'true'
        run: pip install -r requirements.txt

      - name: Fetch latest data
        if: steps.check.outputs.run_scraper == 'true'
        run: python scrape.py

      # Commit only when something is actually staged, so that a genuine
      # `git commit` failure fails the step instead of being reported as
      # "No changes to commit".
      - name: Commit and push if changed
        if: steps.check.outputs.run_scraper == 'true'
        run: |-
          git config --global user.name "Automated"
          git config --global user.email "actions@users.noreply.github.com"
          git add -A
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Latest data: $(date -u)"
            git push
          fi

      - name: Install Git LFS
        if: steps.check.outputs.run_scraper == 'true'
        run: |-
          sudo apt-get update
          sudo apt-get install -y git-lfs
          git lfs install

      # Copies communications.csv into a clone of the Hugging Face dataset repo
      # and pushes it. The step is a no-op (exit 0) when HF_TOKEN is not set.
      - name: Push data to Hugging Face
        if: steps.check.outputs.run_scraper == 'true'
        run: |-
          if [ -z "$HF_TOKEN" ]; then
            echo "::notice::HF_TOKEN not set; skipping Hugging Face push"
            exit 0
          fi
          echo "::add-mask::$HF_TOKEN"
          git clone \
            "https://user:${HF_TOKEN}@huggingface.co/datasets/vtasca/fomc-statements-minutes" \
            hf_repo
          cd hf_repo
          git lfs track "communications.csv"
          git add .gitattributes || true
          cp ../communications.csv .
          git add communications.csv
          # Commit and push only when the staged tree differs from HEAD.
          if git diff --cached --quiet; then
            echo "No changes to push to Hugging Face"
          else
            git commit -m "Latest data: $(date -u)"
            git push origin main
          fi
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}