Update Database #160
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Update Database | |
| on: | |
| schedule: | |
| - cron: '0 12 * * 1-5' # Run every weekday at 20:00 UTC+8 (12:00 UTC) | |
| workflow_dispatch: # Allow manual triggering | |
| jobs: | |
| prepare: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| latest_date: ${{ steps.latest-date.outputs.DATA }} | |
| has_site_records: ${{ steps.check-files.outputs.has_site_records }} | |
| has_announcements: ${{ steps.check-files.outputs.has_announcements }} | |
| has_urls: ${{ steps.check-files.outputs.has_urls }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Setup Node.js | |
| uses: actions/setup-node@v4 | |
| with: | |
| node-version: '20.x' | |
| cache: 'npm' | |
| - name: Install dependencies | |
| run: npm ci | |
| - run: mkdir -p tmp | |
| - name: Fetch lateset date in DB | |
| env: | |
| CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} | |
| CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| run: | | |
| npx -y wrangler d1 execute open165 --command 'SELECT max(endDate) AS latestDate FROM ScamSiteRecord' --json --remote > tmp/latest-date.json &&\ | |
| cat tmp/latest-date.json | |
| - name: Set latest date | |
| id: latest-date | |
| run: echo "DATA=$(cat tmp/latest-date.json | jq -r '.[0].results[0].latestDate')" >> "$GITHUB_OUTPUT" | |
| shell: bash | |
| - name: Generate the URL list and SQL files | |
| env: | |
| LATEST_DATE: ${{ steps.latest-date.outputs.DATA }} | |
| run: npm run predb:seed | |
| - name: Check if generated files exist | |
| id: check-files | |
| run: | | |
| echo "has_site_records=$([[ -s tmp/scamSiteRecord.sql ]] && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT" | |
| echo "has_announcements=$([[ -s tmp/scamSiteAnnouncement.sql ]] && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT" | |
| echo "has_urls=$([[ -s tmp/scamSiteUrls.txt ]] && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT" | |
| shell: bash | |
| - name: Upload artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: generated-files | |
| path: | | |
| tmp/scamSiteRecord.sql | |
| tmp/scamSiteAnnouncement.sql | |
| tmp/scamSiteUrls.txt | |
| tmp/latest-date.json | |
| retention-days: 7 | |
| if-no-files-found: warn | |
| update-scam-site-records: | |
| needs: prepare | |
| runs-on: ubuntu-latest | |
| if: ${{ needs.prepare.outputs.has_site_records == 'true' }} | |
| steps: | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: generated-files | |
| path: tmp | |
| - name: Update scam site records | |
| env: | |
| CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} | |
| CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| run: npx -y wrangler d1 execute open165 --file tmp/scamSiteRecord.sql --remote | |
| update-scam-site-announcements: | |
| needs: prepare | |
| runs-on: ubuntu-latest | |
| if: ${{ needs.prepare.outputs.has_announcements == 'true' }} | |
| steps: | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: generated-files | |
| path: tmp | |
| - name: Update scam site announcements | |
| env: | |
| CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} | |
| CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} | |
| run: npx -y wrangler d1 execute open165 --file tmp/scamSiteAnnouncement.sql --remote | |
| submit-to-urlscan: | |
| needs: prepare | |
| runs-on: ubuntu-latest | |
| if: ${{ needs.prepare.outputs.has_urls == 'true' }} | |
| steps: | |
| - name: Download artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: generated-files | |
| path: tmp | |
| - name: Submit URLs to URLscan.io | |
| run: | | |
| # Set API information | |
| API_URL="https://urlscan.io/api/v1/scan/" | |
| API_KEY="${{ secrets.URLSCAN_API_KEY }}" | |
| # Read URLs file and submit to URLscan | |
| echo "Starting URL submission to URLscan.io..." | |
| cat tmp/scamSiteUrls.txt | while read url; do | |
| if [ -z "$url" ]; then | |
| continue | |
| fi | |
| echo "Submitting URL: $url" | |
| # Submit URL to URLscan.io | |
| response=$(curl -s -X POST "$API_URL" \ | |
| -H "Content-Type: application/json" \ | |
| -H "API-Key: $API_KEY" \ | |
| -d "{\"url\": \"$url\", \"visibility\": \"public\", \"tags\": [\"scam\", \"malicious\"]}") | |
| uuid=$(echo $response | jq -r '.uuid // empty') | |
| if [ ! -z "$uuid" ]; then | |
| echo "UUID: $uuid" | |
| else | |
| echo "Submission failed: $response" | |
| fi | |
| # Wait 2 seconds between submissions to avoid API rate limits | |
| sleep 2 | |
| done | |
| echo "URL submission completed" |