Cloud Cost ETL Pipeline #24
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Cloud Cost ETL workflow.
#
# Runs the AWS, GCP and Stripe pipeline scripts with DLT_DESTINATION set to
# clickhouse, then runs the anonymization script for the public dashboards.
# Triggered by a daily schedule or manually; a manual run can additionally
# normalize the AWS/GCP data and ingest the normalized output.
name: Cloud Cost ETL Pipeline

on:
  schedule:
    # Run daily at 2 AM UTC
    - cron: '0 2 * * *'
  workflow_dispatch:  # Allow manual trigger
    inputs:
      include_normalized:
        description: 'Also ingest normalized data to ClickHouse'
        required: false
        type: boolean
        default: false

jobs:
  run-all-cloud:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install uv
        run: pip install uv

      - name: Install dependencies
        run: uv sync

      # Writes .dlt/secrets.toml from repository secrets. The secret
      # expressions are substituted by GitHub Actions before the script runs;
      # the quoted 'EOF' delimiter then stops the shell from expanding
      # anything inside the generated text.
      # NOTE(review): the BigQuery section is named `source.…` (singular)
      # while the other sections use `sources.…` — confirm that
      # pipelines/google_bq_incremental_pipeline.py reads this exact key.
      # NOTE(review): values are pasted into TOML basic strings, so a secret
      # containing a raw newline or a double quote would produce invalid TOML
      # — presumably GCP_PRIVATE_KEY is stored with escaped newlines; verify.
      - name: Create dlt secrets
        run: |
          mkdir -p .dlt
          cat > .dlt/secrets.toml << 'EOF'
          # ClickHouse destination credentials
          [destination.clickhouse.credentials]
          host = "${{ secrets.CLICKHOUSE_HOST }}"
          port = 9440
          username = "${{ secrets.CLICKHOUSE_USERNAME }}"
          password = "${{ secrets.CLICKHOUSE_PASSWORD }}"
          secure = 1
          # AWS credentials for S3 access
          [sources.filesystem.credentials]
          aws_access_key_id = "${{ secrets.AWS_ACCESS_KEY_ID }}"
          aws_secret_access_key = "${{ secrets.AWS_SECRET_ACCESS_KEY }}"
          # GCP BigQuery service account credentials
          [source.bigquery.credentials]
          project_id = "${{ secrets.GCP_PROJECT_ID }}"
          private_key = "${{ secrets.GCP_PRIVATE_KEY }}"
          client_email = "${{ secrets.GCP_CLIENT_EMAIL }}"
          token_uri = "https://oauth2.googleapis.com/token"
          # Stripe API credentials
          [sources.stripe_analytics]
          stripe_secret_key = "${{ secrets.STRIPE_SECRET_KEY }}"
          EOF

      # The three source pipelines and the anonymization run sequentially;
      # any failure stops the job (continue-on-error is explicitly false).
      - name: Run AWS pipeline
        env:
          DLT_DESTINATION: clickhouse
        run: uv run python pipelines/aws_pipeline.py
        continue-on-error: false

      - name: Run GCP pipeline
        env:
          DLT_DESTINATION: clickhouse
        run: uv run python pipelines/google_bq_incremental_pipeline.py
        continue-on-error: false

      - name: Run Stripe pipeline
        env:
          DLT_DESTINATION: clickhouse
        run: uv run python pipelines/stripe_pipeline.py
        continue-on-error: false

      - name: Anonymize data for public dashboards
        run: uv run python scripts/anonymize_clickhouse.py
        continue-on-error: false

      # The optional steps below only run on a manual dispatch with
      # include_normalized checked: github.event.inputs carries the boolean as
      # the string 'true', and is empty on scheduled runs.
      - name: Normalize AWS data (optional - for advanced dashboards)
        if: ${{ github.event.inputs.include_normalized == 'true' }}
        run: |
          cd viz_rill
          uv run python cur-wizard/scripts/normalize.py
        env:
          NORMALIZED_DATA_DIR: data
          INPUT_DATA_DIR: data/aws_costs/cur_export_test_00001

      - name: Normalize GCP data (optional - for advanced dashboards)
        if: ${{ github.event.inputs.include_normalized == 'true' }}
        run: |
          cd viz_rill
          uv run python cur-wizard/scripts/normalize_gcp.py
        env:
          NORMALIZED_DATA_DIR: data
          INPUT_DATA_DIR_GCP: data/gcp_costs

      # continue-on-error: true — a failure here is reported on the step but
      # does not fail the job.
      - name: Ingest normalized data to ClickHouse (optional)
        if: ${{ github.event.inputs.include_normalized == 'true' }}
        env:
          DLT_DESTINATION: clickhouse
        run: uv run python pipelines/ingest_normalized_pipeline.py
        continue-on-error: true

      # Only runs when an earlier step has failed the job.
      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: pipeline-logs-${{ github.run_id }}
          path: |
            ~/.local/share/dlt/**/*.log
          retention-days: 7

      # Skip upload and deployment of new dashboards: they don't change for
      # now, so this can be done manually.
      # - name: Install Rill CLI (Simplified Latest Download)
      #   run: |
      #     # 1. Use the simple GitHub redirect URL for the latest asset
      #     LATEST_URL="https://github.com/rilldata/rill/releases/latest/download/rill_linux_amd64.zip"
      #     ASSET_NAME="rill_linux_amd64.zip"
      #     # 2. Download and unzip the archive
      #     # -L flag is critical to follow the redirect from /latest/ to the actual tag URL
      #     curl -sSL -L "$LATEST_URL" -o "$ASSET_NAME"
      #     # 3. Handle extraction and PATH setup
      #     unzip -o "$ASSET_NAME"
      #     chmod +x rill
      #     INSTALL_DIR="$HOME/.rill/bin"
      #     mkdir -p "$INSTALL_DIR"
      #     mv rill "$INSTALL_DIR"/rill
      #     echo "$INSTALL_DIR" >> $GITHUB_PATH
      #     echo "Rill CLI successfully installed."
      # - name: Configure Rill Cloud environment
      #   run: |
      #     cd viz_rill
      #     cat > .env << 'EOF'
      #     RILL_CONNECTOR=clickhouse
      #     connector.clickhouse.dsn=clickhouse://${{ secrets.CLICKHOUSE_USERNAME }}:${{ secrets.CLICKHOUSE_PASSWORD }}@${{ secrets.CLICKHOUSE_HOST }}:8443/default?secure=true
      #     EOF
      # - name: Deploy to Rill Cloud
      #   env:
      #     RILL_API_TOKEN: ${{ secrets.RILL_API_TOKEN }}
      #   run: |
      #     cd viz_rill
      #     rill deploy \
      #       --org demo \
      #       --path viz_rill \
      #       --public \
      #       --prod-branch main \
      #       # --api-token ${{ secrets.RILL_API_TOKEN }}

      - name: Notify on success
        if: success()
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell syntax.
          # GitHub masks secret values in logs, so the host prints masked.
          CLICKHOUSE_HOST: ${{ secrets.CLICKHOUSE_HOST }}
        run: |
          echo "✅ ETL pipeline completed successfully"
          echo "Data loaded to ClickHouse at ${CLICKHOUSE_HOST}"
          # echo "📊 Dashboards deployed to Rill Cloud: https://ui.rilldata.com/demo/viz_rill/"