dag-updated #25
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mirrors the DAG files stored in an S3 bucket into the repository's DAG
# directory and pushes the result to `main`.
#
# Triggers: a `repository_dispatch` event of type `dag-updated`, or a manual
# `workflow_dispatch`.
name: Sync DAGs from S3 and Push

on:
  repository_dispatch:
    types: [dag-updated]
  workflow_dispatch:

permissions:
  id-token: write   # needed to request the OIDC token used for the AWS role
  contents: write

# Every run ends with a push to main, so run them one at a time instead of
# letting closely spaced dispatches race each other on that push.
concurrency:
  group: sync-dags-from-s3
  cancel-in-progress: false

jobs:
  sync-dags:
    runs-on: ubuntu-latest
    env:
      # Single source of truth for the bucket and the local target directory.
      DAGS_BUCKET: airflow-dags-bc
      DAGS_DIR: dags
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.CDR_PUSH_PAT }}
          # The last step pushes HEAD to main, so always start from main, even
          # when the workflow is dispatched manually from another branch.
          ref: main
          fetch-depth: 1

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/github-actions-ecr-role
          aws-region: eu-central-1
          audience: sts.amazonaws.com

      # Disabled earlier variant of the check step (it uses the same id as the
      # active step below). It detected changes with `aws s3 sync --dryrun`.
      # NOTE(review): as written, its test applies `-eq` to $CHANGES (the
      # dry-run text) rather than to $CHANGES_COUNT.
      # - name: Check for DAG changes
      #   id: check_dags
      #   run: |
      #     echo $(ls)
      #     cd dags/
      #     CHANGES=$(aws s3 sync s3://airflow-dags-bc/ . --dryrun --exact-timestamps)
      #     CHANGES_COUNT=$(aws s3 sync s3://airflow-dags-bc/ . --dryrun --exact-timestamps | wc -l)
      #     echo "Found $CHANGES changes"
      #     echo "Found $CHANGES_COUNT total changes"
      #     if [ "$CHANGES" -eq "0" ]; then
      #       echo "no_changes=true" >> "$GITHUB_OUTPUT"
      #     else
      #       echo "no_changes=false" >> "$GITHUB_OUTPUT"
      #     fi

      # Compares every object in the bucket with its local copy.
      # Outputs:
      #   files_to_download - space-separated keys that are missing or differ
      #   no_changes        - 'true' when nothing needs downloading, else 'false'
      - name: Check for DAG changes using ETags
        id: check_dags
        run: |
          set -euo pipefail

          etag_listing=$(mktemp)
          trap 'rm -f -- "$etag_listing"' EXIT

          # Get S3 object keys together with their ETags.
          aws s3api list-objects-v2 --bucket "$DAGS_BUCKET" \
            --query 'Contents[].{Key:Key,ETag:ETag}' \
            --output json > "$etag_listing"

          files_to_download=()

          # One compact JSON object per line. `// []` covers an empty bucket,
          # for which the query above yields `null` instead of a list.
          while IFS= read -r entry; do
            key=$(jq -r '.Key' <<<"$entry")
            s3_etag=$(jq -r '.ETag' <<<"$entry" | tr -d '"')
            local_file="$DAGS_DIR/$key"

            # Keys ending in "/" are folder placeholders, not files; they
            # would otherwise be reported as missing on every run.
            if [[ "$key" == */ ]]; then
              continue
            fi

            if [[ ! -f "$local_file" ]]; then
              echo "File $key doesn't exist locally"
              files_to_download+=("$key")
              continue
            fi

            # An ETag containing "-" comes from a multipart upload and is not
            # the MD5 of the object, so it can never equal the local MD5. Be
            # conservative and treat the file as changed; the commit step
            # only commits when git sees a real difference.
            if [[ "$s3_etag" == *-* ]]; then
              echo "File $key has a multipart ETag ($s3_etag); cannot compare by MD5, treating as changed"
              files_to_download+=("$key")
              continue
            fi

            md5_output=$(md5sum < "$local_file")
            local_md5=${md5_output%% *}

            if [[ "$s3_etag" != "$local_md5" ]]; then
              echo "File $key has different content (S3: $s3_etag, Local: $local_md5)"
              files_to_download+=("$key")
            fi
          done < <(jq -c '(. // [])[]' "$etag_listing")

          files_changed=${#files_to_download[@]}
          echo "Files that need downloading: $files_changed"

          if (( files_changed == 0 )); then
            no_changes=true
          else
            no_changes=false
          fi

          {
            echo "files_to_download=${files_to_download[*]:-}"
            echo "no_changes=$no_changes"
          } >> "$GITHUB_OUTPUT"

      # Informational only: a step cannot end the job successfully, so the
      # remaining steps are skipped through their own `if:` conditions.
      - name: Exit early if no DAGs changed
        if: steps.check_dags.outputs.no_changes == 'true'
        run: echo "No DAGs changed. Skipping workflow."

      - name: Download DAGs from S3
        if: steps.check_dags.outputs.no_changes != 'true'
        run: aws s3 sync "s3://$DAGS_BUCKET/" "$DAGS_DIR/" --exact-timestamps

      - name: Commit and push DAG changes
        if: steps.check_dags.outputs.no_changes != 'true'
        run: |
          set -euo pipefail

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          git add "$DAGS_DIR/"

          # The ETag check can over-report, so commit only when the index
          # really differs from HEAD.
          if git diff --cached --quiet; then
            echo "No DAG changes to commit."
          else
            git commit -m "sync DAGs from S3"
            git push origin HEAD:main
            echo "Changes pushed successfully."
          fi