name: Sync DAGs from S3 and Push

on:
  repository_dispatch:
    types: [dag-updated]
  workflow_dispatch:
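
# repository_dispatch is fired by an external caller, typically whatever
# process uploads new DAGs to the bucket. A minimal sketch of that call
# (OWNER/REPO are placeholders; the token needs repo scope, and the
# event_type must match the `types` filter above):
#   curl -X POST \
#     -H "Accept: application/vnd.github+json" \
#     -H "Authorization: Bearer $GITHUB_PAT" \
#     https://api.github.com/repos/OWNER/REPO/dispatches \
#     -d '{"event_type": "dag-updated"}'
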
permissions:
  id-token: write
  contents: write

jobs:
  sync-dags:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.CDR_PUSH_PAT }}
          fetch-depth: 1
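
      # checkout persists this PAT in the repo's git config (persist-credentials
      # defaults to true), so the final push step authenticates as CDR_PUSH_PAT
      # rather than the default GITHUB_TOKEN. Unlike GITHUB_TOKEN, a PAT push
      # can also trigger other push-based workflows on main.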
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/github-actions-ecr-role
          aws-region: eu-central-1
          audience: sts.amazonaws.com
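
      # The `id-token: write` permission above lets the job mint a GitHub OIDC
      # token, which this action exchanges for short-lived STS credentials. The
      # IAM role's trust policy must allow GitHub's OIDC provider; a sketch,
      # with the provider ARN and audience condition this setup assumes:
      #   {
      #     "Effect": "Allow",
      #     "Principal": { "Federated": "arn:aws:iam::<ACCOUNT_ID>:oidc-provider/token.actions.githubusercontent.com" },
      #     "Action": "sts:AssumeRoleWithWebIdentity",
      #     "Condition": { "StringEquals": { "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" } }
      #   }
      # The role also needs s3:ListBucket and s3:GetObject on airflow-dags-bc.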

      # Superseded by the ETag-based check below: a fresh checkout gives every
      # file a new mtime, so a timestamp-based dry run tends to flag files that
      # have not actually changed.
      # - name: Check for DAG changes
      #   id: check_dags
      #   run: |
      #     cd dags/
      #     CHANGES=$(aws s3 sync s3://airflow-dags-bc/ . --dryrun --exact-timestamps)
      #     CHANGES_COUNT=$(aws s3 sync s3://airflow-dags-bc/ . --dryrun --exact-timestamps | wc -l)
      #     echo "Found changes: $CHANGES"
      #     echo "Found $CHANGES_COUNT total changes"
      #     if [ "$CHANGES_COUNT" -eq "0" ]; then
      #       echo "no_changes=true" >> "$GITHUB_OUTPUT"
      #     else
      #       echo "no_changes=false" >> "$GITHUB_OUTPUT"
      #     fi

      - name: Check for DAG changes using ETags
        id: check_dags
        run: |
          # List S3 object ETags. For single-part uploads the ETag is the MD5
          # of the object body; multipart uploads produce a "<hash>-<parts>"
          # ETag that never matches a local MD5, so such files always read as
          # changed here.
          aws s3api list-objects-v2 --bucket airflow-dags-bc \
            --query 'Contents[].{Key:Key,ETag:ETag}' \
            --output json > /tmp/s3_etags.json

          files_to_download=""
          files_changed=0
          while IFS= read -r line; do
            key=$(echo "$line" | jq -r '.Key')
            s3_etag=$(echo "$line" | jq -r '.ETag' | tr -d '"')
            local_file="dags/$key"

            # S3 object with no local counterpart
            if [ ! -f "$local_file" ]; then
              echo "File $key doesn't exist locally"
              files_to_download="$files_to_download $key"
              files_changed=$((files_changed + 1))
              continue
            fi

            # Compare the S3 ETag against the local file's MD5
            local_md5=$(md5sum "$local_file" | cut -d' ' -f1)
            if [ "$s3_etag" != "$local_md5" ]; then
              echo "File $key has different content (S3: $s3_etag, local: $local_md5)"
              files_to_download="$files_to_download $key"
              files_changed=$((files_changed + 1))
            fi
          done < <(jq -c '. // [] | .[]' /tmp/s3_etags.json)  # `. // []` guards an empty bucket

          # files_to_download is informational; the download step below
          # re-syncs the whole bucket rather than fetching individual keys.
          echo "Files that need downloading: $files_changed"
          echo "files_to_download=$files_to_download" >> "$GITHUB_OUTPUT"
          if [ "$files_changed" -eq "0" ]; then
            echo "no_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "no_changes=false" >> "$GITHUB_OUTPUT"
          fi
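
      # To spot-check one object's ETag by hand (the key is a placeholder):
      #   aws s3api head-object --bucket airflow-dags-bc --key example_dag.py --query ETag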

      - name: Exit early if no DAGs changed
        if: steps.check_dags.outputs.no_changes == 'true'
        run: echo "No DAGs changed. Skipping workflow." && exit 0

      - name: Download DAGs from S3
        if: steps.check_dags.outputs.no_changes != 'true'
        run: aws s3 sync s3://airflow-dags-bc/ dags/ --exact-timestamps
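      # Note: the sync above runs without --delete, so DAGs deleted from the
      # bucket are never removed locally and will linger in the repo.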

      - name: Commit and push DAG changes
        if: steps.check_dags.outputs.no_changes != 'true'
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add dags/
          if ! git diff --cached --quiet; then
            git commit -m "sync DAGs from S3"
            git push origin HEAD:main
            echo "Changes pushed successfully."
          else
            echo "No DAG changes to commit."
fi