From 072097152b87c9863bf9dd1a785308db06f0f63f Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 15 Jul 2025 17:23:30 +0200 Subject: [PATCH 1/3] adapt listener changes --- .../03-enable-and-run-spark-dag.yaml | 20 +++++++++++-------- .../04-enable-and-run-date-dag.yaml | 20 +++++++++++-------- stacks/airflow/airflow.yaml | 5 ++++- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml b/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml index 929ff124..3d172c28 100644 --- a/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml +++ b/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml @@ -13,14 +13,18 @@ spec: # restarted. Additionally, the db-init job takes a few minutes to complete before the cluster is deployed. The wait/watch steps # below are not "water-tight" but add a layer of stability by at least ensuring that the db is initialized and ready and that # all pods are reachable (albeit independent of each other). - command: ["bash", "-c", " - kubectl rollout status --watch statefulset/airflow-webserver-default - && kubectl rollout status --watch statefulset/airflow-scheduler-default - && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) - && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default:8080/auth/token -H 'Content-Type: application/json' -d '{\"username\": \"admin\", \"password\": \"'$AIRFLOW_ADMIN_PASSWORD'\"}' | jq '.access_token' | tr -d '\"') - && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPATCH http://airflow-webserver-default:8080/api/v2/dags/sparkapp_dag -d '{\"is_paused\": false}' | jq - && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPOST http://airflow-webserver-default:8080/api/v2/dags/sparkapp_dag/dagRuns -d '{\"logical_date\": null}' | jq - "] + command: [ + "bash", + "-c", + ' + kubectl rollout status --watch statefulset/airflow-webserver-default + && kubectl rollout status --watch statefulset/airflow-scheduler-default + && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) + && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default-headless:8080/auth/token -H ''Content-Type: application/json'' -d ''{"username": "admin", "password": "''$AIRFLOW_ADMIN_PASSWORD''"}'' | jq ''.access_token'' | tr -d ''"'') + && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPATCH http://airflow-webserver-default-headless:8080/api/v2/dags/sparkapp_dag -d ''{"is_paused": false}'' | jq + && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPOST http://airflow-webserver-default-headless:8080/api/v2/dags/sparkapp_dag/dagRuns -d ''{"logical_date": null}'' | jq + ', + ] volumeMounts: - name: airflow-credentials mountPath: /airflow-credentials diff --git a/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml b/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml index 087d7c9c..cf316552 100644 --- a/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml +++ b/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml @@ -13,14 +13,18 @@ spec: # restarted. Additionally, the db-init job takes a few minutes to complete before the cluster is deployed. The wait/watch steps # below are not "water-tight" but add a layer of stability by at least ensuring that the db is initialized and ready and that # all pods are reachable (albeit independent of each other). - command: ["bash", "-c", " - kubectl rollout status --watch statefulset/airflow-webserver-default - && kubectl rollout status --watch statefulset/airflow-scheduler-default - && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) - && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default:8080/auth/token -H 'Content-Type: application/json' -d '{\"username\": \"admin\", \"password\": \"'$AIRFLOW_ADMIN_PASSWORD'\"}' | jq '.access_token' | tr -d '\"') - && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPATCH http://airflow-webserver-default:8080/api/v2/dags/date_demo -d '{\"is_paused\": false}' | jq - && curl -H \"Authorization: Bearer $ACCESS_TOKEN\" -H 'Content-Type: application/json' -XPOST http://airflow-webserver-default:8080/api/v2/dags/date_demo/dagRuns -d '{\"logical_date\": null}' | jq - "] + command: [ + "bash", + "-c", + ' + kubectl rollout status --watch statefulset/airflow-webserver-default + && kubectl rollout status --watch statefulset/airflow-scheduler-default + && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) + && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default-headless:8080/auth/token -H ''Content-Type: application/json'' -d ''{"username": "admin", "password": "''$AIRFLOW_ADMIN_PASSWORD''"}'' | jq ''.access_token'' | tr -d ''"'') + && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPATCH http://airflow-webserver-default-headless:8080/api/v2/dags/date_demo -d ''{"is_paused": false}'' | jq + && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPOST http://airflow-webserver-default-headless:8080/api/v2/dags/date_demo/dagRuns -d ''{"logical_date": null}'' | jq + ', + ] volumeMounts: - name: airflow-credentials mountPath: /airflow-credentials diff --git a/stacks/airflow/airflow.yaml b/stacks/airflow/airflow.yaml index 5284b46d..dab3f072 100644 --- a/stacks/airflow/airflow.yaml +++ b/stacks/airflow/airflow.yaml @@ -26,6 +26,8 @@ spec: mountPath: /dags/pyspark_pi.yaml subPath: pyspark_pi.yaml webservers: + roleConfig: + listenerClass: external-unstable config: resources: cpu: @@ -34,7 +36,6 @@ spec: memory: limit: 2Gi gracefulShutdownTimeout: 30s - listenerClass: external-unstable roleGroups: default: envOverrides: @@ -302,6 +303,8 @@ data: memory: limit: 1024Mi replicas: 3 + + # {% endraw %} --- apiVersion: v1 From 3ba3a623ecef7c44f00af6db902aa3d69d1d045a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 15 Jul 2025 17:46:25 +0200 Subject: [PATCH 2/3] Update demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml Co-authored-by: Nick <10092581+NickLarsenNZ@users.noreply.github.com> --- .../03-enable-and-run-spark-dag.yaml | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml b/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml index 3d172c28..ed7edd5b 100644 --- a/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml +++ b/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml @@ -13,18 +13,18 @@ spec: # restarted. Additionally, the db-init job takes a few minutes to complete before the cluster is deployed. The wait/watch steps # below are not "water-tight" but add a layer of stability by at least ensuring that the db is initialized and ready and that # all pods are reachable (albeit independent of each other). - command: [ - "bash", - "-c", - ' + command: + - bash + - -euo + - pipefail + - -c + - | kubectl rollout status --watch statefulset/airflow-webserver-default - && kubectl rollout status --watch statefulset/airflow-scheduler-default - && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) - && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default-headless:8080/auth/token -H ''Content-Type: application/json'' -d ''{"username": "admin", "password": "''$AIRFLOW_ADMIN_PASSWORD''"}'' | jq ''.access_token'' | tr -d ''"'') - && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPATCH http://airflow-webserver-default-headless:8080/api/v2/dags/sparkapp_dag -d ''{"is_paused": false}'' | jq - && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPOST http://airflow-webserver-default-headless:8080/api/v2/dags/sparkapp_dag/dagRuns -d ''{"logical_date": null}'' | jq - ', - ] + kubectl rollout status --watch statefulset/airflow-scheduler-default + AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) + ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default-headless:8080/auth/token -H 'Content-Type: application/json' -d '{"username": "admin", "password": "'$AIRFLOW_ADMIN_PASSWORD'"}' | jq -r .access_token) + curl -H "Authorization: Bearer $ACCESS_TOKEN" -H 'Content-Type: application/json' -XPATCH http://airflow-webserver-default-headless:8080/api/v2/dags/sparkapp_dag -d '{"is_paused": false}' | jq + curl -H "Authorization: Bearer $ACCESS_TOKEN" -H 'Content-Type: application/json' -XPOST http://airflow-webserver-default-headless:8080/api/v2/dags/sparkapp_dag/dagRuns -d '{"logical_date": null}' | jq volumeMounts: - name: airflow-credentials mountPath: /airflow-credentials From 7fc44125baa04eba8015b5c663bd92fbed4db50e Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 15 Jul 2025 17:46:40 +0200 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Nick <10092581+NickLarsenNZ@users.noreply.github.com> --- .../04-enable-and-run-date-dag.yaml | 22 +++++++++---------- stacks/airflow/airflow.yaml | 2 -- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml b/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml index cf316552..08eaccf3 100644 --- a/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml +++ b/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml @@ -13,18 +13,18 @@ spec: # restarted. Additionally, the db-init job takes a few minutes to complete before the cluster is deployed. The wait/watch steps # below are not "water-tight" but add a layer of stability by at least ensuring that the db is initialized and ready and that # all pods are reachable (albeit independent of each other). - command: [ - "bash", - "-c", - ' + command: + - bash + - -euo + - pipefail + - -c + - | kubectl rollout status --watch statefulset/airflow-webserver-default - && kubectl rollout status --watch statefulset/airflow-scheduler-default - && export AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) - && export ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default-headless:8080/auth/token -H ''Content-Type: application/json'' -d ''{"username": "admin", "password": "''$AIRFLOW_ADMIN_PASSWORD''"}'' | jq ''.access_token'' | tr -d ''"'') - && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPATCH http://airflow-webserver-default-headless:8080/api/v2/dags/date_demo -d ''{"is_paused": false}'' | jq - && curl -H "Authorization: Bearer $ACCESS_TOKEN" -H ''Content-Type: application/json'' -XPOST http://airflow-webserver-default-headless:8080/api/v2/dags/date_demo/dagRuns -d ''{"logical_date": null}'' | jq - ', - ] + kubectl rollout status --watch statefulset/airflow-scheduler-default + AIRFLOW_ADMIN_PASSWORD=$(cat /airflow-credentials/adminUser.password) + ACCESS_TOKEN=$(curl -XPOST http://airflow-webserver-default-headless:8080/auth/token -H 'Content-Type: application/json' -d '{"username": "admin", "password": "'$AIRFLOW_ADMIN_PASSWORD'"}' | jq -r .access_token) + curl -H "Authorization: Bearer $ACCESS_TOKEN" -H 'Content-Type: application/json' -XPATCH http://airflow-webserver-default-headless:8080/api/v2/dags/date_demo -d '{"is_paused": false}' | jq + curl -H "Authorization: Bearer $ACCESS_TOKEN" -H 'Content-Type: application/json' -XPOST http://airflow-webserver-default-headless:8080/api/v2/dags/date_demo/dagRuns -d '{"logical_date": null}' | jq volumeMounts: - name: airflow-credentials mountPath: /airflow-credentials diff --git a/stacks/airflow/airflow.yaml b/stacks/airflow/airflow.yaml index dab3f072..1a02b1ef 100644 --- a/stacks/airflow/airflow.yaml +++ b/stacks/airflow/airflow.yaml @@ -303,8 +303,6 @@ data: memory: limit: 1024Mi replicas: 3 - - # {% endraw %} --- apiVersion: v1