Skip to content

Commit 294f562

Browse files
committed
wip: kafka dag
1 parent 404459c commit 294f562

File tree

4 files changed

+100
-19
lines changed

4 files changed

+100
-19
lines changed

demos/demos-v2.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ demos:
4848
manifests:
4949
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/01-airflow-spark-clusterrole.yaml
5050
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/02-airflow-spark-clusterrolebinding.yaml
51-
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml
51+
#- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml
5252
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml
5353
supportedNamespaces: []
5454
resourceRequests:

stacks/airflow/airflow.yaml

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,44 +26,101 @@ spec:
2626
- name: airflow-dags
2727
mountPath: /dags/pyspark_pi.yaml
2828
subPath: pyspark_pi.yaml
29+
- name: airflow-dags
30+
mountPath: /dags/kafka.py
31+
subPath: kafka.py
2932
webservers:
3033
roleConfig:
3134
listenerClass: external-unstable
3235
config:
3336
resources:
3437
cpu:
35-
min: 400m
36-
max: "1"
38+
min: "2"
39+
max: "3"
3740
memory:
38-
limit: 2Gi
39-
gracefulShutdownTimeout: 30s
41+
limit: 3Gi
42+
envOverrides: &envOverrides
43+
AIRFLOW__CORE__DAGS_FOLDER: "/dags"
44+
PYTHONPATH: "/stackable/app/log_config:/dags"
45+
AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D"
46+
podOverrides: &podOverrides
47+
spec:
48+
containers:
49+
- name: airflow
50+
env:
51+
- name: KAFKA_BOOTSTRAP
52+
valueFrom:
53+
configMapKeyRef:
54+
name: kafka
55+
key: KAFKA
56+
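- name: AIRFLOW_CONN_KAFKA_CONN  # NOTE(review): bootstrap.servers in the value is hard-coded; presumably it should be built from $(KAFKA_BOOTSTRAP) defined above — confirm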
57+
value: "{\"conn_type\": \"kafka\", \"extra\": {\"bootstrap.servers\": \"kafka-broker-default-0-listener-broker.demo.svc.cluster.local:9092\", \"group.id\": \"airflow_group\", \"auto.offset.reset\": \"latest\"}}"
4058
roleGroups:
4159
default:
42-
envOverrides: &envOverrides
43-
AIRFLOW__CORE__DAGS_FOLDER: "/dags"
44-
AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D"
4560
replicas: 1
4661
kubernetesExecutors:
4762
envOverrides: *envOverrides
63+
podOverrides: *podOverrides
4864
schedulers:
49-
config:
50-
gracefulShutdownTimeout: 30s
51-
resources:
52-
cpu:
53-
min: 400m
54-
max: "1"
55-
memory:
56-
limit: 1Gi
65+
envOverrides: *envOverrides
66+
podOverrides: *podOverrides
67+
roleGroups:
68+
default:
69+
replicas: 1
70+
dagProcessors:
71+
envOverrides: *envOverrides
72+
podOverrides: *podOverrides
73+
roleGroups:
74+
default:
75+
replicas: 1
76+
triggerers:
77+
envOverrides: *envOverrides
78+
podOverrides: *podOverrides
5779
roleGroups:
5880
default:
59-
envOverrides: *envOverrides
6081
replicas: 1
6182
---
6283
apiVersion: v1
6384
kind: ConfigMap
6485
metadata:
6586
name: airflow-dags
6687
data:
88+
kafka.py: |
89+
from airflow.providers.apache.kafka.triggers.msg_queue import KafkaMessageQueueTrigger
90+
from airflow.providers.standard.operators.empty import EmptyOperator
91+
from airflow.sdk import DAG, Asset, AssetWatcher
92+
93+
import logging
94+
logger = logging.getLogger(__name__)
95+
96+
logger.info("✅ kafka.apply_function module imported")
97+
98+
def apply_function(message):
99+
try:
100+
logger.info("apply_function called")
101+
logger.info("message payload: %r", message.value())
102+
return True
103+
except Exception:
104+
logger.exception("apply_function failed")
105+
return False
106+
107+
# Define a trigger that listens to an Apache Kafka message queue
108+
trigger = KafkaMessageQueueTrigger(
109+
topics=["test-topic"],
110+
apply_function="kafka.apply_function",
111+
kafka_config_id="kafka_conn",
112+
apply_function_args=None,
113+
apply_function_kwargs=None,
114+
poll_timeout=1,
115+
poll_interval=5,
116+
)
117+
118+
# Define an asset that watches for messages on the queue
119+
asset = Asset("kafka_queue_asset", watchers=[AssetWatcher(name="kafka_watcher", trigger=trigger)])
120+
121+
with DAG(dag_id="example_kafka_watcher", schedule=[asset]) as dag:
122+
EmptyOperator(task_id="task")
123+
67124
date_demo.py: |
68125
"""Example DAG returning the current date"""
69126
from datetime import datetime, timedelta

stacks/airflow/kafka.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
---
2+
apiVersion: kafka.stackable.tech/v1alpha1
3+
kind: KafkaCluster
4+
metadata:
5+
name: kafka
6+
spec:
7+
image:
8+
productVersion: 4.1.0
9+
clusterConfig:
10+
tls:
11+
serverSecretClass: null
12+
controllers:
13+
roleGroups:
14+
default:
15+
replicas: 1
16+
brokers:
17+
config:
18+
bootstrapListenerClass: cluster-internal
19+
brokerListenerClass: cluster-internal
20+
roleGroups:
21+
default:
22+
replicas: 1

stacks/stacks-v2.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,14 @@ stacks:
139139
- listener
140140
- secret
141141
- airflow
142-
- spark-k8s # Some demo does schedule a Spark job
142+
- spark-k8s
143+
- kafka
143144
labels:
144145
- airflow
145146
manifests:
146147
- helmChart: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/_templates/postgresql-airflow.yaml
147-
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/stacks/airflow/airflow.yaml
148+
- plainYaml: stacks/airflow/airflow.yaml
149+
- plainYaml: stacks/airflow/kafka.yaml
148150
supportedNamespaces: []
149151
resourceRequests:
150152
cpu: 3400m

0 commit comments

Comments
 (0)