Skip to content

Commit 5dacaa7

Browse files
authored
feat(example): side-input manifests (#297)
Signed-off-by: Vigith Maurice <[email protected]>
1 parent 9311f25 commit 5dacaa7

File tree

5 files changed

+270
-0
lines changed

5 files changed

+270
-0
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Build stage: slim Python 3.11 base image with OS build tooling and Poetry installed.
FROM python:3.11-slim-bullseye AS builder
2+
3+
# Python, pip and Poetry are configured purely through environment variables.
# PYSETUP_PATH is the directory the later `udf` stage uses as its WORKDIR.
ENV PYTHONFAULTHANDLER=1 \
4+
PYTHONUNBUFFERED=1 \
5+
PYTHONHASHSEED=random \
6+
PIP_NO_CACHE_DIR=on \
7+
PIP_DISABLE_PIP_VERSION_CHECK=on \
8+
PIP_DEFAULT_TIMEOUT=100 \
9+
POETRY_HOME="/opt/poetry" \
10+
POETRY_VIRTUALENVS_IN_PROJECT=true \
11+
POETRY_NO_INTERACTION=1 \
12+
PYSETUP_PATH="/opt/pysetup"
13+
14+
# Put $POETRY_HOME/bin first on PATH (presumably where the installer below places `poetry` — confirm).
ENV PATH="$POETRY_HOME/bin:$PATH"
15+
16+
# Install OS packages, drop the apt lists to keep the layer small, then
# install Poetry by piping the script from install.python-poetry.org to python3.
RUN apt-get update \
17+
&& apt-get install --no-install-recommends -y \
18+
curl \
19+
wget \
20+
# deps for building python deps
21+
build-essential \
22+
&& apt-get install -y git \
23+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
24+
&& curl -sSL https://install.python-poetry.org | python3 -
25+
26+
# Final stage: extends the builder stage with the example sources and their dependencies.
FROM builder AS udf
27+
28+
WORKDIR $PYSETUP_PATH
29+
# Copy the whole build context into the image; the wheel added further down
# is expected to be part of it.
COPY ./ ./
30+
31+
# NOTE(review): a bare `pip` invocation installs nothing; presumably a leftover sanity check — confirm.
RUN pip
32+
33+
# Resolve and install the dependencies declared in pyproject.toml (project itself is not installed: --no-root).
RUN poetry lock
34+
RUN poetry install --no-cache --no-root && \
35+
rm -rf ~/.cache/pypoetry/
36+
# NOTE(review): the wheel filename is pinned to cp311 / manylinux aarch64; builds for other
# architectures would presumably need a different wheel — confirm.
RUN poetry add $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
37+
38+
# The same image serves both roles; sideinput_example.py picks its mode from the MAPPER env var.
CMD ["poetry", "run", "python", "sideinput_example.py"]
39+
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
To create the `wheel` file, refer to the [root README](../../README.md).
2+
3+
## How to build the image
4+
5+
```bash
6+
docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1 --load
7+
```
8+
9+
### `k3d`
10+
11+
Now load the image into `k3d`:
12+
13+
```bash
14+
k3d image import quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1
15+
```
16+
17+
### Minikube
18+
19+
```bash
20+
minikube image load quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1
21+
```
22+
23+
## Run the pipeline
24+
25+
```bash
26+
kubectl apply -f pipeline.yaml
27+
```
28+
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
apiVersion: numaflow.numaproj.io/v1alpha1
2+
kind: Pipeline
3+
metadata:
4+
name: simple-sideinput
5+
spec:
6+
sideInputs:
7+
- name: myticker
8+
container:
9+
image: quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1
10+
imagePullPolicy: Never
11+
trigger:
12+
schedule: "*/10 * * * * *"
13+
vertices:
14+
- name: in
15+
source:
16+
# A self data generating source
17+
generator:
18+
rpu: 1
19+
duration: 1s
20+
- name: si-map
21+
udf:
22+
container:
23+
image: quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1
24+
imagePullPolicy: Never
25+
env:
26+
- name: MAPPER
27+
value: "true"
28+
sideInputs:
29+
- myticker
30+
- name: out
31+
sink:
32+
# A simple log printing sink
33+
log: { }
34+
edges:
35+
- from: in
36+
to: si-map
37+
- from: si-map
38+
to: out
39+
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[project]
2+
name = "sideinput-example"
3+
version = "0.1.0"
4+
description = "Side Input Example with Retriever and Mapper"
5+
authors = [
6+
{ name = "Vigith Maurice", email = "[email protected]" }
7+
]
8+
readme = "README.md"
9+
requires-python = ">=3.11"
10+
dependencies = [
11+
"watchfiles",
12+
"watchdog",
13+
]
14+
15+
16+
[build-system]
17+
requires = ["poetry-core>=2.0.0,<3.0.0"]
18+
build-backend = "poetry.core.masonry.api"
19+
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
"""
2+
Side Input Example for pynumaflow-lite.
3+
4+
This module contains both a SideInput retriever and a Mapper that reads from side inputs.
5+
The mode is controlled by the MAPPER environment variable:
6+
- If MAPPER is set to "true", runs as a Mapper that reads side input files
7+
- Otherwise, runs as a SideInput retriever that broadcasts values
8+
"""
9+
import asyncio
10+
import os
11+
import signal
12+
import threading
13+
from threading import Thread
14+
import datetime
15+
16+
from pynumaflow_lite import sideinputer, mapper
17+
from watchfiles import watch
18+
19+
20+
class ExampleSideInput(sideinputer.SideInput):
    """
    SideInput retriever whose every retrieval broadcasts the current local timestamp.
    """

    def __init__(self):
        # Running count of retrievals served by this instance.
        self.counter = 0

    async def retrieve_handler(self) -> sideinputer.Response:
        """
        Build the response for a single side-input retrieval.

        Returns:
            A broadcast response carrying the UTF-8 bytes of
            "an example: <current local datetime>".
        """
        self.counter += 1
        # The payload is the value handed to broadcast_message(), which marks
        # the response as one that should be broadcast.
        payload = f"an example: {str(datetime.datetime.now())}".encode("utf-8")
        return sideinputer.Response.broadcast_message(payload)
38+
39+
40+
class SideInputHandler(mapper.Mapper):
    """
    A Mapper that answers every datum with the latest side-input value.

    The value is loaded once through ``init_data_value`` and refreshed by
    ``file_watcher`` (intended to run in a background thread) whenever the
    watched side-input file changes.
    """

    # Most recently read side-input value; stays "no_value" until a read succeeds.
    data_value = "no_value"
    # Guards data_value between the watcher thread and the async handler.
    data_value_lock = threading.Lock()

    # Name of the side-input file to look for under sideinputer.DIR_PATH.
    watched_file = "myticker"

    async def handler(self, keys: list[str], datum: mapper.Datum) -> mapper.Messages:
        """
        Produce a single message containing the current side-input value.

        Args:
            keys: Keys of the incoming datum (not used).
            datum: The incoming datum (not used).

        Returns:
            A ``mapper.Messages`` holding one message with the UTF-8 encoded value.
        """
        # Take a snapshot under the lock so a concurrent refresh cannot interleave.
        with self.data_value_lock:
            current_value = self.data_value

        messages = mapper.Messages()
        messages.append(mapper.Message(current_value.encode("utf-8")))
        return messages

    def file_watcher(self):
        """
        Watch the side input directory and reload the value on every change.

        Iterates the change batches yielded by ``watchfiles.watch`` and blocks
        the calling thread while doing so.
        """
        for changes in watch(sideinputer.DIR_PATH):
            # Each change is a (change_type, path) pair; only the path matters here.
            for _change_type, file_path in changes:
                if file_path.endswith(self.watched_file):
                    with self.data_value_lock:
                        self.update_data_from_file(file_path)

    def init_data_value(self):
        """Read the SIDE INPUT FILE for initial value before starting the server."""
        path = os.path.join(sideinputer.DIR_PATH, self.watched_file)
        print(f"Initializing side input from: {path}")
        self.update_data_from_file(path)

    def update_data_from_file(self, path):
        """
        Load the stripped contents of ``path`` into ``data_value``.

        Read and decode failures are reported on stdout and leave the previous
        value untouched, so a missing or half-written file is not fatal.

        Args:
            path: Location of the side-input file to read.
        """
        try:
            # Decode explicitly as UTF-8 (the retriever in this module encodes
            # its payload as UTF-8) instead of relying on the locale default.
            with open(path, encoding="utf-8") as file:
                value = file.read().strip()
        except (OSError, UnicodeError) as e:
            print(f"Error reading file: {e}")
            return
        self.data_value = value
        print(f"Data value variable set to: {self.data_value}")
86+
87+
88+
# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
# These statements run at module import time, before either server coroutine is started.
# SIGINT is bound to signal.default_int_handler, the stock handler that raises KeyboardInterrupt.
89+
signal.signal(signal.SIGINT, signal.default_int_handler)
90+
try:
91+
# SIGTERM is reset to SIG_DFL, i.e. the default action for that signal.
signal.signal(signal.SIGTERM, signal.SIG_DFL)
92+
except AttributeError:
93+
# Best-effort: presumably guards platforms whose signal module lacks SIGTERM — TODO confirm.
pass
94+
95+
96+
async def start_sideinput():
    """Run the SideInput retriever server until it stops or this task is cancelled."""
    server = sideinputer.SideInputAsyncServer()
    retriever = ExampleSideInput()

    # Have the running loop turn SIGINT and SIGTERM into a server stop request.
    event_loop = asyncio.get_running_loop()
    for signum in (signal.SIGINT, signal.SIGTERM):
        event_loop.add_signal_handler(signum, lambda: server.stop())

    try:
        await server.start(retriever)
    except asyncio.CancelledError:
        # Cancellation of this task also stops the server.
        server.stop()
    else:
        print("SideInput server shutting down gracefully...")
110+
111+
112+
async def start_mapper():
    """Run the Mapper server, serving values refreshed from the side-input file."""
    server = mapper.MapAsyncServer()
    side_input_mapper = SideInputHandler()

    # Load the current side-input value first, then keep it fresh from a
    # daemon thread so the watcher never keeps the process alive.
    side_input_mapper.init_data_value()
    Thread(target=side_input_mapper.file_watcher, daemon=True).start()

    # Have the running loop turn SIGINT and SIGTERM into a server stop request.
    event_loop = asyncio.get_running_loop()
    for signum in (signal.SIGINT, signal.SIGTERM):
        event_loop.add_signal_handler(signum, lambda: server.stop())

    try:
        await server.start(side_input_mapper)
    except asyncio.CancelledError:
        # Cancellation of this task also stops the server.
        server.stop()
    else:
        print("Mapper server shutting down gracefully...")
133+
134+
135+
if __name__ == "__main__":
    # MAPPER=true (case-insensitive) selects the mapper role; any other value,
    # or an unset variable, selects the side input retriever role.
    if os.environ.get("MAPPER", "").lower() == "true":
        print("Starting as Mapper (reading side inputs)...")
        entrypoint = start_mapper
    else:
        print("Starting as SideInput retriever...")
        entrypoint = start_sideinput

    asyncio.run(entrypoint())
145+

0 commit comments

Comments
 (0)