chore: manifests for pynumaflow_lite source (#285)

vigith · web-flow · commit b0c7e7bd063c · 2025-11-12T17:07:35.000-08:00
diff --git a/packages/pynumaflow-lite/manifests/source/Dockerfile b/packages/pynumaflow-lite/manifests/source/Dockerfile
@@ -0,0 +1,38 @@
+FROM python:3.11-slim-bullseye AS builder
+
+ENV PYTHONFAULTHANDLER=1 \
+  PYTHONUNBUFFERED=1 \
+  PYTHONHASHSEED=random \
+  PIP_NO_CACHE_DIR=on \
+  PIP_DISABLE_PIP_VERSION_CHECK=on \
+  PIP_DEFAULT_TIMEOUT=100 \
+  POETRY_HOME="/opt/poetry" \
+  POETRY_VIRTUALENVS_IN_PROJECT=true \
+  POETRY_NO_INTERACTION=1 \
+  PYSETUP_PATH="/opt/pysetup"
+
+# Kept as a separate instruction so that $POETRY_HOME (set above) is already
+# defined when this value is expanded.
+ENV PATH="$POETRY_HOME/bin:$PATH"
+
+# The Poetry installer is downloaded to a file before it is executed: with
+# `curl ... | python3 -` a failed download is masked, because only the exit
+# status of the last command in a pipe is checked.
+RUN apt-get update \
+    && apt-get install --no-install-recommends -y \
+        curl \
+        wget \
+        # deps for building python deps
+        build-essential \
+    && apt-get install -y git \
+    && apt-get clean && rm -rf /var/lib/apt/lists/* \
+    && curl -fsSL https://install.python-poetry.org -o /tmp/install-poetry.py \
+    && python3 /tmp/install-poetry.py \
+    && rm -f /tmp/install-poetry.py
+
+FROM builder AS udf
+
+# Wheel file (expected in the build context) to install. The default matches
+# the aarch64 build; override with `--build-arg PYNUMAFLOW_LITE_WHEEL=<file>`
+# for another platform or version.
+ARG PYNUMAFLOW_LITE_WHEEL=pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+
+WORKDIR $PYSETUP_PATH
+COPY ./ ./
+
+RUN pip install "$PYSETUP_PATH/$PYNUMAFLOW_LITE_WHEEL"
+
+RUN poetry lock
+RUN poetry install --no-cache --no-root && \
+    rm -rf ~/.cache/pypoetry/
+
+CMD ["python", "simple_source.py"]
+
diff --git a/packages/pynumaflow-lite/manifests/source/README.md b/packages/pynumaflow-lite/manifests/source/README.md
@@ -0,0 +1,28 @@
+To create the `wheel` file, refer to the [root README](../../README.md).
+
+## How to build the image
+
+```bash
+docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1 --load
+```
+
+### `k3d`
+
+Import the image into your `k3d` cluster:
+
+```bash
+k3d image import quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1
+```
+
+### Minikube
+
+```bash
+minikube image load quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1
+```
+
+## Run the pipeline
+
+```bash
+kubectl apply -f pipeline.yaml
+```
+
diff --git a/packages/pynumaflow-lite/manifests/source/pipeline.yaml b/packages/pynumaflow-lite/manifests/source/pipeline.yaml
@@ -0,0 +1,21 @@
+# Two-vertex pipeline: a user-defined source vertex (`in`) connected to a
+# log sink vertex (`out`).
+apiVersion: numaflow.numaproj.io/v1alpha1
+kind: Pipeline
+metadata:
+  name: simple-source
+spec:
+  vertices:
+    - name: in
+      source:
+        udsource:
+          container:
+            image: quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1
+            # Never pull from a registry: the image must already be present in
+            # the cluster (see the k3d / Minikube load steps in the README).
+            imagePullPolicy: Never
+      limits:
+        # NOTE(review): presumably the number of records requested per read
+        # from the source; confirm against the Numaflow documentation.
+        readBatchSize: 5
+    - name: out
+      sink:
+        log: {}
+  edges:
+    - from: in
+      to: out
+
diff --git a/packages/pynumaflow-lite/manifests/source/pyproject.toml b/packages/pynumaflow-lite/manifests/source/pyproject.toml
@@ -0,0 +1,17 @@
+# Project metadata for the simple-source example.
+[project]
+name = "simple-source"
+version = "0.1.0"
+description = "User-defined source example using pynumaflow-lite"
+authors = [
+    { name = "Vigith Maurice", email = "vigith@gmail.com" }
+]
+readme = "README.md"
+requires-python = ">=3.11"
+# No dependencies are declared here; the accompanying Dockerfile installs the
+# pynumaflow_lite wheel with pip instead.
+dependencies = [
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
+
diff --git a/packages/pynumaflow-lite/manifests/source/simple_source.py b/packages/pynumaflow-lite/manifests/source/simple_source.py
@@ -0,0 +1,122 @@
+import asyncio
+import logging
+import signal
+from datetime import datetime, timezone
+from collections.abc import AsyncIterator
+
+from pynumaflow_lite import sourcer
+from pynumaflow_lite._source_dtypes import Sourcer
+
+# Configure logging: the root logger is set to INFO, so the DEBUG-level
+# per-offset messages emitted by ack_handler are not shown by default.
+logging.basicConfig(level=logging.INFO)
+_LOGGER = logging.getLogger(__name__)
+
+
+class SimpleSource(Sourcer):
+    """
+    Simple source that generates messages with incrementing numbers.
+    This is a class-based user-defined source implementation.
+    """
+
+    def __init__(self):
+        self.counter = 0
+        self.partition_idx = 0
+
+    async def read_handler(self, datum: sourcer.ReadRequest) -> AsyncIterator[sourcer.Message]:
+        """
+        The simple source generates messages with incrementing numbers.
+        """
+        _LOGGER.info(f"Read request: num_records={datum.num_records}, timeout_ms={datum.timeout_ms}")
+
+        # Generate the requested number of messages
+        for i in range(datum.num_records):
+            # Create message payload
+            payload = f"message-{self.counter}".encode("utf-8")
+
+            # Create offset
+            offset = sourcer.Offset(
+                offset=str(self.counter).encode("utf-8"),
+                partition_id=self.partition_idx
+            )
+
+            # Create message
+            message = sourcer.Message(
+                payload=payload,
+                offset=offset,
+                event_time=datetime.now(timezone.utc),
+                keys=["key1"],
+                headers={"source": "simple"}
+            )
+
+            _LOGGER.info(f"Generated message: {self.counter}")
+            self.counter += 1
+
+            yield message
+
+            # Small delay to simulate real source
+            await asyncio.sleep(0.1)
+
+    async def ack_handler(self, request: sourcer.AckRequest) -> None:
+        """
+        The simple source acknowledges the offsets.
+        """
+        _LOGGER.info(f"Acknowledging {len(request.offsets)} offsets")
+        for offset in request.offsets:
+            _LOGGER.debug(f"Acked offset: {offset.offset.decode('utf-8')}, partition: {offset.partition_id}")
+
+    async def nack_handler(self, request: sourcer.NackRequest) -> None:
+        """
+        The simple source negatively acknowledges the offsets.
+        """
+        _LOGGER.info(f"Negatively acknowledging {len(request.offsets)} offsets")
+        for offset in request.offsets:
+            _LOGGER.warning(f"Nacked offset: {offset.offset.decode('utf-8')}, partition: {offset.partition_id}")
+
+    async def pending_handler(self) -> sourcer.PendingResponse:
+        """
+        The simple source always returns zero to indicate there is no pending record.
+        """
+        return sourcer.PendingResponse(count=0)
+
+    async def partitions_handler(self) -> sourcer.PartitionsResponse:
+        """
+        The simple source always returns default partitions.
+        """
+        return sourcer.PartitionsResponse(partitions=[self.partition_idx])
+
+
+# Optional: ensure default signal handlers are in place so asyncio.run can handle them cleanly.
+# This runs at import time, before any event loop exists: SIGINT is bound to
+# Python's default handler (which raises KeyboardInterrupt) and SIGTERM to the
+# OS default action.
+signal.signal(signal.SIGINT, signal.default_int_handler)
+try:
+    signal.signal(signal.SIGTERM, signal.SIG_DFL)
+except AttributeError:
+    # NOTE(review): presumably guards platforms where signal.SIGTERM is not
+    # defined; confirm whether this branch is reachable.
+    pass
+
+
+async def start():
+    server = sourcer.SourceAsyncServer()
+
+    # Create an instance of the source handler
+    handler = SimpleSource()
+
+    # Register loop-level signal handlers to request graceful shutdown
+    loop = asyncio.get_running_loop()
+    try:
+        loop.add_signal_handler(signal.SIGINT, lambda: server.stop())
+        loop.add_signal_handler(signal.SIGTERM, lambda: server.stop())
+    except (NotImplementedError, RuntimeError):
+        pass
+
+    try:
+        await server.start(handler)
+        print("Shutting down gracefully...")
+    except asyncio.CancelledError:
+        try:
+            server.stop()
+        except Exception:
+            pass
+        return
+
+
+# Script entry point: run the start() coroutine to completion on a new event loop.
+if __name__ == "__main__":
+    asyncio.run(start())