Merge branch 'main' into batch_router

DNXie · DNXie · commit 1503fc5819e5 · 2025-09-22T15:12:01.000-07:00
diff --git a/src/forge/controller/service/__init__.py b/src/forge/controller/service/__init__.py
@@ -7,7 +7,7 @@
 from .interface import ServiceInterface, Session, SessionContext
 from .metrics import ServiceMetrics
 from .replica import Replica, ReplicaMetrics, ReplicaState
-from .router import LeastLoadedRouter, RoundRobinRouter, SessionRouter
+from .router import BatchRouter, LeastLoadedRouter, RoundRobinRouter, SessionRouter
 from .service import Service, ServiceActor, ServiceConfig
 
 __all__ = [
@@ -24,4 +24,5 @@
     "LeastLoadedRouter",
     "RoundRobinRouter",
     "SessionRouter",
+    "BatchRouter",
 ]
diff --git a/src/forge/controller/service/interface.py b/src/forge/controller/service/interface.py
@@ -286,7 +286,7 @@ class Router(ABC):
     """Abstract base class for routing logic."""
 
     @abstractmethod
-    def get_replica(
+    async def get_replica(
         self,
         healthy_replicas: List[Replica],
         sess_id: str | None = None,
diff --git a/src/forge/controller/service/router.py b/src/forge/controller/service/router.py
@@ -4,8 +4,10 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import asyncio
 import logging
-from typing import Dict, List
+import time
+from typing import Dict, List, Optional
 
 from .interface import Router
 from .replica import Replica
@@ -20,7 +22,7 @@ class RoundRobinRouter(Router):
     def __init__(self):
         self._next_idx = 0
 
-    def get_replica(
+    async def get_replica(
         self,
         healthy_replicas: List[Replica],
         sess_id: str | None = None,
@@ -38,7 +40,7 @@ def get_replica(
 class LeastLoadedRouter(Router):
     """Always routes to the replica with the lowest current load."""
 
-    def get_replica(
+    async def get_replica(
         self,
         healthy_replicas: List[Replica],
         sess_id: str | None = None,
@@ -55,7 +57,7 @@ class SessionRouter(Router):
     def __init__(self, fallback_router: Router):
         self.fallback_router = fallback_router
 
-    def get_replica(
+    async def get_replica(
         self,
         healthy_replicas: List[Replica],
         sess_id: str | None = None,
@@ -78,7 +80,7 @@ def get_replica(
             del session_map[sess_id]
 
         # Use fallback router to assign a new replica
-        replica = self.fallback_router.get_replica(
+        replica = await self.fallback_router.get_replica(
             healthy_replicas, sess_id, session_map
         )
         session_map[sess_id] = replica.idx
@@ -88,3 +90,111 @@ def get_replica(
             replica.idx,
         )
         return replica
+
+class BatchRouter(Router):
+    """
+    Router wrapper that batches routing decisions.
+    Uses an inner router to pick the replica for each batch; every request in
+    a batch receives that same replica.

+    Args:
+        inner_router: The underlying Router instance used to make routing decisions
+        batch_max_size: Maximum number of requests to collect in a single batch (default: 8)
+        batch_max_wait_s: Maximum time in seconds to keep collecting once the first
+            request of a batch has arrived (default: 0.01)

+    Example:
+        rr_router = RoundRobinRouter()
+        batch_router = BatchRouter(rr_router, batch_max_size=16, batch_max_wait_s=0.02)

+        replica = await batch_router.get_replica(healthy_replicas, sess_id, session_map)
+    """

+    def __init__(
+        self,
+        inner_router: Router,
+        batch_max_size: int = 8,
+        batch_max_wait_s: float = 0.01,
+    ):
+        self.inner_router = inner_router
+        self.batch_max_size = batch_max_size
+        self.batch_max_wait_s = batch_max_wait_s

+        # Internal queue for batching routing requests
+        self._queue: asyncio.Queue = asyncio.Queue()
+        # Background task that processes batches continuously. create_task needs a
+        # running event loop, so the router must be constructed inside one.
+        self._batch_task: asyncio.Task = asyncio.create_task(self._batch_loop())

+    async def _batch_loop(self):
+        """Background task that continuously processes batches of routing requests.

+        The loop follows these steps:
+        1. Wait for the first request to start a new batch
+        2. Collect additional requests until batch_max_size or batch_max_wait_s is reached
+        3. Make a single routing decision for the entire batch
+        4. Fulfill all futures with the selected replica (or the routing error)

+        This process repeats indefinitely until the task is cancelled.
+        """
+        while True:
+            # Wait for first request. Only the most recent replica list and
+            # session map are kept: one decision is made for the whole batch.
+            fut, healthy_replicas, _, session_map = await self._queue.get()
+            futs = [fut]

+            # The wait window opens when the first request ARRIVES. Starting the
+            # clock before the blocking get() above would leave no time to
+            # collect more requests after an idle spell longer than the window.
+            deadline = time.monotonic() + self.batch_max_wait_s

+            while len(futs) < self.batch_max_size:
+                timeout = max(0.0, deadline - time.monotonic())
+                try:
+                    fut, healthy_replicas, _, session_map = await asyncio.wait_for(
+                        self._queue.get(), timeout
+                    )
+                except asyncio.TimeoutError:
+                    break
+                futs.append(fut)

+            try:
+                # Check if any replicas have become unhealthy
+                healthy_replicas = [r for r in healthy_replicas if r.healthy]
+                # One routing decision for the whole batch
+                replica = await self.inner_router.get_replica(
+                    healthy_replicas, None, session_map
+                )
+            except Exception as exc:
+                # Hand the failure to the waiting callers and keep serving; if it
+                # escaped, this task would end and later callers would never wake.
+                for fut in futs:
+                    if not fut.done():
+                        fut.set_exception(exc)
+                continue

+            # Fulfill all futures with the chosen replica. A caller cancelled while
+            # waiting leaves a finished future; set_result on it would raise.
+            for fut in futs:
+                if not fut.done():
+                    fut.set_result(replica)

+    async def get_replica(
+        self,
+        healthy_replicas: List[Replica],
+        sess_id: Optional[str] = None,
+        session_map: Optional[Dict[str, int]] = None,
+    ) -> Replica:
+        """Enqueue request and wait until batch assigns a replica.

+        The batch is routed with the ``healthy_replicas`` and ``session_map`` of
+        its newest request; ``sess_id`` is queued but the inner router gets None.
+        Returns the replica shared by the batch; re-raises inner-router errors.
+        """
+        fut = asyncio.get_running_loop().create_future()

+        # Queue the request for batching - this is non-blocking
+        self._queue.put_nowait((fut, healthy_replicas, sess_id, session_map))

+        # Wait for the batch processor to resolve our future
+        return await fut
diff --git a/src/forge/controller/service/service.py b/src/forge/controller/service/service.py
@@ -477,9 +477,9 @@ async def _get_replica(self, sess_id: str | None) -> "Replica":
         healthy_replicas = [r for r in self._replicas if r.healthy]
         if sess_id is None:
             # No session, use the default router
-            return self._default_router.get_replica(healthy_replicas)
+            return await self._default_router.get_replica(healthy_replicas)
 
-        return self._session_router.get_replica(
+        return await self._session_router.get_replica(
             healthy_replicas, sess_id, self._session_replica_map
         )
 
diff --git a/tests/unit_tests/test_service.py b/tests/unit_tests/test_service.py
@@ -14,6 +14,7 @@
 import pytest
 from forge.controller import ForgeActor
 from forge.controller.service import (
+    BatchRouter,
     LeastLoadedRouter,
     Replica,
     ReplicaState,
@@ -666,8 +667,8 @@ async def test_session_router_with_round_robin_fallback():
     fallback = RoundRobinRouter()
     router = SessionRouter(fallback)
 
-    r1 = router.get_replica(replicas, sess_id="sess1", session_map=session_map)
-    r2 = router.get_replica(replicas, sess_id="sess2", session_map=session_map)
+    r1 = await router.get_replica(replicas, sess_id="sess1", session_map=session_map)
+    r2 = await router.get_replica(replicas, sess_id="sess2", session_map=session_map)
 
     assert r1.idx != r2.idx
     assert set(session_map.values()) == {0, 1}
@@ -678,11 +679,121 @@ async def test_session_router_with_round_robin_fallback():
     fallback = LeastLoadedRouter()
     router = SessionRouter(fallback)
 
-    r1 = router.get_replica(replicas, sess_id="sess1", session_map=session_map)
-    r2 = router.get_replica(replicas, sess_id="sess2", session_map=session_map)
+    r1 = await router.get_replica(replicas, sess_id="sess1", session_map=session_map)
+    r2 = await router.get_replica(replicas, sess_id="sess2", session_map=session_map)
 
     assert r1.idx == r2.idx == 0
 
+@pytest.mark.asyncio
+async def test_batching_router_batchsize_with_roundrobin():
+    """Batch should flush when max batch size is reached using RoundRobinRouter."""
+    replicas = [make_replica(0), make_replica(1)]
+    batch_size = 3

+    router = BatchRouter(
+        RoundRobinRouter(),
+        batch_max_size=batch_size,
+        batch_max_wait_s=0.5,  # far longer than a size-triggered flush needs
+    )

+    # Enqueue `batch_size + 1` requests: a full batch plus a leftover (timeout flush)
+    tasks = [
+        asyncio.create_task(router.get_replica(replicas)) for _ in range(batch_size + 1)
+    ]
+    # A full batch must be flushed by size, i.e. well before the 0.5s wait expires
+    full_batch = await asyncio.wait_for(asyncio.gather(*tasks[:batch_size]), 0.25)
+    results = full_batch + [await tasks[batch_size]]
+    # One routing decision per batch: the full batch shares a single replica
+    assert len({r.idx for r in full_batch}) == 1
+    # Check all results are healthy replicas with existing indices
+    assert all(r.state == ReplicaState.HEALTHY for r in results)
+    assert {r.idx for r in results}.issubset({0, 1})
+    # Ensure batch queue is empty after flush
+    assert router._queue.qsize() == 0
+    router._batch_task.cancel()  # do not let the background task outlive the test
+
+
+@pytest.mark.asyncio
+async def test_batching_router_skips_unhealthy_replicas():
+    """A replica that turns unhealthy before the batch is routed must not be chosen."""
+    preferred = make_replica(0, load=0)
+    backup = make_replica(1, load=10)
+    replicas = [preferred, backup]

+    router = BatchRouter(LeastLoadedRouter(), batch_max_size=4, batch_max_wait_s=0.5)

+    # Two requests are not enough to fill the batch, so both stay queued
+    pending = []
+    for _ in range(2):
+        pending.append(asyncio.create_task(router.get_replica(replicas)))

+    # While the batch is still open, take the least-loaded replica out of service
+    await asyncio.sleep(0.01)
+    preferred.state = ReplicaState.UNHEALTHY

+    routed = await asyncio.gather(*pending)

+    # Every request must land on the replica that is still healthy (idx=1)
+    assert all(r.idx == 1 for r in routed)
+    assert routed[0].state == ReplicaState.HEALTHY
+
+
+@pytest.mark.asyncio
+async def test_batching_router_two_batches_timing():
+    """Test that two sequential batches are processed independently with proper timing.

+    Each batch holds fewer requests than batch_max_size, so each one has to be
+    flushed by its own batch_max_wait_s window.
+    """
+    import time

+    replicas = [make_replica(0, load=5), make_replica(1, load=10)]
+    batch_wait_time = 0.05  # 50ms timeout

+    # Bounds for the measured duration of one batch. The lower bound is slightly
+    # below the window because the router's timer and the clock read here need
+    # not agree to the microsecond. The upper bound allows a whole extra window,
+    # since a busy CI machine can easily add tens of milliseconds; a batch that
+    # waited for two windows is still caught.
+    min_duration = batch_wait_time - 0.005
+    max_duration = 2 * batch_wait_time

+    router = BatchRouter(
+        LeastLoadedRouter(),
+        batch_max_size=3,
+        batch_max_wait_s=batch_wait_time,
+    )

+    # Run two batches of 2 requests back to back. The same checks apply to both;
+    # for the second one they also show that a new timing cycle was started.
+    for batch_no in (1, 2):
+        # perf_counter is monotonic: wall-clock adjustments cannot skew the result
+        start_time = time.perf_counter()

+        # Create the tasks of this batch
+        batch_tasks = [
+            asyncio.create_task(router.get_replica(replicas)) for _ in range(2)
+        ]

+        # Wait for the batch to complete (should timeout after batch_wait_time)
+        results = await asyncio.gather(*batch_tasks)
+        duration = time.perf_counter() - start_time

+        # Verify the batch took approximately the timeout duration; the message
+        # reports the measured value when the check fails
+        assert (
+            min_duration <= duration < max_duration
+        ), f"batch {batch_no} took {duration:.3f}s"

+        # Verify batch results (should pick lowest load replica)
+        assert all(r.idx == 0 for r in results)  # replica 0 has lower load
+        assert all(r.state == ReplicaState.HEALTHY for r in results)

+    # Ensure batch queue is empty after both batches
+    assert router._queue.qsize() == 0

+    # Stop the background batching task so it does not outlive the test
+    router._batch_task.cancel()
 
 # Router integeration tests
 
@@ -743,4 +854,4 @@ async def test_session_router_assigns_and_updates_session_map_in_service():
         assert values2[assigned_idx] == values1[assigned_idx] + 1
 
     finally:
-        await service.shutdown()
+        await service.shutdown()