Eager rate limit (#3133)

wild-endeavor · web-flow · commit 57c7c7e72e50 · 2025-02-14T09:45:19.000-08:00
Signed-off-by: Yee Hing Tong &lt;wild-endeavor@users.noreply.github.com&gt;
diff --git a/flytekit/core/worker_queue.py b/flytekit/core/worker_queue.py
@@ -23,6 +23,7 @@
 from flytekit.loggers import developer_logger, logger
 from flytekit.models.common import Labels
 from flytekit.models.core.execution import WorkflowExecutionPhase
+from flytekit.utils.rate_limiter import RateLimiter
 
 if typing.TYPE_CHECKING:
     from flytekit.remote.remote_callable import RemoteEntity
@@ -185,6 +186,7 @@ def __init__(self, remote: FlyteRemote, ss: SerializationSettings, tag: str, roo
         )
         self.__runner_thread.start()
         atexit.register(self._close, stopping_condition=self.stopping_condition, runner=self.__runner_thread)
+        self.rate_limiter = RateLimiter(rpm=60)
 
         # Executions should be tracked in the following way:
         #  a) you should be able to list by label, all executions generated by the current eager task,
@@ -219,7 +221,7 @@ def reconcile_one(self, update: Update):
         try:
             item = update.work_item
             if item.wf_exec is None:
-                logger.warning(f"reconcile should launch for {id(item)} entity name: {item.entity.name}")
+                logger.info(f"reconcile should launch for {id(item)} entity name: {item.entity.name}")
                 wf_exec = self.launch_execution(update.work_item, update.idx)
                 update.wf_exec = wf_exec
                 # Set this to running even if the launched execution was a re-run and already succeeded.
@@ -355,7 +357,7 @@ def get_execution_name(self, entity: RunnableEntity, idx: int, input_kwargs: dic
 
     def launch_execution(self, wi: WorkItem, idx: int) -> FlyteWorkflowExecution:
         """This function launches executions."""
-        logger.warning(f"Launching execution for {wi.entity.name} {idx=} with {wi.input_kwargs}")
+        logger.info(f"Launching execution for {wi.entity.name} {idx=} with {wi.input_kwargs}")
         if wi.result is None and wi.error is None:
             l = self.get_labels()
             e = self.get_env()
@@ -370,6 +372,7 @@ def launch_execution(self, wi: WorkItem, idx: int) -> FlyteWorkflowExecution:
                 assert self.ss.version
                 version = self.ss.version
 
+            self.rate_limiter.sync_acquire()
             # todo: if the execution already exists, remote.execute will return that execution. in the future
             #  we can add input checking to make sure the inputs are indeed a match.
             wf_exec = self.remote.execute(
diff --git a/flytekit/utils/rate_limiter.py b/flytekit/utils/rate_limiter.py
@@ -0,0 +1,54 @@
+import asyncio
+from collections import deque
+from datetime import datetime, timedelta
+
+from flytekit.loggers import developer_logger
+from flytekit.utils.asyn import run_sync
+
+
+class RateLimiter:
+    """Rate limiter that allows up to `rpm` acquisitions per `delay` window (the window is one minute)."""
+
+    def __init__(self, rpm: int):
+        """Set up the limiter; raises ValueError unless `rpm` is an int between 1 and 100."""
+        if not isinstance(rpm, int) or rpm <= 0 or rpm > 100:
+            raise ValueError("Rate must be a positive integer between 1 and 100")
+        self.rpm = rpm
+        self.queue = deque()  # timestamps of granted or scheduled acquisitions, oldest at the left
+        self.sem = asyncio.Semaphore(rpm)  # bounds how many coroutines run the body of acquire() at once
+        self.delay = timedelta(seconds=60)  # always 60 seconds since we're using a per-minute rate limiter
+
+    def sync_acquire(self):
+        """Synchronous entry point: hands the acquire coroutine function to run_sync."""
+        run_sync(self.acquire)
+
+    async def acquire(self):
+        """Take one slot, sleeping first when `rpm` slots are already recorded inside the current window."""
+        async with self.sem:
+            now = datetime.now()  # NOTE(review): naive wall-clock time; a system clock change would skew waits
+            # Start by clearing out old data: drop timestamps that have aged out of the window.
+            while self.queue and (now - self.queue[0]) > self.delay:
+                self.queue.popleft()
+
+            # Now that the queue only has valid entries, we'll need to wait if the queue is full.
+            if len(self.queue) >= self.rpm:
+                # Capacity reached: this caller has to wait until the oldest entry leaves the window.
+                # Pop that entry so no other coroutine bases its wait on the same timestamp, then append the
+                # time this caller will wake up (earliest + delay) BEFORE awaiting. Once we await, other
+                # coroutines can run, and they must still see a full queue. Net effect: queue length is unchanged.
+                # Per the original author's reasoning, the semaphore admits at most `rpm` coroutines here, so
+                # the entry appended at the tail is not the one another waiting coroutine pops.
+                earliest = self.queue.popleft()
+                delay: timedelta = (earliest + self.delay) - now
+                if delay.total_seconds() > 0:
+                    next_time = earliest + self.delay
+                    self.queue.append(next_time)
+                    developer_logger.debug(
+                        f"Capacity reached - removed time {earliest} and added back {next_time}, sleeping for {delay.total_seconds()}"
+                    )
+                    await asyncio.sleep(delay.total_seconds())
+                else:
+                    developer_logger.debug(f"No more need to wait, {earliest=} vs {now=}")
+                    self.queue.append(now)
+            else:
+                self.queue.append(now)
diff --git a/tests/flytekit/unit/utils/test_rate_limiter.py b/tests/flytekit/unit/utils/test_rate_limiter.py
@@ -0,0 +1,55 @@
+import pytest
+import sys
+import timeit
+import asyncio
+
+from datetime import timedelta
+from flytekit.utils.rate_limiter import RateLimiter
+
+
+async def launch_requests(rate_limiter: RateLimiter, total: int):
+    """Fire `total` concurrent acquire() tasks at the limiter and wait for all of them."""
+    await asyncio.gather(*(asyncio.create_task(rate_limiter.acquire()) for _ in range(total)))
+
+
+async def helper_for_async(rpm: int, total: int):
+    limiter = RateLimiter(rpm=rpm)
+    limiter.delay = timedelta(seconds=1)  # shrink the window from a minute to a second for the test
+    await launch_requests(rate_limiter=limiter, total=total)
+
+
+def runner_for_async(rpm: int, total: int):
+    """Drive helper_for_async to completion on the loop returned by asyncio.get_event_loop()."""
+    return asyncio.get_event_loop().run_until_complete(helper_for_async(rpm, total))
+
+
+@pytest.mark.asyncio
+def test_rate_limiter():
+    """rpm=2 with a 1s window: 2 acquires return almost at once, 6 acquires take about 2 seconds."""
+    quick, throttled = (timeit.timeit(lambda n=n: runner_for_async(2, n), number=1) for n in (2, 6))
+    assert quick < 0.25
+    assert round(throttled) == 2
+
+
+async def sync_wrapper(rate_limiter: RateLimiter):
+    rate_limiter.sync_acquire()  # coroutine shim so the synchronous entry point can be scheduled as a task
+
+
+async def helper_for_sync(rpm: int, total: int):
+    """Run `total` sync_wrapper tasks against a fresh limiter whose window is shortened to one second."""
+    limiter = RateLimiter(rpm=rpm)
+    limiter.delay = timedelta(seconds=1)
+    await asyncio.gather(*(asyncio.create_task(sync_wrapper(limiter)) for _ in range(total)))
+
+
+def runner_for_sync(rpm: int, total: int):
+    """Drive helper_for_sync to completion on the loop returned by asyncio.get_event_loop()."""
+    return asyncio.get_event_loop().run_until_complete(helper_for_sync(rpm, total))
+
+
+@pytest.mark.asyncio
+def test_rate_limiter_s():
+    """Same timing expectations as the async test, exercised through sync_acquire()."""
+    quick, throttled = (timeit.timeit(lambda n=n: runner_for_sync(2, n), number=1) for n in (2, 6))
+    assert quick < 0.25
+    assert round(throttled) == 2