tidyup/review (#64)

hampsterx · web-flow · commit 8ff62d89c79e · 2026-03-13T09:58:45.000+13:00
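Final bits of the tidy-up/review pass:
- Consumer: add `idle_timeout` (default 2.0s); `__anext__` now awaits the queue with that timeout instead of `get_nowait()` followed by a sleep, and `start_consumer()` is replaced by `_start_fetch_task()`.
- Producer: create the flush task in `start()` rather than `__init__`; `close()` always signals the flush task to stop and waits up to 10s without cancelling it; `flush()` spills to the overflow buffer when the queue is full instead of blocking on `queue.put()`; the overflow buffer is now a FIFO `deque`.
- Base, Consumer, Producer, MockProducer, MockConsumer and `_ClientHelper`: arguments after `stream_name` are keyword-only.
- README: document `idle_timeout` and one-shot vs continuous consumption.
- Housekeeping: ignore `.coverage`; drop unused variables and imports in tests, benchmark and consumer.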
diff --git a/.coverage b/.coverage
diff --git a/.gitignore b/.gitignore
@@ -11,5 +11,6 @@ temp.py
 .env
 .mypy_cache
 .tox
+.coverage
 release.sh
 CLAUDE.md
diff --git a/README.md b/README.md
@@ -305,12 +305,21 @@ If you wish to avoid excessive throttling or have multiple producers on a stream
 ```python
 from kinesis import Consumer
 
+# One-shot: consume until idle_timeout (default 2s) with no new records
 async with Consumer(stream_name="test") as consumer:
     async for item in consumer:
         print(item)
-    # Consumer continues to wait for new messages after catching up
+
+# Continuous: wrap in while True to keep consuming across idle gaps
+async with Consumer(stream_name="test") as consumer:
+    while True:
+        async for item in consumer:
+            print(item)
 ```
 
+> **Note**: `async for` ends after `idle_timeout` seconds of queue inactivity (default 2.0s).
+> For continuous consumption, wrap the `async for` in a `while True` loop.
+
 
 Options:
 
@@ -324,7 +333,7 @@ Options:
 | max_queue_size | 10000 | the fetch() task shard will block when queue is at max |
 | max_shard_consumers | None | Max number of shards to use. None = all |
 | record_limit | 10000 | Number of records to fetch with get_records() |
-| sleep_time_no_records | 2 | No of seconds to sleep when caught up |
+| sleep_time_no_records | 2 | Seconds to sleep per shard when no new records are returned by `get_records` |
 | iterator_type | TRIM_HORIZON | Default shard iterator type for new/unknown shards (ie start from start of stream). Alternatives are "LATEST" (ie end of stream), "AT_TIMESTAMP" (ie particular point in time, requires defining `timestamp` arg) |
 | shard_fetch_rate | 1 | No of fetches per second (max = 5). 1 is recommended as allows having multiple consumers without hitting the max limit. |
 | checkpointer | MemoryCheckPointer() | Checkpointer to use |
@@ -337,6 +346,7 @@ Options:
 | create_stream | False | Creates a Kinesis Stream based on the `stream_name` keyword argument. Note if stream already existing it will ignore |
 | create_stream_shards | 1 | Sets the amount of shard you want for your new stream. Note if stream already existing it will ignore  |
 | describe_timeout | 60 | Timeout in seconds for waiting for stream to become ACTIVE during startup. Increase for slow backends (e.g. LocalStack) |
+| idle_timeout | 2.0 | Seconds to wait for new records before ending iteration. Controls how long `async for` blocks on an empty queue before raising `StopAsyncIteration` |
 | timestamp | None | Timestamp to start reading stream from. Used with iterator type "AT_TIMESTAMP"
 
 ## Shard Management
diff --git a/benchmark.py b/benchmark.py
@@ -251,7 +251,7 @@ async def test_producer(
             ) as consumer:
 
                 # Ensure consumer is set up before producing
-                await consumer.start_consumer(wait_iterations=0)
+                consumer._start_fetch_task()
 
                 # Add small delay to ensure consumer is ready
                 await asyncio.sleep(1)
@@ -349,7 +349,7 @@ async def run_benchmark(args):
 
     if not args.dry_run:
         # Create the stream
-        async with StreamManager(stream_name, args.shards) as stream:
+        async with StreamManager(stream_name, args.shards):
 
             for iteration in range(args.iterations):
                 if args.iterations > 1:
@@ -512,7 +512,7 @@ def main():
     def cleanup_handler(signum=None, frame=None):
         """Handle cleanup on exit"""
         try:
-            loop = asyncio.get_running_loop()
+            asyncio.get_running_loop()
             asyncio.create_task(cleanup_all_streams())
         except RuntimeError:
             # No running loop, create one
diff --git a/kinesis/base.py b/kinesis/base.py
@@ -17,6 +17,7 @@ class Base:
     def __init__(
         self,
         stream_name: str,
+        *,
         session: Optional[AioSession] = None,
         endpoint_url: Optional[str] = None,
         region_name: Optional[str] = None,
diff --git a/kinesis/cli/stream.py b/kinesis/cli/stream.py
@@ -20,7 +20,7 @@
 class _ClientHelper(Base):
     """Minimal Base subclass for lightweight Kinesis API access (describe/list only)."""
 
-    def __init__(self, stream_name="", endpoint_url=None, region_name=None):
+    def __init__(self, stream_name="", *, endpoint_url=None, region_name=None):
         super().__init__(
             stream_name=stream_name,
             endpoint_url=endpoint_url,
diff --git a/kinesis/consumer.py b/kinesis/consumer.py
@@ -1,7 +1,6 @@
 import asyncio
 import logging
 from asyncio import TimeoutError
-from asyncio.queues import QueueEmpty
 from datetime import datetime, timezone
 from typing import Any, AsyncIterator, Dict, Optional
 
@@ -36,6 +35,7 @@ class Consumer(Base):
     def __init__(
         self,
         stream_name: str,
+        *,
         session: Optional[AioSession] = None,
         endpoint_url: Optional[str] = None,
         region_name: Optional[str] = None,
@@ -55,6 +55,7 @@ def __init__(
         create_stream: bool = False,
         create_stream_shards: int = 1,
         describe_timeout: int = 60,
+        idle_timeout: float = 2.0,
         timestamp: Optional[datetime] = None,
     ) -> None:
 
@@ -77,6 +78,8 @@ def __init__(
 
         self.sleep_time_no_records = sleep_time_no_records
 
+        self.idle_timeout = idle_timeout
+
         self.max_shard_consumers = max_shard_consumers
 
         self.record_limit = record_limit
@@ -669,26 +672,16 @@ def get_shard_status(self):
             "shard_details": shard_details,
         }
 
-    async def start_consumer(self, wait_iterations=10, wait_sleep=0.25):
-
-        # Start task to fetch periodically
-
+    def _start_fetch_task(self):
         self.fetch_task = asyncio.create_task(self._fetch())
 
-        # Wait a while until we have some results
-        for i in range(0, wait_iterations):
-            if self.fetch_task and self.queue.qsize() == 0:
-                await asyncio.sleep(wait_sleep)
-
-        log.debug("start_consumer completed.. queue size={}".format(self.queue.qsize()))
-
     async def __anext__(self):
 
         if not self.shards:
             await self.get_conn()
 
         if not self.fetch_task:
-            await self.start_consumer()
+            self._start_fetch_task()
 
         # Raise exception from Fetch Task to main task otherwise raise exception inside
         # Fetch Task will fail silently
@@ -702,23 +695,21 @@ async def __anext__(self):
 
         while True:
             try:
-                item = self.queue.get_nowait()
-
-                if item and isinstance(item, dict) and "__CHECKPOINT__" in item:
-                    if self.checkpointer:
-                        await self.checkpointer.checkpoint(
-                            item["__CHECKPOINT__"]["ShardId"],
-                            item["__CHECKPOINT__"]["SequenceNumber"],
-                        )
-                    checkpoint_count += 1
-                    if checkpoint_count >= max_checkpoints:
-                        log.warning(f"Processed {max_checkpoints} checkpoints, stopping iteration")
-                        raise StopAsyncIteration
-                    continue
-
-                return item
+                item = await asyncio.wait_for(self.queue.get(), timeout=self.idle_timeout)
+            except asyncio.TimeoutError:
+                log.debug(f"Queue idle for {self.idle_timeout}s, stopping iteration")
+                raise StopAsyncIteration from None
+
+            if item and isinstance(item, dict) and "__CHECKPOINT__" in item:
+                if self.checkpointer:
+                    await self.checkpointer.checkpoint(
+                        item["__CHECKPOINT__"]["ShardId"],
+                        item["__CHECKPOINT__"]["SequenceNumber"],
+                    )
+                checkpoint_count += 1
+                if checkpoint_count >= max_checkpoints:
+                    log.warning(f"Processed {max_checkpoints} checkpoints, stopping iteration")
+                    raise StopAsyncIteration
+                continue
 
-            except QueueEmpty:
-                log.debug("Queue empty..")
-                await asyncio.sleep(self.sleep_time_no_records)
-                raise StopAsyncIteration
+            return item
diff --git a/kinesis/producer.py b/kinesis/producer.py
@@ -3,6 +3,7 @@
 import math
 import time
 from asyncio.queues import QueueEmpty
+from collections import deque
 from typing import Any, Awaitable, Callable, Optional
 
 from aiobotocore.session import AioSession
@@ -22,6 +23,7 @@ class Producer(Base):
     def __init__(
         self,
         stream_name: str,
+        *,
         session: Optional[AioSession] = None,
         endpoint_url: Optional[str] = None,
         region_name: Optional[str] = None,
@@ -81,14 +83,14 @@ def __init__(
 
         self._flush_lock = asyncio.Lock()
         self._stop_event = asyncio.Event()
-        self.flush_task = asyncio.create_task(self._flush())
+        self.flush_task = None
         self.after_flush_fun = after_flush_fun
 
         # keep track of these (used by unit test only)
         self.throughput_exceeded_count = 0
 
-        # overflow buffer
-        self.overflow = []
+        # overflow buffer (deque for O(1) popleft in get_batch FIFO)
+        self.overflow = deque()
 
         self.flush_total_records = 0
         self.flush_total_size = 0
@@ -134,8 +136,10 @@ async def put(self, data: Any, partition_key: Optional[str] = None) -> None:
 
         # Raise exception from Flush Task to main task otherwise raise exception inside
         # Flush Task will fail silently
-        if self.flush_task.done():
-            raise self.flush_task.exception()
+        if self.flush_task and self.flush_task.done():
+            exc = self.flush_task.exception()
+            if exc:
+                raise exc
 
         if not self.stream_status == self.ACTIVE:
             await self.get_conn()
@@ -149,27 +153,29 @@ async def put(self, data: Any, partition_key: Optional[str] = None) -> None:
         # Update queue size metric
         self.metrics.gauge(MetricType.PRODUCER_QUEUE_SIZE, self.queue.qsize(), {"stream_name": self.stream_name})
 
+    async def start(self):
+        await super().start()
+        # (Re)start flush infrastructure now that we have a live client.
+        self._stop_event = asyncio.Event()
+        self.flush_task = asyncio.create_task(self._flush())
+
     async def close(self):
         log.debug(f"Closing Connection.. (stream status:{self.stream_status})")
-        if not self.stream_status == self.RECONNECT:
-            # Signal flush task to stop gracefully (don't cancel — let in-progress flush complete)
-            self._stop_event.set()
 
-            if self.flush_task and not self.flush_task.done():
-                try:
-                    done, _ = await asyncio.wait([self.flush_task], timeout=2.0)
-                    if not done:
-                        log.debug("Flush task did not finish in time, cancelling")
-                        self.flush_task.cancel()
-                        try:
-                            await self.flush_task
-                        except asyncio.CancelledError:
-                            pass
-                        except Exception as e:
-                            log.debug(f"Error awaiting cancelled flush task: {e}")
-                except Exception as e:
-                    log.debug(f"Error during flush task cleanup: {e}")
+        # Always stop background flush task, even during reconnect,
+        # to avoid a dangling task referencing a closed client.
+        self._stop_event.set()
+
+        if self.flush_task and not self.flush_task.done():
+            # Wait for the flush task to finish — don't cancel it.
+            # _stop_event ensures the loop exits after the current flush() completes,
+            # letting any in-flight shielded put_records() finish rather than
+            # re-queuing items that were already delivered (duplicate prevention).
+            done, _ = await asyncio.wait([self.flush_task], timeout=10.0)
+            if not done:
+                log.warning("Flush task did not finish within 10s, proceeding with close")
 
+        if self.stream_status != self.RECONNECT:
             # Final flush to send any remaining queued items
             await self.flush()
 
@@ -202,8 +208,13 @@ async def flush(self, _skip_if_locked=False):
         async with self._flush_lock:
 
             if self.processor.has_items():
-                for output in self.processor.get_items():
-                    await self.queue.put(output)
+                outputs = list(self.processor.get_items())
+                for output in outputs:
+                    try:
+                        self.queue.put_nowait(output)
+                    except asyncio.QueueFull:
+                        self.overflow.append(output)
+                        log.debug("Queue full during flush, spilled %d items to overflow", 1)
 
             while True:
 
@@ -311,7 +322,7 @@ async def get_batch(self):
             async with self.put_rate_throttle:
 
                 if self.overflow:
-                    item = self.overflow.pop()
+                    item = self.overflow.popleft()
 
                 else:
                     try:
@@ -415,8 +426,8 @@ async def _push_kinesis(self, items):
             except ClientConnectionError:
                 await self.get_conn()
             except asyncio.CancelledError:
-                # In-flight put_records continues (shielded), but we can't get the result.
-                # Re-queue items so the final flush in close() can retry them.
+                # Cancelled from outside close() (which waits for this task instead of cancelling it).
+                # The shielded put_records may still complete; re-queue for at-least-once delivery.
                 log.debug("put_records cancelled, re-queuing %d items to overflow", len(items))
                 self.overflow.extend(items)
                 raise
diff --git a/kinesis/testing.py b/kinesis/testing.py
@@ -188,6 +188,7 @@ class MockProducer:
     def __init__(
         self,
         stream_name: str,
+        *,
         processor: Optional[Processor] = None,
         # Accepted for signature compatibility — ignored
         session=None,
@@ -270,6 +271,7 @@ class MockConsumer:
     def __init__(
         self,
         stream_name: str,
+        *,
         processor: Optional[Processor] = None,
         checkpointer=None,
         iterator_type: str = "TRIM_HORIZON",
diff --git a/tests.py b/tests.py
@@ -18,7 +18,6 @@
     NetstringAggregator,
     NewlineAggregator,
     OutputItem,
-    SimpleAggregator,
 )
 from kinesis.processors import (
     JsonLineProcessor,
@@ -570,7 +569,7 @@ async def test_producer_put_exceed_batch_size(self):
 
     async def test_producer_and_consumer(self):
 
-        async with Producer(stream_name=self.stream_name, endpoint_url=ENDPOINT_URL) as producer:
+        async with Producer(stream_name=self.stream_name, endpoint_url=ENDPOINT_URL):
             pass
 
             async with Consumer(stream_name=self.stream_name, endpoint_url=ENDPOINT_URL):
@@ -815,7 +814,7 @@ async def test_producer_and_consumer_consume_with_checkpointer_and_latest(self):
             ) as consumer:
 
                 # Manually start
-                await consumer.start_consumer()
+                consumer._start_fetch_task()
 
                 await producer.put("test.B")
 
@@ -961,7 +960,7 @@ async def test_consumer_checkpoint(self):
             ) as consumer:
 
                 # Manually start
-                await consumer.start_consumer()
+                consumer._start_fetch_task()
 
                 await producer.put("test")
 
@@ -1018,7 +1017,7 @@ async def test_producer_producer_limit(self):
                 iterator_type="LATEST",
             ) as consumer:
 
-                await consumer.start_consumer()
+                consumer._start_fetch_task()
 
                 # Wait a bit just to be sure iterator is gonna get late
                 await asyncio.sleep(3)
diff --git a/tests/test_consumer.py b/tests/test_consumer.py
diff --git a/tests/test_producer.py b/tests/test_producer.py