TaskQueue: add get_nowait

carver · carver · commit 9a4884c2d155 · 2018-08-28T06:29:44.000-07:00
diff --git a/tests/trinity/utils/test_task_queue.py b/tests/trinity/utils/test_task_queue.py
@@ -338,3 +338,30 @@ async def test_cannot_readd_same_task():
     await q.add((1, 2))
     with pytest.raises(ValidationError):
         await q.add((2,))
+
+
+@pytest.mark.parametrize('get_size', (1, None))
+def test_get_nowait_queuefull(get_size):
+    q = TaskQueue()
+    with pytest.raises(asyncio.QueueFull):
+        q.get_nowait(get_size)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    'tasks, get_size, expected_tasks',
+    (
+        ((3, 2), 1, (2, )),
+    ),
+)
+async def test_get_nowait(tasks, get_size, expected_tasks):
+    q = TaskQueue()
+    await q.add(tasks)
+
+    batch, tasks = q.get_nowait(get_size)
+
+    assert tasks == expected_tasks
+
+    q.complete(batch, tasks)
+
+    assert all(task not in q for task in tasks)
diff --git a/trinity/utils/datastructures.py b/trinity/utils/datastructures.py
@@ -1,9 +1,8 @@
 from asyncio import (
+    AbstractEventLoop,
     Lock,
     PriorityQueue,
-    Queue,
     QueueFull,
-    BoundedSemaphore,
 )
 from itertools import count
 from typing import (
@@ -22,6 +21,19 @@
 from eth_utils.toolz import identity
 
 TTask = TypeVar('TTask')
+TFunc = TypeVar('TFunc')
+
+
+class FunctionProperty(Generic[TFunc]):
+    """
+    A property class purely to convince mypy to let us assign a function to an
+    instance variable. See more at: https://github.com/python/mypy/issues/708#issuecomment-405812141
+    """
+    def __get__(self, oself: Any, owner: Any) -> TFunc:
+        return self._func
+
+    def __set__(self, oself: Any, value: TFunc) -> None:
+        self._func = value
 
 
 class TaskQueue(Generic[TTask]):
@@ -31,7 +43,7 @@ class TaskQueue(Generic[TTask]):
     A producer of tasks will insert pending tasks with await add(), which will not return until
     all tasks have been added to the queue.
 
-    A task consumer calls await get() to retrieve tasks to attempt. Tasks will be returned in
+    A task consumer calls await get() to retrieve tasks for processing. Tasks will be returned in
     priority order. If no tasks are pending, get()
     will pause until at least one is available. Only one consumer will have a task "checked out"
     from get() at a time.
@@ -42,7 +54,7 @@ class TaskQueue(Generic[TTask]):
     """
 
     # a function that determines the priority order (lower int is higher priority)
-    _order_fn: Callable[[TTask], Any]
+    _order_fn: FunctionProperty[Callable[[TTask], Any]]
 
     # batches of tasks that have been started but not completed
     _in_progress: Dict[int, Tuple[TTask, ...]]
@@ -58,7 +70,7 @@ def __init__(
             maxsize: int = 0,
             order_fn: Callable[[TTask], Any] = identity,
             *,
-            loop=None) -> None:
+            loop: AbstractEventLoop = None) -> None:
         self._maxsize = maxsize
         self._full_lock = Lock(loop=loop)
         self._open_queue = PriorityQueue(maxsize, loop=loop)
@@ -79,7 +91,7 @@ async def add(self, tasks: Tuple[TTask, ...]) -> None:
         already_pending = self._tasks.intersection(tasks)
         if already_pending:
             raise ValidationError(
-                f"Can't readd a task to queue. {already_pending!r} are already present"
+                f"Duplicate tasks detected: {already_pending!r} are already present in the queue"
             )
 
         # make sure to insert the highest-priority items first, in case queue fills up
@@ -124,43 +136,74 @@ async def add(self, tasks: Tuple[TTask, ...]) -> None:
             if self._full_lock.locked() and len(self._tasks) < self._maxsize:
                 self._full_lock.release()
 
+    def get_nowait(self, max_results: int = None) -> Tuple[int, Tuple[TTask, ...]]:
+        """
+        Get pending tasks. If no tasks are pending, raise an exception.
+
+        :param max_results: return up to this many pending tasks. If None, return all pending tasks.
+        :return: (batch_id, tasks to attempt)
+        :raise ~asyncio.QueueFull: if no tasks are available
+        """
+        if self._open_queue.empty():
+            raise QueueFull("No tasks are available to get")
+        else:
+            pending_tasks = self._get_nowait(max_results)
+
+            # Record the batch as in progress under a new id, so uncompleted tasks can be inferred
+            next_id = next(self._id_generator)
+            self._in_progress[next_id] = pending_tasks
+
+            return (next_id, pending_tasks)
+
     async def get(self, max_results: int = None) -> Tuple[int, Tuple[TTask, ...]]:
-        """Get all the currently pending tasks. If no tasks pending, wait until one is"""
-        # TODO add argument to optionally limit the number of tasks retrieved
+        """
+        Get pending tasks. If no tasks are pending, wait until a task is added.
+
+        :param max_results: return up to this many pending tasks. If None, return all pending tasks.
+        :return: (batch_id, tasks to attempt)
+        """
         if max_results is not None and max_results < 1:
             raise ValidationError("Must request at least one task to process, not {max_results!r}")
 
         # if the queue is empty, wait until at least one item is available
         queue = self._open_queue
         if queue.empty():
-            first_task = await queue.get()
+            _rank, first_task = await queue.get()
         else:
-            first_task = queue.get_nowait()
-
-        available = queue.qsize()
+            _rank, first_task = queue.get_nowait()
 
         # In order to return from get() as soon as possible, never await again.
-        # Instead, take only the tasks that are already waiting.
-
-        # How many results past the first one do we want?
+        # Instead, take only the tasks that are already available.
         if max_results is None:
-            more_tasks_to_return = available
+            remaining_count = None
         else:
-            more_tasks_to_return = min((available, max_results - 1))
+            remaining_count = max_results - 1
+        remaining_tasks = self._get_nowait(remaining_count)
 
-        # Combine the remaining tasks with the first task we already pulled.
-        ranked_tasks = (first_task, ) + tuple(
-            queue.get_nowait() for _ in range(more_tasks_to_return)
-        )
+        # Combine the first and remaining tasks
+        all_tasks = (first_task, ) + remaining_tasks
 
-        # strip out the rank value used internally, for sorting in the priority queue
-        unranked_tasks = tuple(task for _rank, task in ranked_tasks)
-
-        # save the batch for later, so uncompleted tasks can be inferred
+        # Record the batch as in progress under a new id, so uncompleted tasks can be inferred
         next_id = next(self._id_generator)
-        self._in_progress[next_id] = unranked_tasks
+        self._in_progress[next_id] = all_tasks
+
+        return (next_id, all_tasks)
+
+    def _get_nowait(self, max_results: int = None) -> Tuple[TTask, ...]:
+        queue = self._open_queue
+
+        # How many results do we want?
+        available = queue.qsize()
+        if max_results is None:
+            num_tasks = available
+        else:
+            num_tasks = min((available, max_results))
+
+        # Dequeue that many already-available tasks, as (rank, task) pairs.
+        ranked_tasks = tuple(queue.get_nowait() for _ in range(num_tasks))
 
-        return (next_id, unranked_tasks)
+        # strip out the rank value used internally for sorting in the priority queue
+        return tuple(task for _rank, task in ranked_tasks)
 
     def complete(self, batch_id: int, completed: Tuple[TTask, ...]) -> None:
         if batch_id not in self._in_progress: