
Commit 127ecb7

Queue headers for processing, instead of batching

1 parent: fcf73d0

6 files changed (+107 -69 lines)

tests/p2p/test_service.py

Lines changed: 6 additions & 1 deletion

@@ -26,11 +26,16 @@ async def _run(self):
 async def test_daemon_exit_causes_parent_cancellation():
     service = ParentService()
     asyncio.ensure_future(service.run())
+
     await asyncio.sleep(0.01)
+
     assert service.daemon.is_operational
     assert service.daemon.is_running
+
     await service.daemon.cancel()
     await asyncio.sleep(0.01)
+
     assert not service.is_operational
     assert not service.is_running
-    await service.events.cleaned_up.wait()
+
+    await asyncio.wait_for(service.events.cleaned_up.wait(), timeout=1)
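Note on the test change: bounding the final wait with `asyncio.wait_for` means a cleanup regression fails the test after one second instead of hanging the suite. A minimal sketch of the pattern, using an illustrative stand-in event rather than the real `service.events.cleaned_up`:

```python
import asyncio

async def main() -> None:
    cleaned_up = asyncio.Event()  # stand-in for service.events.cleaned_up

    # If nothing ever sets the event, wait_for raises asyncio.TimeoutError
    # after 1 second instead of blocking forever.
    try:
        await asyncio.wait_for(cleaned_up.wait(), timeout=1)
    except asyncio.TimeoutError:
        print("cleanup never finished within 1s")

asyncio.run(main())
```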

trinity/protocol/eth/peer.py

Lines changed: 2 additions & 4 deletions

@@ -25,6 +25,8 @@
 
 
 class ETHPeer(BasePeer):
+    max_headers_fetch = constants.MAX_HEADERS_FETCH
+
     _supported_sub_protocols = [ETHProtocol]
     sub_proto: ETHProtocol = None
 
@@ -40,10 +42,6 @@ def requests(self) -> ETHExchangeHandler:
         self._requests = ETHExchangeHandler(self)
         return self._requests
 
-    @property
-    def max_headers_fetch(self) -> int:
-        return constants.MAX_HEADERS_FETCH
-
     def handle_sub_proto_msg(self, cmd: Command, msg: _DecodedMsgType) -> None:
         if isinstance(cmd, NewBlock):
             msg = cast(Dict[str, Any], msg)
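Why `max_headers_fetch` becomes a class attribute: the syncer below sizes its pending-header queue from `ETHPeer.max_headers_fetch * 5`, i.e. it reads the value on the class itself, and a `@property` accessed on the class returns the property object rather than the integer. A small illustrative sketch (class names and the constant value are made up for the example):

```python
MAX_HEADERS_FETCH = 192  # illustrative value only

class PeerWithProperty:
    @property
    def max_headers_fetch(self) -> int:
        return MAX_HEADERS_FETCH

class PeerWithClassAttr:
    max_headers_fetch = MAX_HEADERS_FETCH

print(PeerWithProperty.max_headers_fetch)    # <property object at 0x...>, not usable as a number
print(PeerWithClassAttr.max_headers_fetch)   # 192, usable to size a queue
print(PeerWithProperty().max_headers_fetch)  # 192 -- instances behave the same either way
```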

trinity/protocol/les/peer.py

Lines changed: 2 additions & 4 deletions

@@ -34,6 +34,8 @@
 
 
 class LESPeer(BasePeer):
+    max_headers_fetch = MAX_HEADERS_FETCH
+
     _supported_sub_protocols = [LESProtocol, LESProtocolV2]
     sub_proto: LESProtocol = None
     # TODO: This will no longer be needed once we've fixed #891, and then it should be removed.
@@ -51,10 +53,6 @@ def requests(self) -> LESExchangeHandler:
         self._requests = LESExchangeHandler(self)
         return self._requests
 
-    @property
-    def max_headers_fetch(self) -> int:
-        return MAX_HEADERS_FETCH
-
     def handle_sub_proto_msg(self, cmd: Command, msg: _DecodedMsgType) -> None:
         if isinstance(cmd, Announce):
             self.head_info = cmd.as_head_info(msg)

trinity/sync/common/chain.py

Lines changed: 61 additions & 38 deletions

@@ -1,7 +1,6 @@
 import asyncio
 from abc import abstractmethod
 from typing import (
-    Any,
     AsyncGenerator,
     Tuple,
     Union,
@@ -15,14 +14,16 @@
 from eth.exceptions import (
     HeaderNotFound,
 )
+from eth_typing import (
+    Hash32,
+)
 from eth_utils import (
     ValidationError,
 )
 from eth.rlp.headers import BlockHeader
 
 from p2p import protocol
 from p2p.constants import MAX_REORG_DEPTH, SEAL_CHECK_RANDOM_SAMPLE_RATE
-from p2p.exceptions import NoEligiblePeers
 from p2p.p2p_proto import DisconnectReason
 from p2p.peer import BasePeer, PeerPool, PeerSubscriber
 from p2p.service import BaseService
@@ -45,19 +46,26 @@ class BaseHeaderChainSyncer(BaseService, PeerSubscriber):
     """
     # We'll only sync if we are connected to at least min_peers_to_sync.
     min_peers_to_sync = 1
-    # Should we exit upon completing a sync with a given peer?
-    _exit_on_sync_complete = False
+    # Post-processing steps can exit out of sync (for example, fast sync) by triggering this token:
+    complete_token = None
     # TODO: Instead of a fixed timeout, we should use a variable one that gets adjusted based on
     # the round-trip times from our download requests.
     _reply_timeout = 60
     _seal_check_random_sample_rate = SEAL_CHECK_RANDOM_SAMPLE_RATE
+    # the latest header hash of the peer on the current sync
+    _target_header_hash = None
 
     def __init__(self,
                  chain: AsyncChain,
                  db: AsyncHeaderDB,
                  peer_pool: PeerPool,
                  token: CancelToken = None) -> None:
-        super().__init__(token)
+        self.complete_token = CancelToken('trinity.sync.common.BaseHeaderChainSyncer.SyncCompleted')
+        if token is None:
+            super_service_token = self.complete_token
+        else:
+            super_service_token = token.chain(self.complete_token)
+        super().__init__(super_service_token)
         self.chain = chain
         self.db = db
         self.peer_pool = peer_pool
@@ -66,13 +74,24 @@ def __init__(self,
         self._sync_complete = asyncio.Event()
         self._sync_requests: asyncio.Queue[HeaderRequestingPeer] = asyncio.Queue()
 
+        # pending queue size should be big enough to avoid starving the processing consumers, but
+        # small enough to avoid wasteful over-requests before post-processing can happen
+        max_pending_headers = ETHPeer.max_headers_fetch * 5
+        self.pending_headers: asyncio.Queue[BlockHeader] = asyncio.Queue(max_pending_headers)
+
     @property
     def msg_queue_maxsize(self) -> int:
         # This is a rather arbitrary value, but when the sync is operating normally we never see
         # the msg queue grow past a few hundred items, so this should be a reasonable limit for
         # now.
         return 2000
 
+    def get_target_header_hash(self) -> Hash32:
+        if self._target_header_hash is None:
+            raise ValueError("Cannot check the target hash when there is no active sync")
+        else:
+            return self._target_header_hash
+
     def register_peer(self, peer: BasePeer) -> None:
         self._sync_requests.put_nowait(cast(HeaderRequestingPeer, self.peer_pool.highest_td_peer))
 
@@ -99,19 +118,14 @@ async def _run(self) -> None:
         self.run_task(self._handle_msg_loop())
         with self.subscribe(self.peer_pool):
             while self.is_operational:
-                peer_or_finished: Any = await self.wait_first(
-                    self._sync_requests.get(),
-                    self._sync_complete.wait()
-                )
-
-                # In the case of a fast sync, we return once the sync is completed, and our caller
-                # must then run the StateDownloader.
-                if self._sync_complete.is_set():
+                try:
+                    peer = await self.wait(self._sync_requests.get())
+                except OperationCancelled:
+                    # In the case of a fast sync, we return once the sync is completed, and our
+                    # caller must then run the StateDownloader.
                     return
-
-                # Since self._sync_complete is not set, peer_or_finished can only be a Peer
-                # instance.
-                self.run_task(self.sync(peer_or_finished))
+                else:
+                    self.run_task(self.sync(peer))
 
     async def sync(self, peer: HeaderRequestingPeer) -> None:
         if self._syncing:
@@ -162,7 +176,11 @@ async def _sync(self, peer: HeaderRequestingPeer) -> None:
                 break
 
            try:
-                headers = await self._fetch_missing_headers(peer, start_at)
+                fetch_headers_coro = self._fetch_missing_headers(peer, start_at)
+                headers = await self.complete_token.cancellable_wait(fetch_headers_coro)
+            except OperationCancelled:
+                self.logger.info("Sync with %s completed", peer)
+                break
            except TimeoutError:
                self.logger.warn("Timeout waiting for header batch from %s, aborting sync", peer)
                await peer.disconnect(DisconnectReason.timeout)
@@ -192,22 +210,21 @@ async def _sync(self, peer: HeaderRequestingPeer) -> None:
             except ValidationError as e:
                 self.logger.warn("Received invalid headers from %s, aborting sync: %s", peer, e)
                 break
-            try:
-                head_number = await self._process_headers(peer, headers)
-            except NoEligiblePeers:
-                self.logger.info("No peers have the blocks we want, aborting sync")
-                break
-            start_at = head_number + 1
 
-            # Quite often the header batch we receive here includes headers past the peer's reported
-            # head (via the NewBlock msg), so we can't compare our head's hash to the peer's in
-            # order to see if the sync is completed. Instead we just check that we have the peer's
-            # head_hash in our chain.
-            if await self.wait(self.db.coro_header_exists(peer.head_hash)):
-                self.logger.info("Sync with %s completed", peer)
-                if self._exit_on_sync_complete:
-                    self._sync_complete.set()
-                    break
+            # Setting the latest header hash for the peer, before queuing header processing tasks
+            self._target_header_hash = peer.head_hash
+
+            await self._queue_headers_for_processing(headers)
+            start_at = headers[-1].block_number + 1
+
+    async def _queue_headers_for_processing(self, headers: Tuple[BlockHeader, ...]) -> None:
+        # this block is an optimization to avoid lots of await calls
+        if len(headers) + self.pending_headers.qsize() <= self.pending_headers.maxsize:
+            for header in headers:
+                self.pending_headers.put_nowait(header)
+        else:
+            for header in headers:
+                await self.pending_headers.put(header)
 
     async def _fetch_missing_headers(
             self, peer: HeaderRequestingPeer, start_at: int) -> Tuple[BlockHeader, ...]:
@@ -245,12 +262,18 @@ async def get_missing_tail(self: 'BaseHeaderChainSyncer',
 
             return tail_headers
 
+    async def pop_all_pending_headers(self) -> Tuple[BlockHeader, ...]:
+        """Get all the currently pending headers. If no headers pending, wait until one is"""
+        queue = self.pending_headers
+        if queue.empty():
+            first_header = await queue.get()
+        else:
+            first_header = queue.get_nowait()
+
+        available = queue.qsize()
+        return (first_header, ) + tuple(queue.get_nowait() for _ in range(available))
+
     @abstractmethod
     async def _handle_msg(self, peer: HeaderRequestingPeer, cmd: protocol.Command,
                           msg: protocol._DecodedMsgType) -> None:
         raise NotImplementedError("Must be implemented by subclasses")
-
-    @abstractmethod
-    async def _process_headers(
-            self, peer: HeaderRequestingPeer, headers: Tuple[BlockHeader, ...]) -> int:
-        raise NotImplementedError("Must be implemented by subclasses")
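The structural change in this file: fetched headers are no longer handed straight to an abstract `_process_headers` call; they are pushed onto a bounded `asyncio.Queue` (with `put_nowait` as a fast path when the whole batch fits) and consumers drain everything currently available via `pop_all_pending_headers`. A self-contained sketch of that producer/consumer shape, using plain integers in place of `BlockHeader` objects and an assumed queue size:

```python
import asyncio
from typing import Tuple

MAX_PENDING = 20  # stands in for ETHPeer.max_headers_fetch * 5

async def queue_batch(queue: "asyncio.Queue[int]", batch: Tuple[int, ...]) -> None:
    # Fast path: if the whole batch fits, enqueue without awaiting per item.
    if len(batch) + queue.qsize() <= queue.maxsize:
        for item in batch:
            queue.put_nowait(item)
    else:
        # Otherwise apply backpressure, blocking until the consumer makes room.
        for item in batch:
            await queue.put(item)

async def pop_all(queue: "asyncio.Queue[int]") -> Tuple[int, ...]:
    # Wait for at least one item, then drain whatever else is already queued.
    first = await queue.get()
    rest = tuple(queue.get_nowait() for _ in range(queue.qsize()))
    return (first,) + rest

async def main() -> None:
    queue: "asyncio.Queue[int]" = asyncio.Queue(MAX_PENDING)
    await queue_batch(queue, tuple(range(10)))
    print(await pop_all(queue))  # (0, 1, ..., 9) drained as a single batch

asyncio.run(main())
```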

trinity/sync/full/chain.py

Lines changed: 21 additions & 8 deletions

@@ -64,7 +64,6 @@ class FastChainSyncer(BaseHeaderChainSyncer):
     head.
     """
     db: AsyncChainDB
-    _exit_on_sync_complete = True
 
     subscription_msg_types: Set[Type[Command]] = {
         commands.NewBlock,
@@ -78,6 +77,15 @@ class FastChainSyncer(BaseHeaderChainSyncer):
         commands.NewBlockHashes,
     }
 
+    async def _run(self) -> None:
+        self.run_task(self._load_and_process_headers())
+        await super()._run()
+
+    async def _load_and_process_headers(self) -> None:
+        while self.is_operational:
+            headers = await self.pop_all_pending_headers()
+            await self._process_headers(headers)
+
     async def _calculate_td(self, headers: Tuple[BlockHeader, ...]) -> int:
         """Return the score (total difficulty) of the last header in the given list.
 
@@ -94,8 +102,7 @@ async def _calculate_td(self, headers: Tuple[BlockHeader, ...]) -> int:
             td += header.difficulty
         return td
 
-    async def _process_headers(
-            self, peer: HeaderRequestingPeer, headers: Tuple[BlockHeader, ...]) -> int:
+    async def _process_headers(self, headers: Tuple[BlockHeader, ...]) -> None:
         timer = Timer()
         target_td = await self._calculate_td(headers)
         bodies_by_key = await self._download_block_bodies(target_td, headers)
@@ -123,7 +130,16 @@ async def _process_headers(
         self.logger.info(
             "Imported %d blocks (%d txs) in %0.2f seconds, new head: #%d",
             len(headers), txs, timer.elapsed, head.block_number)
-        return head.block_number
+
+        # during fast sync, exit the service when reaching the target hash
+        target_hash = self.get_target_header_hash()
+
+        # Quite often the header batch we receive includes headers past the peer's reported
+        # head (via the NewBlock msg), so we can't compare our head's hash to the peer's in
+        # order to see if the sync is completed. Instead we just check that we have the peer's
+        # head_hash in our chain.
+        if await self.wait(self.db.coro_header_exists(target_hash)):
+            self.complete_token.trigger()
 
     async def _download_block_bodies(self,
                                      target_td: int,
@@ -357,11 +373,9 @@ class RegularChainSyncer(FastChainSyncer):
 
     Here, the run() method will execute the sync loop forever, until our CancelToken is triggered.
     """
-    _exit_on_sync_complete = False
     _seal_check_random_sample_rate = 1
 
-    async def _process_headers(
-            self, peer: HeaderRequestingPeer, headers: Tuple[BlockHeader, ...]) -> int:
+    async def _process_headers(self, headers: Tuple[BlockHeader, ...]) -> None:
         target_td = await self._calculate_td(headers)
         bodies_by_key = await self._download_block_bodies(target_td, headers)
         self.logger.info("Got block bodies for chain segment")
@@ -410,7 +424,6 @@ async def _process_headers(
 
         head = await self.wait(self.db.coro_get_canonical_head())
         self.logger.info("Imported chain segment, new head: #%d", head.block_number)
-        return head.block_number
 
 
 def _is_body_empty(header: BlockHeader) -> bool:
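In the fast syncer the header consumer now runs as a background task and ends the sync by triggering `complete_token` once the target header hash is found in the database; because that token is chained into the service token, the producer loop in the base class unwinds as well. A rough sketch of that shape, using an `asyncio.Event` in place of the `CancelToken` class so it runs without the p2p library (block numbers stand in for header hashes):

```python
import asyncio

async def producer(queue: "asyncio.Queue[int]", done: asyncio.Event) -> None:
    block = 0
    while not done.is_set():
        # Fetch-and-enqueue loop; stops as soon as the consumer signals completion.
        await queue.put(block)
        block += 1
        await asyncio.sleep(0.01)

async def consumer(queue: "asyncio.Queue[int]", done: asyncio.Event, target: int) -> None:
    while not done.is_set():
        block = await queue.get()
        # ... import/persist the block here ...
        if block >= target:  # analogous to finding the target header hash in the DB
            done.set()       # analogous to self.complete_token.trigger()

async def main() -> None:
    queue: "asyncio.Queue[int]" = asyncio.Queue(5)
    done = asyncio.Event()
    await asyncio.gather(producer(queue, done), consumer(queue, done, target=20))
    print("sync complete at target")

asyncio.run(main())
```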

trinity/sync/light/chain.py

Lines changed: 15 additions & 14 deletions

@@ -3,13 +3,10 @@
     cast,
     Dict,
     Set,
-    Tuple,
     Type,
     Union,
 )
 
-from eth.rlp.headers import BlockHeader
-
 from p2p.protocol import (
     Command,
     _DecodedMsgType,
@@ -22,7 +19,6 @@
 from trinity.sync.common.chain import BaseHeaderChainSyncer
 from trinity.utils.timer import Timer
 
-
 HeaderRequestingPeer = Union[ETHPeer, LESPeer]
 
 
@@ -35,6 +31,10 @@ class LightChainSyncer(BaseHeaderChainSyncer):
         commands.BlockHeaders,
     }
 
+    async def _run(self) -> None:
+        self.run_task(self._persist_headers())
+        await super()._run()
+
     async def _handle_msg(self, peer: HeaderRequestingPeer, cmd: Command,
                           msg: _DecodedMsgType) -> None:
         if isinstance(cmd, commands.Announce):
@@ -61,14 +61,15 @@ async def _handle_get_block_headers(self, peer: LESPeer, msg: Dict[str, Any]) ->
         self.logger.trace("Replying to %s with %d headers", peer, len(headers))
         peer.sub_proto.send_block_headers(headers, buffer_value=0, request_id=request.request_id)
 
-    async def _process_headers(
-            self, peer: HeaderRequestingPeer, headers: Tuple[BlockHeader, ...]) -> int:
-        timer = Timer()
-        for header in headers:
-            await self.wait(self.db.coro_persist_header(header))
+    async def _persist_headers(self) -> None:
+        while self.is_operational:
+            headers = await self.wait(self.pop_all_pending_headers())
+
+            timer = Timer()
+            for header in headers:
+                await self.wait(self.db.coro_persist_header(header))
 
-        head = await self.wait(self.db.coro_get_canonical_head())
-        self.logger.info(
-            "Imported %d headers in %0.2f seconds, new head: #%d",
-            len(headers), timer.elapsed, head.block_number)
-        return head.block_number
+            head = await self.wait(self.db.coro_get_canonical_head())
+            self.logger.info(
+                "Imported %d headers in %0.2f seconds, new head: #%d",
+                len(headers), timer.elapsed, head.block_number)
