Last revision reset (#2137)

noonio · noonio · commit e1f9505e848a · 2025-08-18T11:45:39.000+01:00
> [!NOTE]
> This is currently on top of the 0.22.2 tag and I could not find a 0.22
> release branch to open the PR against. We should probably not merge it
> like this. Instead, I should rebase it onto master and we cherry-pick it
> back to do a 0.22.3 release.

The last-known-revision kept by the Etcd network component may be
incorrect if the etcd cluster compacted this revision while the node was
offline or if the last-known-revision state file was removed.

Both cases can be handled by detecting a failing watch request and at
least using the compactRevision from the response.

This is a somewhat exceptional situation and the node state may be
inconsistent because of this. Hence we also log a warning when this
happens.

---

* [x] CHANGELOG updated
* [ ] Documentation updated or not needed
* [x] Haddocks updated
* [x] No new TODOs introduced or explained hereafter
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -61,15 +61,15 @@ when the number of persisted `StateChanged` events exceeds the configured `--per
   preserving sequential order and making it easier to identify which rotated log file was used to compute it.
 
 
-## [0.22.2] - 2025.06.30
+## [0.22.2] - 2025-06-30
 
 * Fix wrong hydra-script-tx-ids in networks.json
 
-## [0.22.1] - 2025.06.27
+## [0.22.1] - 2025-06-27
 
 * Fix for bug where node got stalled at `ReplayingState` [#2089](https://github.com/cardano-scaling/hydra/issues/2089)
 
-## [0.22.0] - 2025.06.17
+## [0.22.0] - 2025-06-17
 
 - Tested with `cardano-node 10.1.4` and `cardano-cli 10.1.1.0`.
 
diff --git a/hydra-node/src/Hydra/Network/Etcd.hs b/hydra-node/src/Hydra/Network/Etcd.hs
@@ -20,8 +20,7 @@
 -- only deliver messages that were not seen before. In case we are not connected
 -- to our 'etcd' instance or not enough peers (= on a minority cluster), we
 -- retry sending, but also store messages to broadcast in a 'PersistentQueue',
--- which makes the node resilient against crashes while sending. TODO: Is this
--- needed? performance limitation?
+-- which makes the node resilient against crashes while sending.
 --
 -- Connectivity and compatibility with other nodes on the cluster is tracked
 -- using the key-value service as well:
@@ -93,7 +92,6 @@ import Network.GRPC.Client (
  )
 import Network.GRPC.Client.StreamType.IO (biDiStreaming, nonStreaming)
 import Network.GRPC.Common (GrpcError (..), GrpcException (..), HTTP2Settings (..), NextElem (..), def, defaultHTTP2Settings)
-import Network.GRPC.Common.NextElem (whileNext_)
 import Network.GRPC.Common.Protobuf (Proto (..), Protobuf, defMessage, (.~))
 import Network.GRPC.Etcd (
   Compare'CompareResult (..),
@@ -102,6 +100,7 @@ import Network.GRPC.Etcd (
   Lease,
   Watch,
  )
+import Network.Socket (PortNumber)
 import System.Directory (createDirectoryIfMissing, listDirectory, removeFile)
 import System.Environment.Blank (getEnvironment)
 import System.FilePath ((</>))
@@ -175,7 +174,7 @@ withEtcdNetwork tracer protocolVersion config callback action = do
         traceWith tracer Reconnecting
         pure $ reconnectPolicy doneVar
 
-  clientHost = Host{hostname = "127.0.0.1", port = clientPort}
+  clientHost = Host{hostname = "127.0.0.1", port = getClientPort config}
 
   grpcServer =
     ServerInsecure $
@@ -185,11 +184,6 @@ withEtcdNetwork tracer protocolVersion config callback action = do
         , addressAuthority = Nothing
         }
 
-  -- NOTE: Offset client port by the same amount as configured 'port' is offset
-  -- from the default '5001'. This will result in the default client port 2379
-  -- be used by default still.
-  clientPort = 2379 + port listen - 5001
-
   traceStderr p NetworkCallback{onConnectivity} =
     forever $ do
       bs <- BS.hGetLine (getStderr p)
@@ -249,6 +243,14 @@ withEtcdNetwork tracer protocolVersion config callback action = do
 
   NetworkConfiguration{persistenceDir, listen, advertise, peers, whichEtcd} = config
 
+-- | Get the client port corresponding to a listen address.
+--
+-- The etcd client port is offset from its default 2379 by the same amount as
+-- the listen port is offset from its default 5001. Hence, a default listen
+-- port still results in the default client port 2379 being used.
+getClientPort :: NetworkConfiguration -> PortNumber
+getClientPort NetworkConfiguration{listen} = 2379 + port listen - 5001
+
 -- | Check and write version on etcd cluster. This will retry until we are on a
 -- majority cluster and succeed. If the version does not match a corresponding
 -- 'Connectivity' message is sent via 'NetworkCallback'.
@@ -282,8 +284,7 @@ checkVersion tracer conn ourVersion NetworkCallback{onConnectivity} = do
         Right theirVersion ->
           unless (theirVersion == ourVersion) $
             onConnectivity VersionMismatch{ourVersion, theirVersion = Just theirVersion}
-    else
-      traceWith tracer $ MatchingProtocolVersion{version = ourVersion}
+    else traceWith tracer $ MatchingProtocolVersion{version = ourVersion}
  where
   versionKey = "version"
 
@@ -361,11 +362,13 @@ waitMessages ::
   NetworkCallback msg IO ->
   IO ()
 waitMessages tracer conn directory NetworkCallback{deliver} = do
-  revision <- getLastKnownRevision directory
   withGrpcContext "waitMessages" . forever $ do
     -- NOTE: We have not observed the watch (subscription) fail even when peers
     -- leave and we end up on a minority cluster.
     biDiStreaming conn (rpc @(Protobuf Watch "watch")) $ \send recv -> do
+      revision <- getLastKnownRevision directory
+      let startRevision = fromIntegral (revision + 1)
+      traceWith tracer WatchMessagesStartRevision{startRevision}
       -- NOTE: Request all keys starting with 'msg'. See also section KeyRanges
       -- in https://etcd.io/docs/v3.5/learning/api/#key-value-api
       let watchRequest =
@@ -374,34 +377,48 @@ waitMessages tracer conn directory NetworkCallback{deliver} = do
               & #rangeEnd .~ "msh" -- NOTE: g+1 to query prefixes
               & #startRevision .~ fromIntegral (revision + 1)
       send . NextElem $ defMessage & #createRequest .~ watchRequest
-      whileNext_ recv process
+      loop send recv
     -- Wait before re-trying
     threadDelay 1
  where
-  process res = do
-    let revision = fromIntegral $ res ^. #header . #revision
-    putLastKnownRevision directory revision
-    forM_ (res ^. #events) $ \event -> do
-      let value = event ^. #kv . #value
-      case decodeFull' value of
-        Left err ->
-          traceWith
-            tracer
-            FailedToDecodeValue
-              { key = decodeUtf8 $ event ^. #kv . #key
-              , value = encodeBase16 value
-              , reason = show err
-              }
-        Right msg -> deliver msg
+  loop send recv =
+    recv >>= \case
+      NoNextElem -> pure ()
+      NextElem res ->
+        if res ^. #canceled
+          then do
+            let compactRevision = res ^. #compactRevision
+            traceWith tracer WatchMessagesFallbackTo{compactRevision}
+            putLastKnownRevision directory . fromIntegral $ (compactRevision - 1) `max` 0
+            -- Gracefully close watch stream
+            send NoNextElem
+          else do
+            let revision = res ^. #header . #revision
+            putLastKnownRevision directory . fromIntegral $ revision `max` 0
+            forM_ (res ^. #events) process
+            loop send recv
+
+  process event = do
+    let value = event ^. #kv . #value
+    case decodeFull' value of
+      Left err ->
+        traceWith
+          tracer
+          FailedToDecodeValue
+            { key = decodeUtf8 $ event ^. #kv . #key
+            , value = encodeBase16 value
+            , reason = show err
+            }
+      Right msg -> deliver msg
 
 getLastKnownRevision :: MonadIO m => FilePath -> m Natural
 getLastKnownRevision directory = do
   liftIO $
     try (decodeFileStrict' $ directory </> "last-known-revision") >>= \case
       Right rev -> do
-        pure $ fromMaybe 1 rev
+        pure $ fromMaybe 0 rev
       Left (e :: IOException)
-        | isDoesNotExistError e -> pure 1
+        | isDoesNotExistError e -> pure 0
         | otherwise -> do
             fail $ "Failed to load last known revision: " <> show e
 
@@ -614,5 +631,7 @@ data EtcdLog
   | LowLeaseTTL {ttlRemaining :: Int64}
   | NoKeepAliveResponse
   | MatchingProtocolVersion {version :: ProtocolVersion}
+  | WatchMessagesStartRevision {startRevision :: Int64}
+  | WatchMessagesFallbackTo {compactRevision :: Int64}
   deriving stock (Eq, Show, Generic)
   deriving anyclass (ToJSON, FromJSON)
diff --git a/hydra-node/test/Hydra/NetworkSpec.hs b/hydra-node/test/Hydra/NetworkSpec.hs
@@ -23,7 +23,7 @@ import Hydra.Network (
   ProtocolVersion (..),
   WhichEtcd (..),
  )
-import Hydra.Network.Etcd (withEtcdNetwork)
+import Hydra.Network.Etcd (getClientPort, withEtcdNetwork)
 import Hydra.Network.Message (Message (..))
 import Hydra.Node.Network (NetworkConfiguration (..))
 import System.Directory (removeFile)
@@ -202,14 +202,60 @@ spec = do
                 withEtcdNetwork @Int tracer v1 carolConfig recordCarol $ \_ -> do
                   broadcast n1 1001
                   waitCarol `shouldReturn` 1001
-                -- We can reset the last known view (internal implementation detail)
+
+      it "handles compaction and lost local state" $ \tracer -> do
+        withTempDir "test-etcd" $ \tmp -> do
+          failAfter 20 $ do
+            PeerConfig3{aliceConfig, bobConfig, carolConfig} <- setup3Peers tmp
+            (recordBob, waitBob, _) <- newRecordingCallback
+            (recordCarol, waitCarol, _) <- newRecordingCallback
+            withEtcdNetwork @Int tracer v1 aliceConfig noopCallback $ \n1 ->
+              withEtcdNetwork @Int tracer v1 bobConfig recordBob $ \_ -> do
+                -- First we send 5 messages with carol online
+                withEtcdNetwork @Int tracer v1 carolConfig recordCarol $ \_ -> do
+                  forM_ [1 .. 5] $ \msg -> do
+                    broadcast n1 msg
+                    waitBob `shouldReturn` msg
+                    waitCarol `shouldReturn` msg
+                -- Carol stopped and we continue sending messages
+                forM_ [5 .. 100] $ \msg -> do
+                  broadcast n1 msg
+                  waitBob `shouldReturn` msg
+                -- Even while carol is down, the etcd component would
+                -- "auto-compact" messages. By default down to 1000 messages
+                -- after/every 5 minutes. This is interesting as it should
+                -- result in carol never seeing some messages, but is hard to test
+                -- (without waiting 5 minutes). Instead we issue a direct etcd
+                -- command to compact everything before revision 50.
+                runProcess_ . shell $
+                  "etcdctl compact 50 --endpoints=127.0.0.1:" <> show (getClientPort aliceConfig)
+                -- When carol starts now we would expect it to start catching up
+                -- from the earliest possible revision 50, while missing some
+                -- messages.
+                withEtcdNetwork @Int tracer v1 carolConfig recordCarol $ \_ -> do
+                  -- NOTE: Revision 50 may not correspond to message 50, so we
+                  -- only assert it is some message bigger than 25 and expect to
+                  -- see all further messages to 100.
+                  firstMsg <- waitCarol
+                  firstMsg `shouldSatisfy` (> 25)
+                  forM_ [firstMsg + 1 .. 100] $ \msg ->
+                    waitCarol `shouldReturn` msg
+                  -- Carol should be able to receive new messages just fine.
+                  forM_ [101 .. 105] $ \msg -> do
+                    broadcast n1 msg
+                    waitCarol `shouldReturn` msg
+                -- Similarly, should carol lose its local state, we expect it to
+                -- see everything from the last compacted revision 50. We can
+                -- enforce this by removing the corresponding file (an internal
+                -- implementation detail)
                 removeFile (persistenceDir carolConfig </> "last-known-revision")
                 withEtcdNetwork @Int tracer v1 carolConfig recordCarol $ \_ -> do
-                  -- NOTE: The etcd component would "auto-compact" messages down
-                  -- to 1000 messages after 5 minutes. This would result in
-                  -- starting at 1001 here, but is hard to test (without waiting
-                  -- 5 minutes).
-                  forM_ messages $ \msg ->
+                  -- NOTE: Revision 50 may not correspond to message 50, so we
+                  -- only assert it is some message bigger than 25 and expect to
+                  -- see all further messages to 105.
+                  firstMsg <- waitCarol
+                  firstMsg `shouldSatisfy` (> 25)
+                  forM_ [firstMsg + 1 .. 105] $ \msg -> do
                     waitCarol `shouldReturn` msg
 
       it "emits cluster id mismatch" $ \tracer -> do
diff --git a/nix/hydra/packages.nix b/nix/hydra/packages.nix
@@ -149,6 +149,7 @@
           buildInputs = [
             nativePkgs.hydra-node.components.tests.tests
             pkgs.check-jsonschema
+            pkgs.etcd # For etcdctl command in tests
           ];
         };
         hydra-cluster-tests = pkgs.mkShellNoCC {