Skip to content

Commit 6b38190

Browse files
authored
Add cluster ID mismatch to connectivity messages (#2045)
fix #2026 --- <!-- Consider each and tick it off one way or the other --> * [x] CHANGELOG updated or not needed * [x] Documentation updated or not needed * [x] Haddocks updated or not needed * [x] No new TODOs introduced or explained hereafter
2 parents 9657e17 + 3a0f8f8 commit 6b38190

File tree

21 files changed

+163
-11
lines changed

21 files changed

+163
-11
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ changes.
4343
- This introduces `ToJSON` and `FromJSON` instances on all observation types.
4444
- `CollectComObservation` and `ContestObservation` are made compatible with their `OnChainTx` counterparts.
4545

46+
- Enhanced the error message for `etcd` cluster ID mismatches by including detailed information about
47+
the expected peers versus peers loaded from the `hydra-node` arguments.
48+
4649
## [0.21.0] - 2025-04-28
4750

4851
- New metric for counting the number of active peers: `hydra_head_peers_connected`

docs/docs/known-issues.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ The network topology needs to be statically configured and match across all `hyd
2828
Known errors are:
2929

3030
- `cluster ID mismatch` - the cluster was initiated with a different list of `--peer`s
31-
- check configuration with other participants
31+
- check configuration with other participants. There should be a corresponding log entry `NetworkClusterIDMismatch` with the information on:
32+
- `clusterPeers` - loaded peers info received from `etcd` cluster.
33+
- `misconfiguredPeers` - peers info coming from `hydra-node` arguments.
3234

3335
- `member ... has already been bootstrapped` - missing information in `<persistence-dir>/etcd`
3436
- need to bootstrap new cluster or manual workarounds, see also https://etcd.io/docs/v3.5/op-guide/failures/

hydra-cluster/src/Hydra/Cluster/Scenarios.hs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,31 @@ initWithWrongKeys workDir tracer node@RunningNode{nodeSocket} hydraScriptsTxId =
11721172

11731173
participants `shouldMatchList` expectedParticipants
11741174

1175+
-- | Scenario: run two hydra nodes whose peer lists disagree and check that the
-- first node reports a @NetworkClusterIDMismatch@ message.
--
-- Two nodes are started via 'withHydraNode': one with id 3 and the id list
-- @[3, 4]@, and, nested inside it, one with id 4 and the id list @[4]@
-- (presumably the last list argument is the set of node ids to peer with --
-- confirm against 'withHydraNode'). The scenario then waits on the first node
-- (@n1@) for a JSON output tagged @NetworkClusterIDMismatch@, extracts its
-- @clusterPeers@ and @misconfiguredPeers@ string fields, and asserts that they
-- differ and equal the exact expected peer strings.
startWithWrongPeers :: FilePath -> Tracer IO EndToEndLog -> RunningNode -> [TxId] -> IO ()
1176+
startWithWrongPeers workDir tracer node@RunningNode{nodeSocket} hydraScriptsTxId = do
1177+
(aliceCardanoVk, _) <- keysFor Alice
1178+
1179+
let contestationPeriod = 2
1180+
aliceChainConfig <- chainConfigFor Alice workDir nodeSocket hydraScriptsTxId [Carol] contestationPeriod
1181+
bobChainConfig <- chainConfigFor Bob workDir nodeSocket hydraScriptsTxId [Alice] contestationPeriod
1182+
1183+
let hydraTracer = contramap FromHydraNode tracer
1184+
withHydraNode hydraTracer aliceChainConfig workDir 3 aliceSk [bobVk] [3, 4] $ \n1 -> do
1185+
-- NOTE: here we deliberately use the wrong peer list for Bob
1186+
withHydraNode hydraTracer bobChainConfig workDir 4 bobSk [aliceVk] [4] $ \_ -> do
1187+
seedFromFaucet_ node aliceCardanoVk 100_000_000 (contramap FromFaucet tracer)
1188+
1189+
(clusterPeers, configuredPeers) <- waitMatch 20 n1 $ \v -> do
1190+
guard $ v ^? key "tag" == Just (Aeson.String "NetworkClusterIDMismatch")
1191+
clusterPeers <- v ^? key "clusterPeers" . _String
1192+
-- The JSON field is named "misconfiguredPeers"; it is only bound locally as 'configuredPeers'.
configuredPeers <- v ^? key "misconfiguredPeers" . _String
1193+
pure (clusterPeers, configuredPeers)
1194+
1195+
when (clusterPeers == configuredPeers) $
1196+
failure "Expected clusterPeers and configuredPeers to be different"
1197+
clusterPeers `shouldBe` "0.0.0.0:5003=http://0.0.0.0:5003,0.0.0.0:5004=http://0.0.0.0:5004"
1198+
configuredPeers `shouldBe` "0.0.0.0:5004=http://0.0.0.0:5004"
1199+
11751200
-- | Open a two participant head and incrementally commit to it.
11761201
canCommit :: Tracer IO EndToEndLog -> FilePath -> RunningNode -> [TxId] -> IO ()
11771202
canCommit tracer workDir node hydraScriptsTxId =

hydra-cluster/test/Test/EndToEndSpec.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ import Hydra.Cluster.Scenarios (
7575
singlePartyHeadFullLifeCycle,
7676
singlePartyUsesScriptOnL2,
7777
singlePartyUsesWithdrawZeroTrick,
78+
startWithWrongPeers,
7879
threeNodesNoErrorsOnOpen,
7980
threeNodesWithMirrorParty,
8081
)
@@ -566,6 +567,13 @@ spec = around (showLogsOnFailure "EndToEndSpec") $ do
566567
publishHydraScriptsAs node Faucet
567568
>>= initWithWrongKeys tmpDir tracer node
568569

570+
it "cluster id mismatch provides useful info in the logs" $ \tracer ->
571+
failAfter 60 $
572+
withClusterTempDir $ \tmpDir -> do
573+
withCardanoNodeDevnet (contramap FromCardanoNode tracer) tmpDir $ \node -> do
574+
publishHydraScriptsAs node Faucet
575+
>>= startWithWrongPeers tmpDir tracer node
576+
569577
it "bob cannot abort alice's head" $ \tracer -> do
570578
failAfter 60 $
571579
withClusterTempDir $ \tmpDir -> do
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"samples": [
3+
{
4+
"clusterPeers": "",
5+
"misconfiguredPeers": "",
6+
"tag": "NetworkClusterIDMismatch"
7+
}
8+
],
9+
"seed": 1341180695
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"samples": [
3+
{
4+
"clusterPeers": "",
5+
"misconfiguredPeers": "$",
6+
"tag": "NetworkClusterIDMismatch"
7+
}
8+
],
9+
"seed": 1702753294
10+
}

hydra-node/json-schemas/api.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ channels:
8080
- $ref: "api.yaml#/components/messages/NetworkConnected"
8181
- $ref: "api.yaml#/components/messages/NetworkDisconnected"
8282
- $ref: "api.yaml#/components/messages/NetworkVersionMismatch"
83+
- $ref: "api.yaml#/components/messages/NetworkClusterIDMismatch"
8384
- $ref: "api.yaml#/components/messages/HeadIsInitializing"
8485
- $ref: "api.yaml#/components/messages/Committed"
8586
- $ref: "api.yaml#/components/messages/HeadIsOpen"
@@ -487,6 +488,13 @@ components:
487488
payload:
488489
$ref: "api.yaml#/components/schemas/NetworkVersionMismatch"
489490

491+
NetworkClusterIDMismatch:
492+
title: NetworkClusterIDMismatch
493+
description: |
494+
The configured peers do not match with other nodes on the network.
495+
payload:
496+
$ref: "api.yaml#/components/schemas/NetworkClusterIDMismatch"
497+
490498
HeadIsInitializing:
491499
title: HeadIsInitializing
492500
description: |
@@ -732,6 +740,7 @@ components:
732740
- $ref: "api.yaml#/components/schemas/NetworkConnected"
733741
- $ref: "api.yaml#/components/schemas/NetworkDisconnected"
734742
- $ref: "api.yaml#/components/schemas/NetworkVersionMismatch"
743+
- $ref: "api.yaml#/components/schemas/NetworkClusterIDMismatch"
735744
- $ref: "api.yaml#/components/schemas/PeerConnected"
736745
- $ref: "api.yaml#/components/schemas/PeerDisconnected"
737746
- $ref: "api.yaml#/components/schemas/HeadIsInitializing"
@@ -985,6 +994,25 @@ components:
985994
timestamp:
986995
$ref: "api.yaml#/components/schemas/UTCTime"
987996

997+
# Payload schema for the NetworkClusterIDMismatch message: an object tagged
# "NetworkClusterIDMismatch" that carries two string properties, clusterPeers
# and misconfiguredPeers, alongside seq and timestamp.
# NOTE(review): clusterPeers and misconfiguredPeers are not listed under
# `required`, although the ServerOutput constructor declares both as plain
# Text fields -- confirm whether they should be required here as well.
NetworkClusterIDMismatch:
998+
type: object
999+
required:
1000+
- tag
1001+
- seq
1002+
- timestamp
1003+
properties:
1004+
tag:
1005+
type: string
1006+
enum: ["NetworkClusterIDMismatch"]
1007+
clusterPeers:
1008+
type: string
1009+
misconfiguredPeers:
1010+
type: string
1011+
seq:
1012+
$ref: "api.yaml#/components/schemas/SequenceNumber"
1013+
timestamp:
1014+
$ref: "api.yaml#/components/schemas/UTCTime"
1015+
9881016
HeadIsInitializing:
9891017
type: object
9901018
required:

hydra-node/src/Hydra/API/Server.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ mkTimedServerOutputFromStateEvent event =
249249
StateChanged.NetworkConnected -> Just NetworkConnected
250250
StateChanged.NetworkDisconnected -> Just NetworkDisconnected
251251
StateChanged.NetworkVersionMismatch{..} -> Just NetworkVersionMismatch{..}
252+
StateChanged.NetworkClusterIDMismatch{..} -> Just NetworkClusterIDMismatch{..}
252253
StateChanged.PeerConnected{..} -> Just PeerConnected{..}
253254
StateChanged.PeerDisconnected{..} -> Just PeerDisconnected{..}
254255
StateChanged.TransactionReceived{} -> Nothing

hydra-node/src/Hydra/API/ServerOutput.hs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ data ServerOutput tx
142142
{ ourVersion :: ProtocolVersion
143143
, theirVersion :: Maybe ProtocolVersion
144144
}
145+
| NetworkClusterIDMismatch
146+
{ clusterPeers :: Text
147+
, misconfiguredPeers :: Text
148+
}
145149
| PeerConnected {peer :: Host}
146150
| PeerDisconnected {peer :: Host}
147151
| HeadIsInitializing {headId :: HeadId, parties :: [Party]}
@@ -273,6 +277,7 @@ prepareServerOutput config response =
273277
NetworkConnected -> encodedResponse
274278
NetworkDisconnected -> encodedResponse
275279
NetworkVersionMismatch{} -> encodedResponse
280+
NetworkClusterIDMismatch{} -> encodedResponse
276281
PeerConnected{} -> encodedResponse
277282
PeerDisconnected{} -> encodedResponse
278283
SnapshotSideLoaded{} -> encodedResponse

hydra-node/src/Hydra/HeadLogic.hs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,16 @@ import Hydra.Tx.OnChainId (OnChainId)
103103
import Hydra.Tx.Party (Party (vkey))
104104
import Hydra.Tx.Snapshot (ConfirmedSnapshot (..), Snapshot (..), SnapshotNumber, SnapshotVersion, getSnapshot)
105105

106-
onConnectionEvent :: Network.Connectivity -> Outcome tx
107-
onConnectionEvent = \case
106+
onConnectionEvent :: Text -> Network.Connectivity -> Outcome tx
107+
onConnectionEvent misconfiguredPeers = \case
108108
Network.NetworkConnected ->
109109
newState NetworkConnected
110110
Network.NetworkDisconnected ->
111111
newState NetworkDisconnected
112112
Network.VersionMismatch{ourVersion, theirVersion} ->
113113
newState NetworkVersionMismatch{ourVersion, theirVersion}
114+
Network.ClusterIDMismatch{clusterPeers} ->
115+
newState NetworkClusterIDMismatch{clusterPeers, misconfiguredPeers}
114116
Network.PeerConnected{peer} ->
115117
newState PeerConnected{peer}
116118
Network.PeerDisconnected{peer} ->
@@ -1313,7 +1315,7 @@ update ::
13131315
Outcome tx
13141316
update env ledger st ev = case (st, ev) of
13151317
(_, NetworkInput _ (ConnectivityEvent conn)) ->
1316-
onConnectionEvent conn
1318+
onConnectionEvent env.configuredPeers conn
13171319
(Idle _, ClientInput Init) ->
13181320
onIdleClientInit env
13191321
(Idle _, ChainInput Observation{observedTx = OnInitTx{headId, headSeed, headParameters, participants}, newChainState}) ->
@@ -1426,6 +1428,7 @@ aggregate st = \case
14261428
NetworkConnected -> st
14271429
NetworkDisconnected -> st
14281430
NetworkVersionMismatch{} -> st
1431+
NetworkClusterIDMismatch{} -> st
14291432
PeerConnected{} -> st
14301433
PeerDisconnected{} -> st
14311434
HeadInitialized{parameters = parameters@HeadParameters{parties}, headId, headSeed, chainState} ->
@@ -1793,6 +1796,7 @@ aggregateChainStateHistory history = \case
17931796
NetworkConnected -> history
17941797
NetworkDisconnected -> history
17951798
NetworkVersionMismatch{} -> history
1799+
NetworkClusterIDMismatch{} -> history
17961800
PeerConnected{} -> history
17971801
PeerDisconnected{} -> history
17981802
HeadInitialized{chainState} -> pushNewState chainState history

0 commit comments

Comments
 (0)