Skip to content

Commit 766e84f

Browse files
committed
Add more useful information on cluster misconfiguration
Also provide e2e test to assert we see what we expect in the logs Signed-off-by: Sasha Bogicevic <[email protected]>
1 parent 481311e commit 766e84f

File tree

18 files changed

+91
-34
lines changed

18 files changed

+91
-34
lines changed

docs/docs/known-issues.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ The network topology needs to be statically configured and match across all `hyd
2828
Known errors are:
2929

3030
- `cluster ID mismatch` - the cluster was initiated with a different list of `--peer`s
31-
- check configuration with other participants
31+
- check configuration with other participants. There should be a corresponding log entry `NetworkClusterIDMismatch` with the information on:
32+
- `clusterPeers` - loaded peers info received from `etcd` cluster.
33+
- `misconfiguredPeers` - peers info coming from `hydra-node` arguments.
34+
- `reportingHost` - host that received the error.
3235

3336
- `member ... has already been bootstrapped` - missing information in `<persistence-dir>/etcd`
3437
- need to bootstrap new cluster or manual workarounds, see also https://etcd.io/docs/v3.5/op-guide/failures/

hydra-cluster/src/Hydra/Cluster/Scenarios.hs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,35 @@ initWithWrongKeys workDir tracer node@RunningNode{nodeSocket} hydraScriptsTxId =
11721172

11731173
participants `shouldMatchList` expectedParticipants
11741174

1175+
startWithWrongPeers :: FilePath -> Tracer IO EndToEndLog -> RunningNode -> [TxId] -> IO ()
1176+
startWithWrongPeers workDir tracer node@RunningNode{nodeSocket} hydraScriptsTxId = do
1177+
(aliceCardanoVk, _) <- keysFor Alice
1178+
1179+
let contestationPeriod = 2
1180+
aliceChainConfig <- chainConfigFor Alice workDir nodeSocket hydraScriptsTxId [Carol] contestationPeriod
1181+
bobChainConfig <- chainConfigFor Bob workDir nodeSocket hydraScriptsTxId [Alice] contestationPeriod
1182+
1183+
let hydraTracer = contramap FromHydraNode tracer
1184+
withHydraNode hydraTracer aliceChainConfig workDir 3 aliceSk [bobVk] [3, 4] $ \n1 -> do
1185+
-- NOTE: here we deliberately use the wrong peer list for Bob, which should be visible
1186+
-- in the network message sent to Alice.
1187+
withHydraNode hydraTracer bobChainConfig workDir 4 bobSk [aliceVk] [4] $ \_ -> do
1188+
seedFromFaucet_ node aliceCardanoVk 100_000_000 (contramap FromFaucet tracer)
1189+
1190+
(clusterPeers, reportingHost, configuredPeers) <- waitMatch 20 n1 $ \v -> do
1191+
guard $ v ^? key "tag" == Just (Aeson.String "NetworkClusterIDMismatch")
1192+
clusterPeers <- v ^? key "clusterPeers" . _String
1193+
reportingHost <- v ^? key "reportingHost" . _String
1194+
configuredPeers <- v ^? key "misconfiguredPeers" . _String
1195+
pure (clusterPeers, reportingHost, configuredPeers)
1196+
1197+
when (clusterPeers == configuredPeers) $
1198+
failure "Expected clusterPeers and configuredPeers to be different"
1199+
-- NOTE: reporting host is the host receiving the error, which is Alice
1200+
reportingHost `shouldBe` "http://0.0.0.0:5003"
1201+
clusterPeers `shouldBe` "0.0.0.0:5003=http://0.0.0.0:5003,0.0.0.0:5004=http://0.0.0.0:5004"
1202+
configuredPeers `shouldBe` "0.0.0.0:5004=http://0.0.0.0:5004"
1203+
11751204
-- | Open a two participant head and incrementally commit to it.
11761205
canCommit :: Tracer IO EndToEndLog -> FilePath -> RunningNode -> [TxId] -> IO ()
11771206
canCommit tracer workDir node hydraScriptsTxId =

hydra-cluster/test/Test/EndToEndSpec.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ import Hydra.Cluster.Scenarios (
7575
singlePartyHeadFullLifeCycle,
7676
singlePartyUsesScriptOnL2,
7777
singlePartyUsesWithdrawZeroTrick,
78+
startWithWrongPeers,
7879
threeNodesNoErrorsOnOpen,
7980
threeNodesWithMirrorParty,
8081
)
@@ -566,6 +567,13 @@ spec = around (showLogsOnFailure "EndToEndSpec") $ do
566567
publishHydraScriptsAs node Faucet
567568
>>= initWithWrongKeys tmpDir tracer node
568569

570+
it "cluster id mismatch provides useful info in the logs" $ \tracer ->
571+
failAfter 60 $
572+
withClusterTempDir $ \tmpDir -> do
573+
withCardanoNodeDevnet (contramap FromCardanoNode tracer) tmpDir $ \node -> do
574+
publishHydraScriptsAs node Faucet
575+
>>= startWithWrongPeers tmpDir tracer node
576+
569577
it "bob cannot abort alice's head" $ \tracer -> do
570578
failAfter 60 $
571579
withClusterTempDir $ \tmpDir -> do

hydra-node/golden/ServerOutput/NetworkClusterIDMismatch.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
{
22
"samples": [
33
{
4-
"localClusterID": "",
5-
"remotePeerClusterID": "",
4+
"clusterPeers": "",
5+
"misconfiguredPeers": "`",
6+
"reportingHost": "",
67
"tag": "NetworkClusterIDMismatch"
78
}
89
],

hydra-node/golden/StateChanged/NetworkClusterIDMismatch.json

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
{
22
"samples": [
33
{
4-
"configuredParties": [
5-
{
6-
"vkey": "d05f801e08648c87a1182768655803d4261ae2545573cd3a58b837f1541691de"
7-
}
8-
],
9-
"localClusterID": "",
10-
"remotePeerClusterID": "",
4+
"clusterPeers": "",
5+
"misconfiguredPeers": "$",
6+
"reportingHost": "",
117
"tag": "NetworkClusterIDMismatch"
128
}
139
],

hydra-node/json-schemas/api.yaml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,14 +1004,12 @@ components:
10041004
tag:
10051005
type: string
10061006
enum: ["NetworkClusterIDMismatch"]
1007-
localClusterID:
1007+
clusterPeers:
10081008
type: string
1009-
remotePeerClusterID:
1009+
reportingHost:
1010+
type: string
1011+
misconfiguredPeers:
10101012
type: string
1011-
configuredParties:
1012-
type: array
1013-
items:
1014-
$ref: "api.yaml#/components/schemas/Party"
10151013
seq:
10161014
$ref: "api.yaml#/components/schemas/SequenceNumber"
10171015
timestamp:

hydra-node/src/Hydra/API/ServerOutput.hs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,9 @@ data ServerOutput tx
143143
, theirVersion :: Maybe ProtocolVersion
144144
}
145145
| NetworkClusterIDMismatch
146-
{ localClusterID :: Text
147-
, remotePeerClusterID :: Text
146+
{ clusterPeers :: Text
147+
, reportingHost :: Text
148+
, misconfiguredPeers :: Text
148149
}
149150
| PeerConnected {peer :: Host}
150151
| PeerDisconnected {peer :: Host}

hydra-node/src/Hydra/HeadLogic.hs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,16 @@ import Hydra.Tx.Party (Party (vkey))
104104
import Hydra.Tx.Snapshot (ConfirmedSnapshot (..), Snapshot (..), SnapshotNumber, SnapshotVersion, getSnapshot)
105105

106106

107-
onConnectionEvent :: [Party] -> Network.Connectivity -> Outcome tx
108-
onConnectionEvent configuredParties = \case
107+
onConnectionEvent :: Text -> Network.Connectivity -> Outcome tx
108+
onConnectionEvent misconfiguredPeers = \case
109109
Network.NetworkConnected ->
110110
newState NetworkConnected
111111
Network.NetworkDisconnected ->
112112
newState NetworkDisconnected
113113
Network.VersionMismatch{ourVersion, theirVersion} ->
114114
newState NetworkVersionMismatch{ourVersion, theirVersion}
115-
Network.ClusterIDMismatch{localClusterID, remotePeerClusterID} ->
116-
newState NetworkClusterIDMismatch{localClusterID, remotePeerClusterID, configuredParties}
115+
Network.ClusterIDMismatch{clusterPeers, reportingHost} ->
116+
newState NetworkClusterIDMismatch{clusterPeers, reportingHost, misconfiguredPeers}
117117
Network.PeerConnected{peer} ->
118118
newState PeerConnected{peer}
119119
Network.PeerDisconnected{peer} ->
@@ -1316,7 +1316,7 @@ update ::
13161316
Outcome tx
13171317
update env ledger st ev = case (st, ev) of
13181318
(_, NetworkInput _ (ConnectivityEvent conn)) ->
1319-
onConnectionEvent env.otherParties conn
1319+
onConnectionEvent env.configuredPeers conn
13201320
(Idle _, ClientInput Init) ->
13211321
onIdleClientInit env
13221322
(Idle _, ChainInput Observation{observedTx = OnInitTx{headId, headSeed, headParameters, participants}, newChainState}) ->

hydra-node/src/Hydra/HeadLogic/Outcome.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ data StateChanged tx
6161
, theirVersion :: Maybe ProtocolVersion
6262
}
6363
| NetworkClusterIDMismatch
64-
{ localClusterID :: Text
65-
, remotePeerClusterID :: Text
66-
, configuredParties :: [Party]
64+
{ clusterPeers :: Text
65+
, reportingHost :: Text
66+
, misconfiguredPeers :: Text
6767
}
6868
| HeadInitialized
6969
{ parameters :: HeadParameters

hydra-node/src/Hydra/Network.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ data Connectivity
186186
, theirVersion :: Maybe ProtocolVersion
187187
}
188188
| ClusterIDMismatch
189-
{ localClusterID :: Text
190-
, remotePeerClusterID :: Text
189+
{ clusterPeers :: Text
190+
, reportingHost :: Text
191191
}
192192
deriving stock (Generic, Eq, Show)
193193
deriving anyclass (ToJSON)

0 commit comments

Comments
 (0)