Skip to content

Commit 5029b4f

Browse files
authored
workaround to restart on member already bootstrapped (#2073)
<!-- Describe your change here --> Closes #1937 Add workaround for known etcd cluster join issue by setting the ETCD_INITIAL_CLUSTER_STATE environment variable to "existing" (default is "new"). Notes: - This workaround is effective as long as the peer maintains its network configuration, which determines its etcd member ID and keeps it stable across restarts, even with newly created persistence. - When set to "existing", the etcd member attempts to join an already bootstrapped cluster. - If the value is incorrect, etcd will attempt to start but fail safely. --- <!-- Consider each and tick it off one way or the other --> * [X] CHANGELOG updated or not needed * [X] Documentation updated or not needed * [X] Haddocks updated or not needed * [X] No new TODOs introduced or explained hereafter --------- Signed-off-by: Sasha Bogicevic <[email protected]>
1 parent b21e5ab commit 5029b4f

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

docs/docs/known-issues.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Known errors are:
3333
- `configuredPeers` - peers info coming from `hydra-node` arguments.
3434

3535
- `member ... has already been bootstrapped` - missing information in `<persistence-dir>/etcd`
36-
- need to bootstrap new cluster or manual workarounds, see also https://etcd.io/docs/v3.5/op-guide/failures/
36+
- restart your hydra-node with the `ETCD_INITIAL_CLUSTER_STATE` environment variable set to `existing` (the default is `new`); see also https://etcd.io/docs/v3.5/op-guide/configuration/
3737

3838
We should be able to work around these UX issues using [etcd discovery](https://etcd.io/docs/v3.5/op-guide/clustering/#etcd-discovery) eventually.
3939

hydra-cluster/src/Hydra/Cluster/Scenarios.hs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ import Hydra.API.HTTPServer (
3939
DraftCommitTxResponse (..),
4040
TransactionSubmitted (..),
4141
)
42-
import Hydra.API.ServerOutput (HeadStatus (Idle))
42+
import Hydra.API.ServerOutput (HeadStatus (..))
4343
import Hydra.Cardano.Api (
4444
Coin (..),
4545
Era,
@@ -149,6 +149,7 @@ import Network.HTTP.Req (
149149
)
150150
import Network.HTTP.Simple (getResponseBody, httpJSON, setRequestBodyJSON)
151151
import Network.HTTP.Types (urlEncode)
152+
import System.Environment (setEnv, unsetEnv)
152153
import System.FilePath ((</>))
153154
import System.Process (callProcess)
154155
import Test.Hydra.Tx.Fixture (testNetworkId)
@@ -1743,6 +1744,46 @@ canSideLoadSnapshot tracer workDir backend hydraScriptsTxId = do
17431744
where
17441745
hydraTracer = contramap FromHydraNode tracer
17451746

1747+
-- | Two hydra nodes (Alice and Bob) start up, then Bob's persisted state is
-- wiped while Alice keeps running. Restarting Bob is first expected to fail
-- with an exception mentioning both @hydra-node@ and @etcd@; restarting him
-- again with the @ETCD_INITIAL_CLUSTER_STATE@ environment variable set to
-- @existing@ is expected to succeed.
--
-- The environment variable is process-wide, so it is set and unset via
-- 'bracket_' to guarantee it is removed again even when the restarted node
-- throws; otherwise it would leak into any scenario run afterwards in the
-- same process.
canResumeOnMemberAlreadyBootstrapped :: ChainBackend backend => Tracer IO EndToEndLog -> FilePath -> backend -> [TxId] -> IO ()
canResumeOnMemberAlreadyBootstrapped tracer workDir backend hydraScriptsTxId = do
  -- Look the keys up individually instead of pattern matching on a list of
  -- results, which would be a partial match.
  (aliceCardanoVk, _) <- keysFor Alice
  (bobCardanoVk, _) <- keysFor Bob
  seedFromFaucet_ backend aliceCardanoVk 100_000_000 faucetTracer
  seedFromFaucet_ backend bobCardanoVk 100_000_000 faucetTracer

  networkId <- Backend.queryNetworkId backend
  let contestationPeriod = 1
  aliceChainConfig <-
    chainConfigFor Alice workDir backend hydraScriptsTxId [Bob] contestationPeriod
      <&> setNetworkId networkId
  bobChainConfig <-
    chainConfigFor Bob workDir backend hydraScriptsTxId [Alice] contestationPeriod
      <&> setNetworkId networkId

  -- Every (re)start of Bob uses exactly the same node arguments.
  let withBob = withHydraNode hydraTracer bobChainConfig workDir 2 bobSk [aliceVk] [1, 2]

  -- Alice keeps running for the rest of the scenario.
  withHydraNode hydraTracer aliceChainConfig workDir 1 aliceSk [bobVk] [1, 2] $ \n1 -> do
    waitForIdleGreetings n1
    -- First run of Bob: started and stopped again once he greeted us.
    withBob waitForIdleGreetings

    -- NOTE(review): the unit of this delay depends on which 'threadDelay' is
    -- in scope in this module - confirm.
    threadDelay 5

    -- Drop Bob's persisted state.
    callProcess "rm" ["-rf", workDir </> "state-2"]

    -- Without the workaround the restart must fail with an etcd-related
    -- hydra-node error.
    withBob (const $ pure ())
      `shouldThrow` \(e :: SomeException) ->
        "hydra-node" `isInfixOf` show e
          && "etcd" `isInfixOf` show e

    -- With the workaround the restart must not throw. 'bracket_' ensures the
    -- variable is unset again no matter how the node run ends.
    bracket_
      (setEnv "ETCD_INITIAL_CLUSTER_STATE" "existing")
      (unsetEnv "ETCD_INITIAL_CLUSTER_STATE")
      (withBob (const $ pure ()))
 where
  hydraTracer = contramap FromHydraNode tracer

  faucetTracer = contramap FromFaucet tracer

  -- Wait (up to the given timeout argument of 'waitMatch') for a @Greetings@
  -- output reporting the 'Idle' head status.
  waitForIdleGreetings node =
    waitMatch 20 node $ \v -> do
      guard $ v ^? key "tag" == Just "Greetings"
      guard $ v ^? key "headStatus" == Just (toJSON Idle)
1786+
17461787
-- | Three hydra nodes open a head and we assert that none of them sees errors if a party is duplicated.
17471788
threeNodesWithMirrorParty :: ChainBackend backend => Tracer IO EndToEndLog -> FilePath -> backend -> [TxId] -> IO ()
17481789
threeNodesWithMirrorParty tracer workDir backend hydraScriptsTxId = do

hydra-cluster/test/Test/EndToEndSpec.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import CardanoClient (
1212
)
1313
import CardanoNode (
1414
withBackend,
15+
withCardanoNodeDevnet,
1516
)
1617
import Control.Lens ((^..), (^?))
1718
import Control.Monad (foldM_)
@@ -52,6 +53,7 @@ import Hydra.Cluster.Scenarios (
5253
canCommit,
5354
canDecommit,
5455
canRecoverDeposit,
56+
canResumeOnMemberAlreadyBootstrapped,
5557
canSeePendingDeposits,
5658
canSideLoadSnapshot,
5759
canSubmitTransactionThroughAPI,
@@ -656,6 +658,12 @@ spec = around (showLogsOnFailure "EndToEndSpec") $ do
656658
publishHydraScriptsAs backend Faucet
657659
>>= canSideLoadSnapshot tracer tmpDir backend
658660

661+
it "can resume when member has already been bootstrapped" $ \tracer -> do
662+
withClusterTempDir $ \tmpDir -> do
663+
withCardanoNodeDevnet (contramap FromCardanoNode tracer) tmpDir $ \_ backend ->
664+
publishHydraScriptsAs backend Faucet
665+
>>= canResumeOnMemberAlreadyBootstrapped tracer tmpDir backend
666+
659667
describe "two hydra heads scenario" $ do
660668
it "two heads on the same network do not conflict" $ \tracer ->
661669
failAfter 60 $

0 commit comments

Comments
 (0)