diff --git a/execution_chain/config.nim b/execution_chain/config.nim index ee2ec7007f..9b6970fe50 100644 --- a/execution_chain/config.nim +++ b/execution_chain/config.nim @@ -91,6 +91,7 @@ type noCommand `import` `import-rlp` + `capture-log` RpcFlag* {.pure.} = enum ## RPC flags @@ -345,14 +346,6 @@ type defaultValue: 4'u64 name: "debug-persist-batch-size" .}: uint64 - beaconSyncTargetFile* {. - hidden - desc: "Load a file containg an rlp-encoded object \"(Header,Hash32)\" " & - "to be used " & - "as the first target before any other request from the CL " & - "is accepted" - name: "debug-beacon-sync-target-file" .}: Option[InputFile] - rocksdbMaxOpenFiles {. hidden defaultValue: defaultMaxOpenFiles @@ -510,6 +503,37 @@ type defaultValueDesc: "\"jwt.hex\" in the data directory (see --data-dir)" name: "jwt-secret" .}: Option[InputFile] + beaconSyncTraceFile* {. + separator: "\pBEACON SYNC OPTIONS:" + desc: "Enable tracer and write capture data to the argument file" + name: "beacon-sync-trace-file" .}: Option[OutFile] + + beaconSyncTraceSessions* {. + defaultValue: 1 + desc: "Run a trace for this many sessions " & + "(i.e. from activation to suspension)" + name: "beacon-sync-trace-sessions" .}: int + + beaconSyncReplayFile* {. + desc: "Read from trace capture file for full replay" + name: "beacon-sync-replay-file" .}: Option[InputFile] + + beaconSyncReplayNoisyFrom* {. + desc: "Extra replay logging starting with argument record number" + name: "beacon-sync-replay-noisy-from" .}: Option[uint] + + beaconSyncReplayFakeImport* {. + desc: "Suppress block import (for test runs)" + defaultValue: false + name: "beacon-sync-replay-fake-import" .}: bool + + beaconSyncTargetFile* {. + hidden + desc: "Load a file containg an rlp-encoded object " & + "\"(Header,Hash32)\" to be used as the first target before " & + "any other request from the CL is accepted" + name: "debug-beacon-sync-target-file" .}: Option[InputFile] + of `import`: maxBlocks* {. desc: "Maximum number of blocks to import" @@ -567,6 +591,12 @@ type desc: "One or more RLP encoded block(s) files" name: "blocks-file" }: seq[InputFile] + of `capture-log`: + beaconSyncCaptureFile* {. + argument + desc: "Read from capture file for log output" + name: "beacon-sync-capture-file" .}: Option[InputFile] + func parseHexOrDec256(p: string): UInt256 {.raises: [ValueError].} = if startsWith(p, "0x"): parse(p, UInt256, 16) diff --git a/execution_chain/core/chain/header_chain_cache.nim b/execution_chain/core/chain/header_chain_cache.nim index 1a2334914c..7978c84177 100644 --- a/execution_chain/core/chain/header_chain_cache.nim +++ b/execution_chain/core/chain/header_chain_cache.nim @@ -137,6 +137,12 @@ const # Private debugging and print functions # ------------------------------------------------------------------------------ +import pkg/stew/byteutils + +type ClMesg {.used.} = object + head: Header + fin: Hash32 + func bnStr(w: BlockNumber): string = "#" & $w @@ -238,7 +244,9 @@ proc persistInfo(hc: HeaderChainRef) = proc persistClear(hc: HeaderChainRef) = ## Clear persistent database - let w = hc.kvt.getInfo.valueOr: return + let w = hc.kvt.getInfo.valueOr: + trace "HeaderChain.persistClear", nHeaders="n/a", hc=hc.toStr + return for bn in w.least .. 
w.last: hc.kvt.delHeader(bn) # Occasionally flush the current data @@ -246,6 +254,8 @@ proc persistClear(hc: HeaderChainRef) = hc.kvt.persist() hc.kvt.delInfo() hc.kvt.persist() + trace "HeaderChain.persistClear", least=w.least.bnStr, last=w.last.bnStr, + nHeaders=(w.last - w.least + 1), hc=hc.toStr # ------------------------------------------------------------------------------ # Private functions @@ -283,6 +293,7 @@ proc tryFcParent(hc: HeaderChainRef; hdr: Header): HeaderChainMode = if baseNum + 1 < hdr.number: return collecting # inconclusive + trace "HeaderChain.tryFcParent: orhpaned", base=baseNum.bnStr, hc=hc.toStr return orphan # maybe on the wrong branch # ------------------------------------------------------------------------------ @@ -325,8 +336,15 @@ proc headUpdateFromCL(hc: HeaderChainRef; h: Header; f: Hash32) = metrics.set(nec_sync_dangling, h.number.int64) # Inform client app about that a new session has started. - hc.notify() hc.chain.pendingFCU = f + hc.notify() + + when false: + # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv + trace "fcUpdateFromCL: Replay target", base=hc.chain.baseNumber.bnStr, + latest=hc.chain.latestNumber.bnStr, trg=h.bnStr, fin=f.short, + scrum=encodePayload(ClMesg(head: h, fin: f)).toHex + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # For logging and metrics hc.session.consHeadNum = h.number @@ -552,6 +570,9 @@ proc put*( hc.session.ante = rev[revTopInx] metrics.set(nec_sync_dangling, hc.session.ante.number.int64) + trace "HeaderChain.put: saved headers", offset, revTopInx, + base=hc.chain.baseNumber.bnStr, hc=hc.toStr + # Save updates. persist to DB hc.persistInfo() diff --git a/execution_chain/nimbus_execution_client.nim b/execution_chain/nimbus_execution_client.nim index 48b21427b5..3b391bb529 100644 --- a/execution_chain/nimbus_execution_client.nim +++ b/execution_chain/nimbus_execution_client.nim @@ -11,7 +11,7 @@ import ../execution_chain/compile_info import - std/[os, osproc, strutils, net, options], + std/[os, osproc, net, options, streams], chronicles, eth/net/nat, metrics, @@ -28,6 +28,7 @@ import ./db/core_db/persistent, ./db/storage_types, ./sync/wire_protocol, + ./sync/beacon/replay, ./common/chain_config_hash, ./portal/portal @@ -120,6 +121,24 @@ proc setupP2P(nimbus: NimbusNode, conf: NimbusConf, nimbus.beaconSyncRef = BeaconSyncRef.init( nimbus.ethNode, nimbus.fc, conf.maxPeers) + # Optional tracer + if conf.beaconSyncTraceFile.isSome(): + nimbus.beaconSyncRef.tracerInit( + conf.beaconSyncTraceFile.unsafeGet.string, conf.beaconSyncTraceSessions) + + # Optional replay + if conf.beaconSyncReplayFile.isSome(): + if conf.beaconSyncTraceFile.isSome(): + fatal "Cannot have both "& + "--beacon-sync-trace-file and --beacon-sync-replay-file" + if conf.beaconSyncTargetFile.isSome(): + fatal "Cannot have both "& + "--beacon-sync-target-file and --beacon-sync-replay-file" + nimbus.beaconSyncRef.replayInit( + conf.beaconSyncReplayFile.unsafeGet.string, + conf.beaconSyncReplayNoisyFrom.get(high uint), + conf.beaconSyncReplayFakeImport) + # Optional for pre-setting the sync target (i.e. 
debugging) if conf.beaconSyncTargetFile.isSome(): nimbus.beaconSyncRef.targetInit conf.beaconSyncTargetFile.unsafeGet.string @@ -198,6 +217,18 @@ proc run(nimbus: NimbusNode, conf: NimbusConf) = version = FullVersionStr, conf + case conf.cmd + of NimbusCmd.`capture-log`: + if conf.beaconSyncCaptureFile.isNone(): + fatal "Capture file is required, use --beacon-sync-capture-file" + quit(QuitFailure) + let st = conf.beaconSyncCaptureFile.unsafeGet.string.newFileStream fmRead + ReplayReaderRef.init(st).captureLog(proc(): bool = + nimbus.state == NimbusState.Stopping) + return + else: + discard + # Trusted setup is needed for processing Cancun+ blocks # If user not specify the trusted setup, baked in # trusted setup will be loaded, lazily. @@ -276,7 +307,8 @@ proc run(nimbus: NimbusNode, conf: NimbusConf) = setupP2P(nimbus, conf, com) setupRpc(nimbus, conf, com) - if conf.maxPeers > 0 and conf.engineApiServerEnabled(): + if (conf.maxPeers > 0 and conf.engineApiServerEnabled()) or + conf.beaconSyncReplayFile.isSome(): # Not starting syncer if there is definitely no way to run it. This # avoids polling (i.e. waiting for instructions) and some logging. if not nimbus.beaconSyncRef.start(): diff --git a/execution_chain/sync/beacon.nim b/execution_chain/sync/beacon.nim index 498b081f6d..0fac31aa1c 100644 --- a/execution_chain/sync/beacon.nim +++ b/execution_chain/sync/beacon.nim @@ -15,43 +15,76 @@ import pkg/stew/[interval_set, sorted_set], ../core/chain, ../networking/p2p, - ./beacon/[worker, worker_desc], + ./beacon/worker/blocks/blocks_fetch, + ./beacon/worker/blocks/blocks_import, + ./beacon/worker/headers/headers_fetch, + ./beacon/worker/update, + ./beacon/[trace, replay, worker, worker_desc], ./[sync_desc, sync_sched, wire_protocol] - logScope: topics = "beacon sync" type BeaconSyncRef* = RunnerSyncRef[BeaconCtxData,BeaconBuddyData] +# ------------------------------------------------------------------------------ +# Interceptable handlers +# ------------------------------------------------------------------------------ + +proc schedDaemonCB( + ctx: BeaconCtxRef; + ): Future[Duration] + {.async: (raises: []).} = + return worker.runDaemon(ctx, "RunDaemon") # async/template + +proc schedStartCB(buddy: BeaconBuddyRef): bool = + return worker.start(buddy, "RunStart") + +proc schedStopCB(buddy: BeaconBuddyRef) = + worker.stop(buddy, "RunStop") + +proc schedPoolCB(buddy: BeaconBuddyRef; last: bool; laps: int): bool = + return worker.runPool(buddy, last, laps, "RunPool") + +proc schedPeerCB( + buddy: BeaconBuddyRef; + ): Future[Duration] + {.async: (raises: []).} = + return worker.runPeer(buddy, "RunPeer") # async/template + +proc noOpBuddy(buddy: BeaconBuddyRef) = discard + +proc noOpCtx(ctx: BeaconCtxRef; maybePeer: Opt[BeaconBuddyRef]) = discard + # ------------------------------------------------------------------------------ # Virtual methods/interface, `mixin` functions # ------------------------------------------------------------------------------ proc runSetup(ctx: BeaconCtxRef): bool = - worker.setup(ctx, "RunSetup") + return worker.setup(ctx, "RunSetup") proc runRelease(ctx: BeaconCtxRef) = worker.release(ctx, "RunRelease") -proc runDaemon(ctx: BeaconCtxRef): Future[Duration] {.async: (raises: []).} = - return worker.runDaemon(ctx, "RunDaemon") - proc runTicker(ctx: BeaconCtxRef) = worker.runTicker(ctx, "RunTicker") + +proc runDaemon(ctx: BeaconCtxRef): Future[Duration] {.async: (raises: []).} = + return await ctx.handler.schedDaemon(ctx) + proc runStart(buddy: BeaconBuddyRef): bool = - 
worker.start(buddy, "RunStart") + return buddy.ctx.handler.schedStart(buddy) proc runStop(buddy: BeaconBuddyRef) = - worker.stop(buddy, "RunStop") + buddy.ctx.handler.schedStop(buddy) proc runPool(buddy: BeaconBuddyRef; last: bool; laps: int): bool = - worker.runPool(buddy, last, laps, "RunPool") + return buddy.ctx.handler.schedPool(buddy, last, laps) proc runPeer(buddy: BeaconBuddyRef): Future[Duration] {.async: (raises: []).} = - return worker.runPeer(buddy, "RunPeer") + return await buddy.ctx.handler.schedPeer(buddy) # ------------------------------------------------------------------------------ # Public functions # ------------------------------------------------------------------------------ @@ -66,17 +99,61 @@ proc init*( var desc = T() desc.initSync(ethNode, maxPeers) desc.ctx.pool.chain = chain + + # Set up handlers so they can be overlaid + desc.ctx.pool.handlers = BeaconHandlersRef( + version: 0, + activate: updateActivateCB, + suspend: updateSuspendCB, + schedDaemon: schedDaemonCB, + schedStart: schedStartCB, + schedStop: schedStopCB, + schedPool: schedPoolCB, + schedPeer: schedPeerCB, + getBlockHeaders: getBlockHeadersCB, + syncBlockHeaders: noOpBuddy, + getBlockBodies: getBlockBodiesCB, + syncBlockBodies: noOpBuddy, + importBlock: importBlockCB, + syncImportBlock: noOpCtx) + desc +proc tracerInit*(desc: BeaconSyncRef; outFile: string, nSessions: int) = + ## Set up tracer (not to be called when replay is enabled) + if not desc.ctx.traceSetup(outFile, nSessions): + fatal "Cannot set up trace handlers -- STOP", fileName=outFile, nSessions + quit(QuitFailure) + +proc replayInit*( + desc: BeaconSyncRef; + inFile: string; + startNoisy = high(uint); + fakeImport = false; + ) = + ## Set up replay (not to be called when trace is enabled) + if not desc.ctx.replaySetup(inFile, startNoisy): + fatal "Cannot set up replay handlers -- STOP", fileName=inFile + quit(QuitFailure) + proc targetInit*(desc: BeaconSyncRef; rlpFile: string) = ## Set up initial sprint (intended for debugging) + doAssert desc.ctx.handler.version == 0 desc.ctx.initalTargetFromFile(rlpFile, "targetInit").isOkOr: raiseAssert error proc start*(desc: BeaconSyncRef): bool = - desc.startSync() + if desc.startSync(): + desc.ctx.traceStart() + desc.ctx.replayStart() + return true + # false proc stop*(desc: BeaconSyncRef) {.async.} = + desc.ctx.traceStop() + desc.ctx.traceRelease() + desc.ctx.replayStop() + desc.ctx.replayRelease() await desc.stopSync() # ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay.nim b/execution_chain/sync/beacon/replay.nim new file mode 100644 index 0000000000..4e144ffb0a --- /dev/null +++ b/execution_chain/sync/beacon/replay.nim @@ -0,0 +1,26 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms.
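Editor's note on the pattern introduced above: the scheduler now calls every beacon-sync entry point through a table of proc fields (`BeaconHandlersRef`), which is what lets the tracer and replayer overlay individual callbacks later on. A minimal, self-contained sketch of that idea; the names `DemoCtx`, `DemoHandlers` and `withTracing` are hypothetical and not part of this patch:

```nim
# Illustrative only -- mirrors the overlayable-handlers idea, not the real API.
type
  DemoCtx = ref object
    handler: DemoHandlers

  DemoHandlers = ref object
    version: int
    schedPeer: proc(ctx: DemoCtx): string    # closure field, like schedPeerCB

proc basePeer(ctx: DemoCtx): string = "worker ran"

proc withTracing(ctx: DemoCtx) =
  ## Overlay the current handler table, wrapping the original callback.
  let prev = ctx.handler
  ctx.handler = DemoHandlers(
    version: prev.version + 1,
    schedPeer: proc(c: DemoCtx): string = "traced(" & prev.schedPeer(c) & ")")

when isMainModule:
  let ctx = DemoCtx(handler: DemoHandlers(version: 0, schedPeer: basePeer))
  ctx.withTracing()
  doAssert ctx.handler.schedPeer(ctx) == "traced(worker ran)"
```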
+ +## Replay environment + +{.push raises:[].} + +import + ./replay/replay_reader/reader_init, + ./replay/[replay_desc, replay_reader, replay_setup, replay_start_stop] + +export + ReplayReaderRef, + captureLog, + init, + replay_setup, + replay_start_stop + +# End diff --git a/execution_chain/sync/beacon/replay/README.md b/execution_chain/sync/beacon/replay/README.md new file mode 100644 index 0000000000..df985936ae --- /dev/null +++ b/execution_chain/sync/beacon/replay/README.md @@ -0,0 +1,46 @@ +Inspection of Capture Data And Replay +===================================== + +Inspection +---------- + +Given a (probably gzipped) capture file **(capture)** as a result of +tracing, its content can be visualised via + + nimbus_execution_client \ + capture-log \ + --beacon-sync-capture-file=(capture) + +Replay +------ + +Copy and secure the current database directory **(database)** as **(dbcopy)**, +say. Then start a capture run on the original database as + + nimbus_execution_client \ + --datadir=(database) \ + --beacon-sync-trace-file=(capture) \ + ... + +where **(capture)** will contain all the data for the replay. This file can +become quite big (e.g. 30GiB for the last 120k blocks synchronised on +*mainnet*) but can be gzipped after the capture run has stopped. + +Monitor the capture run so it can be stopped at an appropriate state using +metrics or logs. With the above command line arguments, only the next sync +session is logged, ranging from the activation message (when *Activating syncer* +is logged) up until the suspend message (when *Suspending syncer* is logged). + +Now, the captured run can be replayed on the secured database copy +**(dbcopy)** with the (probably gzipped) **(capture)** file via + + nimbus_execution_client \ + --datadir=(dbcopy) \ + --beacon-sync-replay-file=(capture) \ + ... + +where the additional arguments **...** of either command above need not be +the same. + +Note that you need another copy of **(database)** if you want to re-run the +latter command. diff --git a/execution_chain/sync/beacon/replay/nim.cfg b/execution_chain/sync/beacon/replay/nim.cfg new file mode 100644 index 0000000000..d8027ca89c --- /dev/null +++ b/execution_chain/sync/beacon/replay/nim.cfg @@ -0,0 +1,4 @@ +-d:"chronicles_sinks=textlines[stderr]" +-d:"chronicles_runtime_filtering=on" +-d:"chronicles_disable_thread_id" +-d:"chronicles_line_numbers:1" diff --git a/execution_chain/sync/beacon/replay/replay_desc.nim b/execution_chain/sync/beacon/replay/replay_desc.nim new file mode 100644 index 0000000000..a8a1e0c7c7 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_desc.nim @@ -0,0 +1,126 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms.
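Editor's note: the `capture-log` subcommand wired up in nimbus_execution_client.nim reduces to the reader calls shown in this patch. A sketch of driving the same reader from a standalone tool, assuming the module layout above; the relative import path and the input file name `capture.gz` are illustrative only:

```nim
# Sketch only: mirrors the capture-log wiring; import path and file name assumed.
import std/streams
import ./replay          # exports ReplayReaderRef, init, captureLog (this patch)

proc dumpCapture(fileName: string) =
  let strm = newFileStream(fileName, fmRead)
  if strm.isNil:
    quit("cannot open " & fileName)
  # The reader auto-detects plain vs. gzipped capture files; the callback
  # decides when to stop early (here: never).
  ReplayReaderRef.init(strm).captureLog(proc(): bool = false)

when isMainModule:
  dumpCapture("capture.gz")
```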
+ +## Replay environment + +{.push raises:[].} + +import + std/streams, + pkg/chronos, + ../trace/trace_desc, + ./replay_reader/reader_desc, + ./replay_runner/runner_desc + +export + reader_desc, + runner_desc, + trace_desc + +const + ReplayBaseHandlersID* = 2 + ReplayOverlayHandlersID* = 20 + + replayWaitForCompletion* = chronos.nanoseconds(100) + ## Wait for other pseudo/async thread to have completed something + + replayWaitMuted* = chronos.milliseconds(200) + ## Some handlers are muted, but keep them in a waiting loop so + ## the system can terminate + +type + ReplayBaseHandlersRef* = ref object of BeaconHandlersRef + ## Extension for caching state so that the replay start can be + ## synchronised with, e.g. after the syncer has started + strm*: Stream ## Input stream + startNoisy*: uint ## Cycle threshold for noisy logging + fakeImport*: bool ## No database import if `true` + + ReplayRef* = ref object of BeaconHandlersRef + reader*: ReplayReaderRef ## Input records + backup*: BeaconHandlersRef ## Can restore previous handlers + runner*: ReplayRunnerRef ## Replay descriptor + + + ReplayPayloadRef* = ref object of RootRef + ## Decoded payload base record + recType*: TraceRecType + + ReplayVersionInfo* = ref object of ReplayPayloadRef + data*: TraceVersionInfo + + # ------------- + + ReplaySyncActvFailed* = ref object of ReplayPayloadRef + data*: TraceSyncActvFailed + + ReplaySyncActivated* = ref object of ReplayPayloadRef + data*: TraceSyncActivated + + ReplaySyncHibernated* = ref object of ReplayPayloadRef + data*: TraceSyncHibernated + + # ------------- + + ReplaySchedDaemonBegin* = ref object of ReplayPayloadRef + data*: TraceSchedDaemonBegin + + ReplaySchedDaemonEnd* = ref object of ReplayPayloadRef + data*: TraceSchedDaemonEnd + + ReplaySchedStart* = ref object of ReplayPayloadRef + data*: TraceSchedStart + + ReplaySchedStop* = ref object of ReplayPayloadRef + data*: TraceSchedStop + + ReplaySchedPool* = ref object of ReplayPayloadRef + data*: TraceSchedPool + + ReplaySchedPeerBegin* = ref object of ReplayPayloadRef + data*: TraceSchedPeerBegin + + ReplaySchedPeerEnd* = ref object of ReplayPayloadRef + data*: TraceSchedPeerEnd + + # ------------- + + ReplayFetchHeaders* = ref object of ReplayPayloadRef + data*: TraceFetchHeaders + + ReplaySyncHeaders* = ref object of ReplayPayloadRef + data*: TraceSyncHeaders + + + ReplayFetchBodies* = ref object of ReplayPayloadRef + data*: TraceFetchBodies + + ReplaySyncBodies* = ref object of ReplayPayloadRef + data*: TraceSyncBodies + + + ReplayImportBlock* = ref object of ReplayPayloadRef + data*: TraceImportBlock + + ReplaySyncBlock* = ref object of ReplayPayloadRef + data*: TraceSyncBlock + +# ------------------------------------------------------------------------------ +# Public helpers +# ------------------------------------------------------------------------------ + +func replay*(ctx: BeaconCtxRef): ReplayRef = + ## Getter, get replay descriptor (if any) + if ctx.handler.version == 20: + return ctx.handler.ReplayRef + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_helpers.nim b/execution_chain/sync/beacon/replay/replay_helpers.nim new file mode 100644 index 0000000000..cbba926213 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_helpers.nim @@ -0,0 +1,73 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and 
distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay helpers + +{.push raises:[].} + +import + std/strutils, + pkg/[chronos, eth/common], + ../trace/trace_start_stop/handlers/helpers as trace_helpers, + ../worker/helpers as worker_helpers + +export + trace_helpers.idStr, + trace_helpers.short, + worker_helpers + +# ------------------------------------------------------------------------------ +# Public helpers +# ------------------------------------------------------------------------------ + +func ageStr*(w: chronos.Duration): string = + var + res = newStringOfCap(32) + nsLeft = w.nanoseconds() + + # Inspired by `chronos/timer.toString()` + template f( + pfxChr: static[char]; + pfxLen: static[int]; + ela: static[chronos.Duration]; + sep: static[string]; + ) = + let n = uint64(nsLeft div ela.nanoseconds()) + when pfxLen == 0: + let s = if 0 < n: $n else: "" + else: + let s = $n + when 0 < pfxLen: + res.add pfxChr.repeat(max(0, pfxLen - s.len)) + res.add s + when pfxLen == 0: + if 0 < n: res.add sep + else: + res.add sep + nsLeft = nsLeft mod ela.nanoseconds() + + f(' ', 0, chronos.Day, " ") + f('0', 2, chronos.Hour, ":") + f('0', 2, chronos.Minute, ":") + f('0', 2, chronos.Second, ".") + f('0', 3, chronos.Millisecond, ".") + f('0', 3, chronos.Microsecond, "") + + res + +func toUpperFirst*(w: string): string = + if 1 < w.len: + $w[0].toUpperAscii & w.substr(1) + else: + w + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_reader.nim b/execution_chain/sync/beacon/replay/replay_reader.nim new file mode 100644 index 0000000000..4b5d375b9f --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_reader.nim @@ -0,0 +1,72 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[net, syncio], + ./replay_reader/[reader_unpack, reader_reclog], + ./replay_desc + +type + StopFn* = proc(): bool {.gcsafe, raises: [].} + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc nextRecord*(rp: ReplayReaderRef): ReplayPayloadRef = + ## Retrieve the next record from the capture + while true: + var line = rp.readLine(rp).valueOr: + return ReplayPayloadRef(nil) + if 0 < line.len and line[0] != '#': + return line.unpack() + +proc captureLog*( + rp: ReplayReaderRef; + prt: ReplayRecLogPrintFn; + stop: StopFn; + ) = + ## Cycle through capture records from `rp` and feed them to the + ## argument `prt()`. 
+ var n = 0 + while not stop(): + let w = rp.nextRecord() + if w.isNil and rp.atEnd(rp): + break + n.inc + prt w.recLogToStrList(n) + prt n.recLogToStrEnd() + +proc captureLog*( + rp: ReplayReaderRef; + stop: StopFn; + ) = + ## Pretty print linewise records from the capture `rp`. + rp.captureLog(stdout.recLogPrint(), stop) + +# ------------------------------------------------------------------------------ +# Public iterators +# ------------------------------------------------------------------------------ + +iterator records*(rp: ReplayReaderRef): ReplayPayloadRef = + ## Iterate over all capture records + while true: + let record = rp.nextRecord() + if record.isNil and rp.atEnd(rp): + break + yield record + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_reader/reader_desc.nim b/execution_chain/sync/beacon/replay/replay_reader/reader_desc.nim new file mode 100644 index 0000000000..3c48011fee --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_reader/reader_desc.nim @@ -0,0 +1,41 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/streams, + pkg/results, + ./reader_gunzip + +type + ReplayRecLogPrintFn* = proc(s: seq[string]) {.gcsafe, raises: [].} + ## Print output (e.g. used in `lineLog()`) for logger + + ReplayReadLineFn* = + proc(rp: ReplayReaderRef): Opt[string] {.gcsafe, raises: [].} + ## Reader filter, e.g. for zipped data + + ReplayAtEndFn* = + proc(rp: ReplayReaderRef): bool {.gcsafe, raises: [].} + ## Indicated end of stream + + ReplayReaderRef* = ref object + ## Reader descriptor + inStream*: Stream ## Dump file for ethxx data packets + gzFilter*: GUnzipRef ## Apply GUnzip filter to stream + readLine*: ReplayReadLineFn ## Reader function + atEnd*: ReplayAtEndFn ## EOF indicator + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_reader/reader_gunzip.nim b/execution_chain/sync/beacon/replay/replay_reader/reader_gunzip.nim new file mode 100644 index 0000000000..d1c4cd6dad --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_reader/reader_gunzip.nim @@ -0,0 +1,352 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed except +# according to those terms. + +## Incremental unzip based on `Stream` input (derived from +## `test/replay/unzip.nim`.) 
+ +{.push raises:[].} + +import + std/[os, streams, strutils], + pkg/[chronicles, results, zlib] + +logScope: + topics = "replay gunzip" + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + + ReadBufLen = 2048 + ## Size of data chunks to be read from stream. + +type + GUnzipStatus* = tuple + zError: ZError + info: string + + GUnzipRef* = ref object + mz: ZStream ## Gzip sub-system + nextInBuf: array[4096,char] ## Input buffer for gzip `mz.next_in` + nextOutBuf: array[2048,char] ## Output buffer for gzip `mz.next_out` + + inStream: Stream ## Input stream + inName: string ## Registered gzip file name (if any) + outDoneOK: bool ## Gzip/inflate stream end indicator + + lnCache: string ## Input line buffer, used by `nextLine` + lnError: GUnzipStatus ## Last error cache for line iterator + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Gunzip exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +proc extractLine(gz: GUnzipRef; start: int): Opt[string] = + ## Extract the first string from line buffer. Any newline characters at + ## the line end will be stripped. The argument `start` is the position + ## where to start searching for the `\n` character. + ## + # Find `\n` in the buffer if there is any + if gz.lnCache.len <= start: + return err() + var nlPos = gz.lnCache.find(char('\n'), start) + if nlPos < 0: + return err() + + # Assemble return value + var line = gz.lnCache.toOpenArray(0,nlPos-1).substr() + line.stripLineEnd + + # Update line cache + gz.lnCache = if gz.lnCache.len <= nlPos + 1: "" + else: gz.lnCache.toOpenArray(nlPos+1, gz.lnCache.len-1).substr() + + # Done + ok(move line) + +# ------------------------------------------------------------------------------ +# Private inflate function +# ------------------------------------------------------------------------------ + +proc loadInput(gz: GUnzipRef; data: openArray[char]): string = + ## Fill input chache for `explode()` and return the overflow. + ## + # Gzip input buffer general layout + # :: + # | <---------------- nextInBuf.len -------------------------> | + # |--------------------+--------------------+------------------| + # | <--- total_in ---> | <--- avail_in ---> | <--- unused ---> | + # | | + # nextInBuf next_in + # + # to be initialised as + # :: + # | <---------------- nextInBuf.len -------------------------> | + # |--------------------------+---------------------------------| + # | <------ avail_in ------> | <----------- unused ----------> | + # | + # nextInBuf + # next_in + # + var buffer = newSeqUninit[char](gz.mz.avail_in.int + data.len) + + # Collect remaining data first + if 0 < gz.mz.avail_in: + (addr buffer[0]).copyMem(gz.mz.next_in, gz.mz.avail_in) + + # Append new data + (addr buffer[gz.mz.avail_in]).copyMem(addr data[0], data.len) + + # Realign gzip input buffer and fill as much as possible from `buffer[]` + gz.mz.next_in = cast[ptr uint8](addr gz.nextInBuf[0]) + gz.mz.total_in = 0 + + if gz.nextInBuf.len < buffer.len: + # The `buffer[]` does not fully fit into `nextInBuf[]`. 
+ (addr gz.nextInBuf).copyMem(addr buffer[0], gz.nextInBuf.len) + gz.mz.avail_in = gz.nextInBuf.len.cuint + # Return overflow + return buffer.toOpenArray(gz.nextInBuf.len, buffer.len-1).substr() + + (addr gz.nextInBuf).copyMem(addr buffer[0], buffer.len) + gz.mz.avail_in = buffer.len.cuint + return "" + + +proc explodeImpl(gz: GUnzipRef; overflow: var string): Result[string,ZError] = + ## Implement `explode()` processing. + ## + if gz.outDoneOK: + return err(Z_STREAM_END) + + var + outData = "" + zRes = Z_STREAM_END + + while not gz.outDoneOK and 0 < gz.mz.avail_in: + gz.mz.next_out = cast[ptr uint8](addr gz.nextOutBuf[0]) + gz.mz.avail_out = gz.nextOutBuf.len.cuint + gz.mz.total_out = 0 + + # Save input state to compare with, below + let availIn = gz.mz.avail_in + + # Deflate current block `next_in[]` => `next_out[]` + zRes = gz.mz.inflate(Z_SYNC_FLUSH) + if zRes == Z_STREAM_END: + gz.outDoneOK = true + zRes = gz.mz.inflateEnd() + # Dont't stop here, `outData` needs to be assigned + if zRes != Z_OK: + break + + # Append processed data + if 0 < gz.mz.total_out: + outData &= gz.nextOutBuf.toOpenArray(0, gz.mz.total_out-1).substr() + + if gz.outDoneOK: + break + + if gz.mz.avail_in < availIn: + # Re-load overflow + if 0 < overflow.len: + overflow = gz.loadInput overflow.toOpenArray(0, overflow.len-1) + + elif gz.mz.avail_out == gz.nextOutBuf.len.cuint: + # Stop unless state change + zRes = Z_BUF_ERROR + break + + if zRes != Z_OK: + return err(zRes) + + ok(outData) + + +proc explode(gz: GUnzipRef; data: openArray[char]): Result[string,ZError] = + ## Inflate the `data[]` argument together with the rest from the previous + ## inflation action and returns the inflated value (and possibly the input + ## buffer overflow.) + ## + var overflow = gz.loadInput data + gz.explodeImpl(overflow) + +proc explode(gz: GUnzipRef): Result[string,ZError] = + ## Variant of `explode()` which clears the rest of the input buffer. + ## + var overflow = "" + gz.explodeImpl(overflow) + +# ------------------------------------------------------------------------------ +# Public +# ------------------------------------------------------------------------------ + +proc init*(T: type GUnzipRef; inStream: Stream): Result[T,GUnzipStatus] = + ## Set up gUnzip filter and prepare for deflating. 
+ ## + const info = "GUnzipRef.init(): " + var chunk: array[ReadBufLen,char] + + # Read header buffer from stream + var chunkLen: int + info.onException(DontQuit): + chunkLen = inStream.readData(addr chunk, chunk.len) + + # Parse GZIP header (RFC 1952) + if chunkLen < 18: + return err((Z_STREAM_ERROR, "Stream too short")) + if (chunk[0].ord != 0x1f or # magic number + chunk[1].ord != 0x8b or # magic number + chunk[2].ord != 0x08) or # deflate + (chunk[3].ord and 0xf7) != 0: # unsupported flags + return err((Z_STREAM_ERROR, "Wrong magic or flags")) + + # Set start of payload + var + pylStart = 10 + inName = "" + if (chunk[3].ord and 8) == 8: # FNAME + var endPos = chunk.toOpenArray(pylStart, chunkLen-1).find char(0) + if endPos < 0: + return err((Z_STREAM_ERROR, "Advertised but missing file name")) + endPos += pylStart # need absolute position in `chunk[]` + inName = chunk.toOpenArray(pylStart, endPos-1).substr() + pylStart = endPos + 1 + + # Initialise descriptor + let gz = GUnzipRef( + inStream: inStream, + inName: inName) + + # Initialise `zlib` and return + let gRc = gz.mz.inflateInit2(Z_RAW_DEFLATE) + if gRc != Z_OK: + return err((gRc,"Zlib init error")) + + # Store unused buffer data for the first read + gz.mz.avail_in = (chunk.len - pylStart).cuint + (addr gz.nextInBuf).copyMem(addr chunk[pylStart], gz.mz.avail_in.int) + gz.mz.next_in = cast[ptr uint8](addr gz.nextInBuf[0]) + gz.mz.total_in = 0 # i.e. left aligned data + + ok(gz) + +proc name*(gz: GUnzipRef): string = + ## Getter: returns registered name (if any) + gz.inName + + +proc nextChunk*(gz: GUnzipRef): Result[string,GUnzipStatus] = + ## Fetch next unzipped data chunk, return and empty string if input + ## is exhausted. + ## + const info = "nextChunk(GUnzipRef): " + + if gz.outDoneOK: + return err((Z_STREAM_END,"")) + + var + chunk: array[ReadBufLen,char] + chunkLen = 0 + data = "" + + info.onException(DontQuit): + chunkLen = gz.inStream.readData(addr chunk, chunk.len) + + if 0 < chunkLen: + data = gz.explode(chunk.toOpenArray(0, chunkLen-1)).valueOr: + return err((error,"Decoding error")) + else: + var atEnd = false + info.onException(DontQuit): + atEnd = gz.inStream.atEnd() + if atEnd: + data = gz.explode().valueOr: + return err((error,"Decoding error")) + else: + return err((Z_STREAM_ERROR, "Stream too short")) + + return ok(move data) + + +proc nextLine*(gz: GUnzipRef): Result[string,GUnzipStatus] = + ## If the gzip stream is expected to contain text data only it can be + ## retrieved line wise. The line string returned has the EOL characters + ## stripped. + ## + ## If all lines are exhausted, the error code `Z_STREAM_END` is returned. + ## + # Check whether there is a full line in the buffer, already + gz.extractLine(0).isErrOr: + return ok(value) + + # Load next chunk(s) into line cache and (try to) extract a complete line. + while not gz.outDoneOK: + let chunk = gz.nextChunk().valueOr: + if gz.outDoneOK: + break + return err(error) + + # Append data chunk to line cache and (try to) extract a line. + let inLen = gz.lnCache.len + gz.lnCache &= chunk + gz.extractLine(inLen).isErrOr: + return ok(value) + # continue + + # Last line (may be partial) + if 0 < gz.lnCache.len: + var line = gz.lnCache + line.stripLineEnd + gz.lnCache = "" + return ok(move line) + + err((Z_STREAM_END,"")) + + +proc atEnd*(gz: GUnzipRef): bool = + ## Returns `true` if data are exhausted. + gz.outDoneOK and gz.lnCache.len == 0 + + +iterator line*(gz: GUnzipRef): string = + ## Iterate over `nextLine()` until the input stream is exhausted. 
+ gz.lnError = (Z_OK, "") + while true: + var ln = gz.nextLine().valueOr: + gz.lnError = error + break + yield ln + +func lineStatus*(gz: GUnzipRef): GUnzipStatus = + ## Error (or no-error) status after the `line()` iterator has terminated. + gz.lnError + +func lineStatusOk*(gz: GUnzipRef): bool = + ## Returns `true` if the `line()` iterator has terminated without error. + gz.lnError[0] in {Z_OK, Z_STREAM_END} + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_reader/reader_init.nim b/execution_chain/sync/beacon/replay/replay_reader/reader_init.nim new file mode 100644 index 0000000000..c655172f96 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_reader/reader_init.nim @@ -0,0 +1,142 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[endians, os, streams, strutils], + pkg/[chronicles, eth/common], + ../replay_desc, + ./reader_gunzip + +logScope: + topics = "replay reader" + +type + FileSignature = enum + Unknown = 0 + Plain + Gzip + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Replay stream exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +proc getFileSignature(strm: Stream): (FileSignature,uint16) = + const info = "getSignature(): " + var u16: uint16 + info.onException(QuitFailure): + let v16 = strm.peekUint16() + (addr u16).bigEndian16(addr v16) + + # Gzip signature + if u16 == 0x1f8b'u16: + return (Gzip,u16) + + # Ascii signature: /[0-9A-Z] / + let (c0, c1) = (char(u16 shr 8), char(u16.uint8)) + if (c0.isDigit or c0.isUpperAscii or c0 == '#') and (c1 in {' ','\r','\n'}): + return (Plain,u16) + + (Unknown,u16) + +# ------------------------------------------------------------------------------ +# Private record reader functions +# ------------------------------------------------------------------------------ + +proc plainReadLine(rp: ReplayReaderRef): Opt[string] = + const info = "plainReadLine(ReplayRef): " + info.onException(DontQuit): + if not rp.inStream.atEnd(): + return ok(rp.inStream.readLine) + err() + +proc plainAtEnd(rp: ReplayReaderRef): bool = + const info = "plainAtEnd(ReplayRef): " + info.onException(DontQuit): + return rp.inStream.atEnd() + true + +proc gUnzipReadLine(rp: ReplayReaderRef): Opt[string] = + const info = "gzipReadLine(ReplayRef): " + var ln = rp.gzFilter.nextLine().valueOr: + if not rp.gzFilter.lineStatusOk(): + let err = rp.gzFilter.lineStatus() + info info & "GUnzip filter error", zError=err.zError, info=err.info + discard + return err() + ok(move 
ln) + +proc gUnzipAtEnd(rp: ReplayReaderRef): bool = + rp.gzFilter.atEnd() + +# ------------------------------------------------------------------------------ +# Public constructor(s) +# ------------------------------------------------------------------------------ + +proc init*(T: type ReplayReaderRef; strm: Stream): T = + const info = "ReplayRef.init(): " + + if strm.isNil: + fatal info & "Cannot use nil stream for reading -- STOP" + quit(QuitFailure) + + let + (sig, u16) = strm.getFileSignature() # Check file encoding + rp = T(inStream: strm) # Set up descriptor + + # Set up line reader, probably with gunzip/deflate filter + case sig: + of Plain: + rp.readLine = plainReadLine + rp.atEnd = plainAtEnd + of Gzip: + rp.gzFilter = GUnzipRef.init(strm).valueOr: + fatal info & "Cannot assign gunzip reader -- STOP" + quit(QuitFailure) + rp.readLine = gUnzipReadLine + rp.atEnd = gUnzipAtEnd + of Unknown: + fatal info & "Unsupported file encoding -- STOP", + fileSignature=("0x" & $u16.toHex(4)) + quit(QuitFailure) + + rp + + +proc destroy*(rp: ReplayReaderRef) = + const info = "destroy(ReplayRef): " + info.onException(DontQuit): + rp.inStream.flush() + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_reader/reader_reclog.nim b/execution_chain/sync/beacon/replay/replay_reader/reader_reclog.nim new file mode 100644 index 0000000000..f3efbf4566 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_reader/reader_reclog.nim @@ -0,0 +1,336 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay environment + +{.push raises:[].} + +import + std/[net, strformat, strutils, syncio], + pkg/[chronicles, chronos, eth/common], + ../[replay_desc, replay_helpers] + +logScope: + topics = "replay reader" + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc addX( + q: var seq[string]; + info: static[string]; + lnr: int; + base: TraceRecBase; + ) = + ## Output header + q.add base.time.ageStr() + q.add info + if 0 < lnr and base.serial != lnr.uint: + q.add $base.serial & "!" 
& $lnr + else: + q.add $base.serial + + if 0 < base.frameID: + q.add base.frameID.idStr + else: + q.add "*" + q.add $base.nPeers + q.add ($base.syncState).toUpperFirst() + q.add ($base.chainMode).toUpperFirst() + + q.add base.baseNum.bnStr() + q.add base.latestNum.bnStr() + + if base.chainMode in {collecting,ready,orphan}: + q.add base.antecedent.bnStr() + else: + q.add "*" + + q.add (if (base.stateAvail and 1) != 0: $base.peerCtrl else: "*") + q.add (if (base.stateAvail and 2) != 0: "peerID=" & base.peerID.short() + else: "*") + + if 0 < base.hdrUnprChunks: + q.add "uHdr=" & $base.hdrUnprLen & "/" & + $base.hdrUnprChunks & "/" & + $base.hdrUnprLastLen & ":" & + $base.hdrUnprLast.bnStr + + if 0 < base.blkUnprChunks: + q.add "uBlk=" & $base.blkUnprLen & "/" & + $base.blkUnprChunks & "/" & + $base.blkUnprLeast.bnStr & ":" & + $base.blkUnprLeastLen + + if (base.stateAvail and 12) != 0 and + (0 < base.nHdrErrors or 0 < base.nBlkErrors): + q.add "nErr=(" & $base.nHdrErrors & "," & $base.nBlkErrors & ")" + + if (base.stateAvail and 16) != 0: + q.add "slowPeer=" & base.slowPeer.short() + +# ------------------------------------------------------------------------------ +# Private record handlers +# ------------------------------------------------------------------------------ + +func toStrOops(n: int): seq[string] = + @["?", $n] + +# ----------- + +func toStrSeq(n: int; w: TraceVersionInfo): seq[string] = + var res = newSeqOfCap[string](15) + res.addX("=Version", n, w) + res.add "version=" & $w.version + res.add "network=" & $w.networkId + res + +# ----------- + +func toStrSeq(n: int; w: TraceSyncActvFailed): seq[string] = + var res = newSeqOfCap[string](15) + res.addX("=ActvFailed", n, w) + res + +func toStrSeq(n: int; w: TraceSyncActivated): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=Activated", n, w) + res.add "head=" & w.head.bnStr + res.add "finHash=" & w.finHash.short + res + +func toStrSeq(n: int; w: TraceSyncHibernated): seq[string] = + var res = newSeqOfCap[string](15) + res.addX("=Suspended", n, w) + res + +# ----------- + +func toStrSeq(n: int; w: TraceSchedDaemonBegin): seq[string] = + var res = newSeqOfCap[string](15) + res.addX("+Daemon", n, w) + res + +func toStrSeq(n: int; w: TraceSchedDaemonEnd): seq[string] = + var res = newSeqOfCap[string](15) + res.addX("-Daemon", n, w) + res + +func toStrSeq(n: int; w: TraceSchedStart): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=StartPeer", n, w) + res.add "peer=" & $w.peerIP & ":" & $w.peerPort + if not w.accept: + res.add "rejected" + res + +func toStrSeq(n: int; w: TraceSchedStop): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=StopPeer", n, w) + res.add "peer=" & $w.peerIP & ":" & $w.peerPort + res + +func toStrSeq(n: int; w: TraceSchedPool): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=Pool", n, w) + res.add "peer=" & $w.peerIP & ":" & $w.peerPort + res.add "last=" & $w.last + res.add "laps=" & $w.laps + res.add "stop=" & $w.stop + res + +func toStrSeq(n: int; w: TraceSchedPeerBegin): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("+Peer", n, w) + res.add "peer=" & $w.peerIP & ":" & $w.peerPort + res + +func toStrSeq(n: int; w: TraceSchedPeerEnd): seq[string] = + var res = newSeqOfCap[string](15) + res.addX("-Peer", n, w) + res + +# ----------- + +func toStrSeq(n: int; w: TraceFetchHeaders): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=HeadersFetch", n, w) + let + rLen = w.req.maxResults + rRev = if w.req.reverse: "rev" else: "" + if 
w.req.startBlock.isHash: + res.add "req=" & w.req.startBlock.hash.short & "[" & $rLen & "]" & rRev + else: + res.add "req=" & w.req.startBlock.number.bnStr & "[" & $rLen & "]" & rRev + if 0 < w.req.skip: + res.add "skip=" & $w.req.skip + if (w.fieldAvail and 1) != 0: + res.add "res=[" & $w.fetched.packet.headers.len & "]" + res.add "ela=" & w.fetched.elapsed.toStr + if (w.fieldAvail and 2) != 0: + if w.error.excp.ord == 0: + res.add "failed" + else: + res.add "excp=" & ($w.error.excp).substr(1) + if w.error.msg.len != 0: + res.add "error=" & w.error.name & "(" & w.error.msg & ")" + res.add "ela=" & w.error.elapsed.toStr + res + +func toStrSeq(n: int; w: TraceSyncHeaders): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=HeadersSync", n, w) + res + + +func toStrSeq(n: int; w: TraceFetchBodies): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=BodiesFetch", n, w) + res.add "req=" & w.ivReq.bnStr & "[" & $w.req.blockHashes.len & "]" + if (w.fieldAvail and 1) != 0: + res.add "res=[" & $w.fetched.packet.bodies.len & "]" + res.add "ela=" & w.fetched.elapsed.toStr + if (w.fieldAvail and 2) != 0: + if w.error.excp.ord == 0: + res.add "failed" + else: + res.add "excp=" & ($w.error.excp).substr(1) + if w.error.msg.len != 0: + res.add "error=" & w.error.name & "(" & w.error.msg & ")" + res.add "ela=" & w.error.elapsed.toStr + res + +func toStrSeq(n: int; w: TraceSyncBodies): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=BodiesSync", n, w) + res + + +func toStrSeq(n: int; w: TraceImportBlock): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=BlockImport", n, w) + res.add "block=" & w.ethBlock.bnStr + res.add "effPeerID=" & w.effPeerID.short + if (w.fieldAvail and 1) != 0: + res.add "ela=" & w.elapsed.toStr + if (w.fieldAvail and 2) != 0: + if w.error.excp.ord == 0: + res.add "failed" + else: + res.add "excp=" & ($w.error.excp).substr(1) + if w.error.msg.len != 0: + res.add "error=" & w.error.name & "(" & w.error.msg & ")" + res.add "ela=" & w.error.elapsed.toStr + res + +func toStrSeq(n: int; w: TraceSyncBlock): seq[string] = + var res = newSeqOfCap[string](20) + res.addX("=BlockSync", n, w) + res + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc recLogPrint*(fh: File): ReplayRecLogPrintFn = + ## The function provides an example for a call back pretty printer + ## for `lineLog()`. 
+ return proc(w: seq[string]) = + try: + block doFields: + if w.len <= 9: + fh.write w.join(" ") + break doFields + + # at least 9 fields + fh.write "" & + &"{w[0]:>18} {w[1]:<13} {w[2]:>6} " & + &"{w[3]:>5} {w[4]:>2} {w[5]:<13} " & + &"{w[6]:<10} {w[7]:>10} {w[8]:>10} " & + &"{w[9]:>10}" + + if w.len <= 11: + if w.len == 11: + fh.write " " + fh.write w[10] + break doFields + + # at least 12 fields + if w.len <= 12: + fh.write &" {w[10]:<10} " + fh.write w[11] + break doFields + + # more than 12 fields + fh.write &" {w[10]:<10} {w[11]:<15}" + + # at least 13 fields + fh.write " " + fh.write w[12 ..< w.len].join(" ") + + fh.write "\n" + except IOError as e: + warn "lineLogPrint(): Exception while writing to file", + name=($e.name), msg=e.msg + +# ----------- + +func recLogToStrEnd*(n: int): seq[string] = + @[".", $n] + +proc recLogToStrList*(pyl: ReplayPayloadRef; lnr = 0): seq[string] = + case pyl.recType: + of TrtOops: + lnr.toStrOops() + + of TrtVersionInfo: + lnr.toStrSeq(pyl.ReplayVersionInfo.data) + + of TrtSyncActvFailed: + lnr.toStrSeq(pyl.ReplaySyncActvFailed.data) + of TrtSyncActivated: + lnr.toStrSeq(pyl.ReplaySyncActivated.data) + of TrtSyncHibernated: + lnr.toStrSeq(pyl.ReplaySyncHibernated.data) + + of TrtSchedDaemonBegin: + lnr.toStrSeq(pyl.ReplaySchedDaemonBegin.data) + of TrtSchedDaemonEnd: + lnr.toStrSeq(pyl.ReplaySchedDaemonEnd.data) + of TrtSchedStart: + lnr.toStrSeq(pyl.ReplaySchedStart.data) + of TrtSchedStop: + lnr.toStrSeq(pyl.ReplaySchedStop.data) + of TrtSchedPool: + lnr.toStrSeq(pyl.ReplaySchedPool.data) + of TrtSchedPeerBegin: + lnr.toStrSeq(pyl.ReplaySchedPeerBegin.data) + of TrtSchedPeerEnd: + lnr.toStrSeq(pyl.ReplaySchedPeerEnd.data) + + of TrtFetchHeaders: + lnr.toStrSeq(pyl.ReplayFetchHeaders.data) + of TrtSyncHeaders: + lnr.toStrSeq(pyl.ReplaySyncHeaders.data) + + of TrtFetchBodies: + lnr.toStrSeq(pyl.ReplayFetchBodies.data) + of TrtSyncBodies: + lnr.toStrSeq(pyl.ReplaySyncBodies.data) + + of TrtImportBlock: + lnr.toStrSeq(pyl.ReplayImportBlock.data) + of TrtSyncBlock: + lnr.toStrSeq(pyl.ReplaySyncBlock.data) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_reader/reader_unpack.nim b/execution_chain/sync/beacon/replay/replay_reader/reader_unpack.nim new file mode 100644 index 0000000000..653aeccb94 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_reader/reader_unpack.nim @@ -0,0 +1,180 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
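Editor's note: record logging above is callback based -- `captureLog()` hands each record to a `ReplayRecLogPrintFn` as a sequence of pre-formatted fields, so alternative output formats can be plugged in without touching the reader. A hedged sketch of a tab-separated printer; the import paths and the input file name are assumptions:

```nim
# Sketch only: a custom printer for captureLog(); paths and file name assumed.
import std/[streams, strutils]
import ./replay                                  # ReplayReaderRef, captureLog
import ./replay/replay_reader/reader_desc        # ReplayRecLogPrintFn

proc tsvPrint(fh: File): ReplayRecLogPrintFn =
  ## One record per line, fields tab separated (cf. recLogPrint above).
  return proc(fields: seq[string]) =
    try:
      fh.writeLine fields.join("\t")
    except IOError:
      discard

when isMainModule:
  let strm = newFileStream("capture.gz", fmRead)   # assumed capture file
  ReplayReaderRef.init(strm).captureLog(tsvPrint(stdout), proc(): bool = false)
```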
+ +## Replay environment + +{.push raises:[].} + +import + std/[net, os, streams, strutils], + pkg/[chronicles, chronos, eth/common, stew/base64, stew/byteutils], + ./reader_gunzip, + ../replay_desc + +logScope: + topics = "replay reader" + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + +# ------------------------------------------------------------------------------ +# Private mixin helpers for RLP decoder +# ------------------------------------------------------------------------------ + +proc read(rlp: var Rlp; T: type Hash): T {.raises:[RlpError].} = + when sizeof(T) != sizeof(uint): + # `castToUnsigned()` is defined in `std/private/bitops_utils` and + # included by `std/bitops` but not exported (as of nim 2.2.4) + {.error: "Expected that Hash is based on int".} + Hash(int(cast[int64](rlp.read(uint64)))) + +proc read(rlp: var Rlp; T: type chronos.Duration): T {.raises:[RlpError].} = + chronos.nanoseconds(cast[int64](rlp.read(uint64))) + +proc read(rlp: var Rlp; T: type Port): T {.raises:[RlpError].} = + Port(rlp.read(uint16)) + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Replay stream reader exception" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb & " -- STOP", error=($e.name), msg=e.msg + quit(quitCode) + +proc init(T: type; blob: string; recType: static[TraceRecType]; U: type): T = + const info = "init(" & $recType & "): " + var rlpBlob: seq[byte] + info.onException(DontQuit): + rlpBlob = Base64.decode(blob) + return T( + recType: recType, + data: rlp.decode(rlpBlob, U)) + +# ------------------------------------------------------------------------------ +# Public record decoder functions +# ------------------------------------------------------------------------------ + +proc unpack*(line: string): ReplayPayloadRef = + if line.len < 3: + return ReplayPayloadRef(nil) + + var recType: TraceRecType + if line[0].isDigit: + let n = line[0].ord - '0'.ord + if high(TraceRecType).ord < n: + return ReplayPayloadRef(nil) + recType = TraceRecType(n) + + elif line[0].isUpperAscii: + let n = line[0].ord - 'A'.ord + 10 + if high(TraceRecType).ord < n: + return ReplayPayloadRef(nil) + recType = TraceRecType(n) + + else: + return ReplayPayloadRef(nil) + + let data = line.substr(2, line.len-1) + case recType: + of TrtOops: + return ReplayPayloadRef( + recType: TrtOops) + + of TrtVersionInfo: + return ReplayVersionInfo.init( + data, TrtVersionInfo, TraceVersionInfo) + + # ------------------ + + of TrtSyncActvFailed: + return ReplaySyncActvFailed.init( + data, TrtSyncActvFailed, TraceSyncActvFailed) + + of TrtSyncActivated: + return ReplaySyncActivated.init( + data, TrtSyncActivated, TraceSyncActivated) + + of TrtSyncHibernated: + return ReplaySyncHibernated.init( + data, TrtSyncHibernated, TraceSyncHibernated) + + # ------------------ + + of TrtSchedDaemonBegin: + return ReplaySchedDaemonBegin.init( + data, TrtSchedDaemonBegin, TraceSchedDaemonBegin) + + of TrtSchedDaemonEnd: + return ReplaySchedDaemonEnd.init( + data, TrtSchedDaemonEnd, TraceSchedDaemonEnd) + + of TrtSchedStart: + return ReplaySchedStart.init( + data, TrtSchedStart, TraceSchedStart) + + of TrtSchedStop: + return ReplaySchedStop.init( + data, TrtSchedStop, 
TraceSchedStop) + + of TrtSchedPool: + return ReplaySchedPool.init( + data, TrtSchedPool, TraceSchedPool) + + of TrtSchedPeerBegin: + return ReplaySchedPeerBegin.init( + data, TrtSchedPeerBegin, TraceSchedPeerBegin) + + of TrtSchedPeerEnd: + return ReplaySchedPeerEnd.init( + data, TrtSchedPeerEnd, TraceSchedPeerEnd) + + # ------------------ + + of TrtFetchHeaders: + return ReplayFetchHeaders.init( + data, TrtFetchHeaders, TraceFetchHeaders) + + of TrtSyncHeaders: + return ReplaySyncHeaders.init( + data, TrtSyncHeaders, TraceSyncHeaders) + + + of TrtFetchBodies: + return ReplayFetchBodies.init( + data, TrtFetchBodies, TraceFetchBodies) + + of TrtSyncBodies: + return ReplaySyncBodies.init( + data, TrtSyncBodies, TraceSyncBodies) + + + of TrtImportBlock: + return ReplayImportBlock.init( + data, TrtImportBlock, TraceImportBlock) + + of TrtSyncBlock: + return ReplaySyncBlock.init( + data, TrtSyncBlock, TraceSyncBlock) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner.nim b/execution_chain/sync/beacon/replay/replay_runner.nim new file mode 100644 index 0000000000..4ed28651b1 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner.nim @@ -0,0 +1,46 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + pkg/chronos, + ./replay_runner/runner_dispatch, + ./[replay_desc, replay_reader] + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc runDispatcher*( + runner: ReplayRunnerRef; + reader: ReplayReaderRef; + stopIf: ReplayStopRunnnerFn; + ) {.async: (raises: []).} = + for w in reader.records(): + # Can continue? + if stopIf(): + break + + # Dispatch next instruction record + await runner.dispatch(w) + + # Wait for optional task switch + try: await sleepAsync replayWaitForCompletion + except CancelledError: break + + # Finish + await runner.dispatchEnd() + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_desc.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_desc.nim new file mode 100644 index 0000000000..adeb49fbb7 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_desc.nim @@ -0,0 +1,106 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
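Editor's note: each capture line consumed by `unpack()` above consists of one record-type character ('0'..'9', then 'A'.. for ordinals of 10 and up), a blank, and the Base64 encoding of the RLP-serialised payload. A small round-trip sketch of just that framing; `DemoRec` and the type ordinal 11 are invented for illustration, and the real writer lives in the trace module, which is not part of this section:

```nim
# Sketch only: demonstrates the <tag> <base64(rlp)> framing expected by unpack().
import pkg/[eth/rlp, stew/base64]

type DemoRec = object        # hypothetical payload, not a real trace record
  serial: uint
  note: string

proc packLine(typeOrd: int; rec: DemoRec): string =
  ## Frame a record the way unpack() expects it.
  let tag =
    if typeOrd < 10: char('0'.ord + typeOrd)
    else: char('A'.ord + typeOrd - 10)
  $tag & " " & Base64.encode(rlp.encode(rec))

proc unpackLine(line: string): DemoRec =
  ## Reverse of packLine(), skipping the tag and blank (cf. unpack() above).
  rlp.decode(Base64.decode(line.substr(2)), DemoRec)

when isMainModule:
  let line = packLine(11, DemoRec(serial: 1, note: "hello"))
  doAssert unpackLine(line).note == "hello"
```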
+
+## Replay runner
+##
+## TODO
+## * Job(ID) -> stage(ID)
+
+{.push raises:[].}
+
+import
+  std/tables,
+  ../../../../networking/p2p,
+  ../../../wire_protocol,
+  ../../trace/trace_desc,
+  ../../worker_desc
+
+type
+  ReplayStopRunnnerFn* = proc(): bool {.gcsafe, raises: [].}
+    ## Loop control directive for runner/dispatcher
+
+  ReplayWaitError* = tuple
+    ## Capture exception or error context for waiting/polling instance
+    excp: BeaconErrorType
+    name: string
+    msg: string
+
+  # --------- data messages ---------
+
+  ReplayMsgRef* = ref object of RootRef
+    ## Sub-task context, identifies the captured environment
+    recType*: TraceRecType ## Sub-type selector
+
+  ReplayFetchHeadersMsgRef* = ref object of ReplayMsgRef
+    ## Staged headers fetch data
+    instr*: TraceFetchHeaders ## Full context/environment
+
+  ReplaySyncHeadersMsgRef* = ref object of ReplayMsgRef
+    ## Staged headers sync data
+    instr*: TraceSyncHeaders ## Full context/environment
+
+  ReplayFetchBodiesMsgRef* = ref object of ReplayMsgRef
+    ## Bodies fetch task indicator
+    instr*: TraceFetchBodies ## Full context/environment
+
+  ReplaySyncBodiesMsgRef* = ref object of ReplayMsgRef
+    ## Bodies sync task indicator
+    instr*: TraceSyncBodies ## Full context/environment
+
+  ReplayImportBlockMsgRef* = ref object of ReplayMsgRef
+    ## Block import task indicator
+    instr*: TraceImportBlock ## Full context/environment
+
+  ReplaySyncBlockMsgRef* = ref object of ReplayMsgRef
+    ## Block sync task indicator
+    instr*: TraceSyncBlock ## Full context/environment
+
+  # ---------
+
+  ReplayDaemonRef* = ref object
+    ## Daemon job frame
+    run*: ReplayRunnerRef ## Back-reference for convenience
+    frameID*: uint64 ## Begin/end frame
+    message*: ReplayMsgRef ## Data message channel
+
+  ReplayBuddyRef* = ref object of BeaconBuddyRef
+    ## Replacement of `BeaconBuddyRef` in `runPeer()` and `runPool()`
+    isNew*: bool ## Set in `getOrNewPeer()` when created
+    run*: ReplayRunnerRef ## Back-reference for convenience
+    frameID*: uint64 ## Begin/end frame
+    message*: ReplayMsgRef ## Data message channel
+
+  ReplayEthState* = object
+    ## Some fake settings to pretend eth/xx compatibility
+    capa*: Dispatcher ## Capability `eth68`, `eth69`, etc.
+ prots*: array[MAX_PROTOCOLS,RootRef] ## `capa` init flags, protocol states + + ReplayRunnerRef* = ref object + # Global state + ctx*: BeaconCtxRef ## Beacon syncer descriptor + worker*: BeaconHandlersRef ## Refers to original handlers table + ethState*: ReplayEthState ## For ethxx compatibility + stopRunner*: bool ## Shut down request + nSessions*: int ## Numer of sessions left + + # Local state + daemon*: ReplayDaemonRef ## Currently active daemon, or `nil` + peers*: Table[Hash,ReplayBuddyRef] ## Begin/End for base frames + nPeers*: uint ## Track active peer instances + + # Instruction handling + instrNumber*: uint ## Instruction counter + + # Debugging + noisy*: bool ## Activates extra logging noise + startNoisy*: uint ## Cycle threshold for noisy logging + fakeImport*: bool ## No database import if `true` + +# End diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch.nim new file mode 100644 index 0000000000..19fe9de019 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch.nim @@ -0,0 +1,103 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + pkg/[chronicles, chronos], + ../../../../networking/p2p, + ../replay_desc, + ./runner_dispatch/[dispatch_blocks, dispatch_headers, dispatch_sched, + dispatch_sync, dispatch_version] + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc dispatch*( + run: ReplayRunnerRef; + pyl: ReplayPayloadRef; + ) {.async: (raises: []).} = + ## Execure next instruction + ## + run.instrNumber.inc + run.noisy = run.startNoisy <= run.instrNumber + + if run.noisy: trace "+dispatch()", n=run.instrNumber, recType=pyl.recType, + nBuddies=run.peers.len, nDaemons=(if run.daemon.isNil: 0 else: 1) + + case pyl.recType: + of TrtOops: + warn "dispatch(): Oops, unexpected void record", n=run.instrNumber + + of TrtVersionInfo: + run.versionInfoWorker(pyl.ReplayVersionInfo.data, "=Version") + + of TrtSyncActvFailed: + run.syncActvFailedWorker(pyl.ReplaySyncActvFailed.data, "=ActvFailed") + of TrtSyncActivated: + run.syncActivateWorker(pyl.ReplaySyncActivated.data, "=Activated") + of TrtSyncHibernated: + run.syncSuspendWorker(pyl.ReplaySyncHibernated.data, "=Suspended") + + # Simple scheduler single run (no begin/end) functions + of TrtSchedStart: + run.schedStartWorker(pyl.ReplaySchedStart.data, "=StartPeer") + of TrtSchedStop: + run.schedStopWorker(pyl.ReplaySchedStop.data, "=StopPeer") + of TrtSchedPool: + run.schedPoolWorker(pyl.ReplaySchedPool.data, "=Pool") + + # Workers, complex run in background + of TrtSchedDaemonBegin: + await run.schedDaemonBegin(pyl.ReplaySchedDaemonBegin.data, "+Daemon") + of TrtSchedDaemonEnd: + await run.schedDaemonEnd(pyl.ReplaySchedDaemonEnd.data, "-Daemon") + of TrtSchedPeerBegin: + await run.schedPeerBegin(pyl.ReplaySchedPeerBegin.data, "+Peer") + of TrtSchedPeerEnd: + await 
run.schedPeerEnd(pyl.ReplaySchedPeerEnd.data, "-Peer") + + # Leaf handlers providing input data to background tasks `runDaemon()` + # and/or `runPeer()`. + of TrtFetchHeaders: + await run.sendHeaders(pyl.ReplayFetchHeaders.data, "=HeadersFetch") + of TrtSyncHeaders: + await run.sendHeaders(pyl.ReplaySyncHeaders.data, "=HeadersSync") + + of TrtFetchBodies: + await run.sendBodies(pyl.ReplayFetchBodies.data, "=BodiesFetch") + of TrtSyncBodies: + await run.sendBodies(pyl.ReplaySyncBodies.data, "=BodiesSync") + + of TrtImportBlock: + await run.sendBlock(pyl.ReplayImportBlock.data, "=BlockImport") + of TrtSyncBlock: + await run.sendBlock(pyl.ReplaySyncBlock.data, "=BlockSync") + + if run.noisy: trace "-dispatch()", n=run.instrNumber, recType=pyl.recType, + nBuddies=run.peers.len, nDaemons=(if run.daemon.isNil: 0 else: 1) + + +proc dispatchEnd*( + run: ReplayRunnerRef; + ) {.async: (raises: []).} = + # Finish + run.instrNumber.inc + info "End replay", n=run.instrNumber + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_blocks.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_blocks.nim new file mode 100644 index 0000000000..b91f442224 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_blocks.nim @@ -0,0 +1,255 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
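# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the dispatcher above replays
# one captured record per iteration and then yields briefly so that any tasks
# it spawned in the background can make progress. Below is a self-contained
# model of that loop shape using chronos only; the record and handler names
# are invented.

import pkg/chronos

type DemoRecord = object
  serial: int

proc demoHandle(rec: DemoRecord) {.async: (raises: []).} =
  # stand-in for the per-record `dispatch()` case statement
  discard rec.serial

proc demoLoop(records: seq[DemoRecord];
              stopIf: proc(): bool {.gcsafe, raises: [].}) {.async: (raises: []).} =
  for rec in records:
    if stopIf():
      break                               # loop control, as `runDispatcher()` does
    await demoHandle(rec)
    try:
      await sleepAsync(10.milliseconds)   # give spawned tasks a chance to run
    except CancelledError:
      break

when isMainModule:
  waitFor demoLoop(@[DemoRecord(serial: 1)], proc(): bool = false)
# ----------------------------------------------------------------------------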
+ +## Replay runner + +{.push raises:[].} + +import + pkg/[chronicles, chronos, eth/common, stew/interval_set], + ../../../../wire_protocol, + ../../replay_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc toBnRange( + ctx: BeaconCtxRef; + lst: openArray[Hash32]; + info: static[string]; + ): BnRange = + ## Resolve block hashes as interval of block numbers + let rs = BnRangeSet.init() + for w in lst: + let h = ctx.hdrCache.get(w).valueOr: + raiseAssert info & ": Cannot resolve" & + ", hash=" & w.short + if rs.merge(h.number,h.number) != 1: + raiseAssert info & ": dulplicate hash" & + ", hash=" & w.short & ", number=" & h.bnStr + rs.ge().expect "valid BnRange" + +proc bnStr( + lst: openArray[Hash32]; + buddy: BeaconBuddyRef; + info: static[string]; + ): string = + buddy.ctx.toBnRange(lst, info).bnStr + +proc toStr(e: BeaconError; anyTime = false): string = + "(" & $e[0] & + "," & $e[1] & + "," & $e[2] & + "," & (if anyTime: "*" else: e[3].toStr) & + ")" + +# ---------------- + +func getResponse( + instr: TraceFetchBodies; + ): Result[FetchBodiesData,BeaconError] = + if (instr.fieldAvail and 1) != 0: + ok(instr.fetched) + else: + err(instr.error) + +func getResponse( + instr: TraceImportBlock; + ): Result[Duration,BeaconError] = + if (instr.fieldAvail and 1) != 0: + ok(instr.elapsed) + else: + err(instr.error) + +func getBeaconError(e: ReplayWaitError): BeaconError = + (e[0], e[1], e[2], Duration()) + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc importBlockHandlerImpl( + desc: ReplayDaemonRef|ReplayBuddyRef; + ethBlock: EthBlock; + effPeerID: Hash; + info: static[string]; + ): Future[Result[Duration,BeaconError]] + {.async: (raises: []).} = + + let + n = desc.iNum + peer = desc.peerStr + peerID = desc.peerIdStr + + var data: TraceImportBlock + desc.withInstr(typeof data, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + + if effPeerID != instr.effPeerID: + raiseAssert info & ": eff. 
peer arguments differ" & + ", n=" & $n & + ", serial=" & $instr.serial & + ", peer=" & $peer & + ", peerID=" & $peerID & + ", ethBlock=" & ethBlock.bnStr & + # ----- + ", effPeerID=" & effPeerID.short & + ", expected=" & instr.effPeerID.short + + if ethBlock != instr.ethBlock: + raiseAssert info & ": block arguments differ" & + ", n=" & $n & + ", serial=" & $instr.serial & + ", peer=" & $peer & + ", peerID=" & $peerID & + ", effPeerID=" & effPeerID.short & + # ----- + ", ethBlock=" & ethBlock.bnStr & + ", expected=%" & instr.ethBlock.bnStr & + # ----- + ", ethBlock=%" & ethBlock.computeRlpHash.short & + ", expected=%" & instr.ethBlock.computeRlpHash.short + data = instr + + let + ctx = desc.run.ctx + rpl = ctx.replay + if not rpl.runner.fakeImport: + when desc is ReplayDaemonRef: + let maybePeer = results.Opt[BeaconBuddyRef].err() + elif desc is ReplayBuddyRef: + let maybePeer = results.Opt[BeaconBuddyRef].ok(desc) + + let rc = await rpl.backup.importBlock(ctx, maybePeer, ethBlock, effPeerID) + if rc.isErr or (data.fieldAvail and 2) != 0: + const info = info & ": result values differ" + let serial = data.serial + if rc.isErr and (data.fieldAvail and 2) == 0: + warn info, n, serial, peer, peerID, + got="err" & rc.error.toStr, expected="ok" + elif rc.isOk and (data.fieldAvail and 2) != 0: + warn info, n, serial, peer, peerID, + got="ok", expected="err" & data.error.toStr(true) + elif rc.error.excp != data.error.excp or + rc.error.msg != data.error.msg: + warn info, n, serial, peer, peerID, + got="err" & rc.error.toStr, expected="err" & data.error.toStr(true) + + desc.withInstr(TraceSyncBlock, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + discard # no-op, visual alignment + + return data.getResponse() + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc fetchBodiesHandler*( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Future[Result[FetchBodiesData,BeaconError]] + {.async: (raises: []).} = + const info = "&fetchBodies" + let buddy = ReplayBuddyRef(buddy) + + var data: TraceFetchBodies + buddy.withInstr(typeof data, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + if req != instr.req: + raiseAssert info & ": arguments differ" & + ", serial=" & $instr.serial & + ", peer=" & $buddy.peer & + # ----- + ", nBlockHashes=" & $req.blockHashes.len & + ", expected=" & $instr.ivReq.len & + # ----- + ", blockHashes=" & req.blockHashes.bnStr(buddy, info) & + ", expected=" & instr.ivReq.bnStr + data = instr + + buddy.withInstr(TraceSyncBodies, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + discard # no-op, visual alignment + + return data.getResponse() + + +proc importBlockHandler*( + ctx: BeaconCtxRef; + maybePeer: Opt[BeaconBuddyRef]; + ethBlock: EthBlock; + effPeerID: Hash; + ): Future[Result[Duration,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `importBlock()` handler. 
+ const info = "&importBlock" + + if maybePeer.isSome(): + let buddy = ReplayBuddyRef(maybePeer.value) + return await buddy.importBlockHandlerImpl(ethBlock, effPeerID, info) + + # Verify that the daemon is properly initialised + let + run = ctx.replay.runner + daemon = run.daemon + if daemon.isNil: + raiseAssert info & ": system error (no daemon)" & + ", serial=" & + ", peer=n/a" & + ", effPeerID=" & effPeerID.short + + return await daemon.importBlockHandlerImpl(ethBlock, effPeerID, info) + +# ------------------------------------------------------------------------------ +# Public functions, data feed +# ------------------------------------------------------------------------------ + +proc sendBodies*( + run: ReplayRunnerRef; + instr: TraceFetchBodies|TraceSyncBodies; + info: static[string]; + ) {.async: (raises: []).} = + ## Stage bodies request/response data + let buddy = run.getPeer(instr, info).expect "valid sync peer" + discard buddy.pushInstr(instr, info) + +proc sendBlock*( + run: ReplayRunnerRef; + instr: TraceImportBlock|TraceSyncBlock; + info: static[string]; + ) {.async: (raises: []).} = + ## Stage block request/response data + if (instr.stateAvail and 2) != 0: + # So it was captured run from a sync peer + let buddy = run.getPeer(instr, info).expect "valid sync peer" + discard buddy.pushInstr(instr, info) + + # Verify that the daemon is properly initialised + elif run.daemon.isNil: + raiseAssert info & ": system error (no daemon)" & + ", serial=" & $instr.serial & + ", peer=n/a" + + else: + discard run.daemon.pushInstr(instr, info) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_headers.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_headers.nim new file mode 100644 index 0000000000..64b0524aec --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_headers.nim @@ -0,0 +1,107 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
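# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the `getResponse()` helpers
# above turn a captured record back into the original handler result, using
# bit 0 of `fieldAvail` to decide whether the payload or the error branch was
# recorded. Below is a reduced, self-contained model of that decoding with
# invented field types:

import pkg/results

type
  DemoError = object
    msg: string
  DemoCapture = object
    fieldAvail: uint       # bit 0: payload recorded, bit 1: error recorded
    payload: int
    error: DemoError

func demoResponse(c: DemoCapture): Result[int,DemoError] =
  # mirror of the `getResponse()` shape: select ok/err from the bit mask
  if (c.fieldAvail and 1) != 0:
    ok(c.payload)
  else:
    err(c.error)

when isMainModule:
  doAssert demoResponse(DemoCapture(fieldAvail: 1, payload: 7)).isOk()
  doAssert demoResponse(DemoCapture(fieldAvail: 2,
    error: DemoError(msg: "timeout"))).isErr()
# ----------------------------------------------------------------------------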
+ +## Replay runner + +{.push raises:[].} + +import + pkg/[chronicles, chronos, eth/common], + ../../../../wire_protocol, + ../../replay_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc `==`(a,b: BlockHeadersRequest): bool = + if a.maxResults == b.maxResults and + a.skip == b.skip: + if a.startBlock.isHash: + if b.startBlock.isHash and + a.startBlock.hash == b.startBlock.hash: + return true + else: + if not b.startBlock.isHash and + a.startBlock.number == b.startBlock.number: + return true + +func getResponse( + instr: TraceFetchHeaders; + ): Result[FetchHeadersData,BeaconError] = + if (instr.fieldAvail and 1) != 0: + ok(instr.fetched) + else: + err(instr.error) + +func getBeaconError(e: ReplayWaitError): BeaconError = + (e[0], e[1], e[2], Duration()) + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc fetchHeadersHandler*( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Future[Result[FetchHeadersData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockHeaders()` handler. + const info = "&fetchHeaders" + let buddy = ReplayBuddyRef(buddy) + + var data: TraceFetchHeaders + buddy.withInstr(typeof data, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + if req != instr.req: + raiseAssert info & ": arguments differ" & + ", n=" & $buddy.iNum & + ", serial=" & $instr.serial & + ", frameID=" & instr.frameID.idStr & + ", peer=" & $buddy.peer & + # ----- + ", reverse=" & $req.reverse & + ", expected=" & $instr.req.reverse & + # ----- + ", reqStart=" & req.startBlock.toStr & + ", expected=" & instr.req.startBlock.toStr & + # ----- + ", reqLen=" & $req.maxResults & + ", expected=" & $instr.req.maxResults + data = instr + + buddy.withInstr(TraceSyncHeaders, info): + if not instr.isAvailable(): + return err(iError.getBeaconError()) # Shutdown? + discard # no-op, visual alignment + + return data.getResponse() + +# ------------------------------------------------------------------------------ +# Public functions, data feed +# ------------------------------------------------------------------------------ + +proc sendHeaders*( + run: ReplayRunnerRef; + instr: TraceFetchHeaders|TraceSyncHeaders; + info: static[string]; + ) {.async: (raises: []).} = + ## Stage headers request/response data + let buddy = run.getPeer(instr, info).expect "valid sync peer" + discard buddy.pushInstr(instr, info) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_helpers.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_helpers.nim new file mode 100644 index 0000000000..bf8dc46237 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_helpers.nim @@ -0,0 +1,779 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). 
+#  * Apache v2 license (license terms in the root directory or at
+#    https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+## Replay runner
+
+{.push raises:[].}
+
+import
+  std/[net, tables],
+  pkg/[chronos, chronicles, stew/interval_set],
+  ../../../../../networking/[p2p, p2p_peers],
+  ../../../../wire_protocol,
+  ../../[replay_desc, replay_helpers]
+
+export
+  replay_helpers
+
+logScope:
+  topics = "replay runner"
+
+type
+  ReplayWaitResult* = Result[void,ReplayWaitError]
+
+  ReplayInstance = ReplayDaemonRef | ReplayBuddyRef
+
+  SubInstrType = TraceFetchHeaders | TraceSyncHeaders |
+                 TraceFetchBodies | TraceSyncBodies |
+                 TraceImportBlock | TraceSyncBlock
+
+  InstrType = TraceSchedDaemonBegin | TraceSchedDaemonEnd |
+              TraceSchedPeerBegin | TraceSchedPeerEnd |
+              SubInstrType
+
+# ------------------------------------------------------------------------------
+# Private debugging helper(s)
+# ------------------------------------------------------------------------------
+
+func noisy(count: int): bool =
+  if count < 100:
+    return (count mod 55) == 0
+  if count < 1000:
+    return (count mod 111) == 0
+  return (count mod 1111) == 0
+
+# ------------------------------------------------------------------------------
+# Private helper(s)
+# ------------------------------------------------------------------------------
+
+template waitForConditionImpl(
+    run: ReplayRunnerRef;
+    info: static[string];
+    cond: untyped;
+      ): ReplayWaitResult =
+  ## Async/template
+  ##
+  ## Wait until the condition `cond()` becomes `true`. If the `stopRunner`
+  ## flag becomes `true`, this wait function returns `err(..)`, otherwise
+  ## `ok()`.
+  ##
+  var bodyRc = ReplayWaitResult.ok()
+  block body:
+    let n {.inject.} = run.instrNumber
+    var count {.inject.} = 0
+    while true:
+      count.inc
+
+      if run.stopRunner:
+        chronicles.info info & ": runner stopped", n, count
+        bodyRc = ReplayWaitResult.err((ENoException,"",info&": runner stopped"))
+        break body
+
+      if cond:
+        break body
+
+      #trace info & ": polling continue", n, count
+
+      try:
+        await sleepAsync replayWaitForCompletion
+      except CancelledError as e:
+        chronicles.info info & ": cancelled while waiting -- STOP", n
+        run.stopRunner = false
+        bodyRc = ReplayWaitResult.err((ECancelledError,$e.name,e.msg))
+        break body
+      # End `while()`
+
+  bodyRc # result
+
+
+func syncedEnvCondImpl(
+    desc: ReplayInstance;
+    instr: InstrType;
+    info: static[string];
+      ): bool =
+  ## Condition function for `waitForConditionImpl()` for synchronising state.
+ ## + let ctx = desc.run.ctx + + #if serial != run.instrNumber: + # return false + + if instr.hdrUnprChunks != ctx.hdr.unprocessed.chunks().uint: + return false + if 0 < instr.hdrUnprChunks: + if instr.hdrUnprLen != ctx.hdr.unprocessed.total(): + return false + let iv = ctx.hdr.unprocessed.le().expect "valid iv" + if instr.hdrUnprLast != iv.maxPt or + instr.hdrUnprLastLen != iv.len: + return false + if instr.antecedent != ctx.hdrCache.antecedent.number: + return false + + if instr.blkUnprChunks != ctx.blk.unprocessed.chunks().uint: + return false + if 0 < instr.blkUnprChunks: + if instr.blkUnprLen != ctx.blk.unprocessed.total(): + return false + let iv = ctx.blk.unprocessed.ge().expect "valid iv" + if instr.blkUnprLeast != iv.minPt or + instr.blkUnprLeastLen != iv.len: + return false + + return true + + +proc newPeerImpl( + run: ReplayRunnerRef; + instr: TraceSchedStart|TraceSchedStop|TraceSchedPool|TraceSchedPeerBegin; + info: static[string]; + ): ReplayBuddyRef = + ## Register a new peer. + ## + run.peers.withValue(instr.peerID, val): + warn info & ": peer exists already", n=run.instrNumber, + serial=instr.serial, peer=($val.peer) + val.isNew = false + return val[] + + var buddy = ReplayBuddyRef( + isNew: true, + run: run, + ctx: run.ctx, + only: BeaconBuddyData( + nRespErrors: (instr.nHdrErrors, + instr.nBlkErrors)), + peerID: instr.peerID, + peer: Peer( + dispatcher: run.ethState.capa, + peerStates: run.ethState.prots, + remote: Node( + node: ENode( + address: enode.Address( + ip: instr.peerIP, + tcpPort: instr.peerPort, + udpPort: instr.peerPort))))) + + run.peers[instr.peerID] = buddy + return (move buddy) + +# ------------------------------------------------------------------------------ +# Private functions, environment checkers +# ------------------------------------------------------------------------------ + +proc baseStatesDifferImpl( + desc: ReplayRunnerRef | ReplayInstance; + instr: TraceRecBase; + info: static[string]; + ): bool = + when desc is ReplayRunnerRef: + let (run, peer) = (desc, "n/a") + when desc is ReplayDaemonRef: + let (run, peer) = (desc.run, "n/a") + when desc is ReplayBuddyRef: + let (run, peer) = (desc.run, desc.peer) + + let + ctx = run.ctx + n = run.instrNumber + serial = instr.serial + var + statesDiffer = false + + if serial != n: + statesDiffer = true + info info & ": serial numbers differ", n, peer, serial, expected=n + + if ctx.pool.lastState != instr.syncState: + statesDiffer = true + info info & ": sync states differ", n, serial, peer, + state=ctx.pool.lastState, expected=instr.syncState + + if ctx.hdrCache.state != instr.chainMode: + statesDiffer = true + info info & ": header chain modes differ", n, serial, peer, + chainMode=ctx.hdrCache.state, expected=instr.chainMode + elif instr.chainMode in {collecting,ready,orphan} and + instr.antecedent != ctx.hdrCache.antecedent.number: + statesDiffer = true + info info & ": header chain antecedents differ", n, serial, peer, + antecedent=ctx.hdrCache.antecedent.bnStr, expected=instr.antecedent.bnStr + + if ctx.pool.nBuddies != instr.nPeers.int: + statesDiffer = true + info info & ": number of active peers differs", n, serial, peer, + nBuddies=ctx.pool.nBuddies, expected=instr.nPeers + + if ctx.poolMode != instr.poolMode: + statesDiffer = true + info info & ": pool modes/reorgs differ", n, serial, peer, + poolMode=ctx.poolMode, expected=instr.poolMode + + return statesDiffer + + +proc unprocListsDifferImpl( + desc: ReplayRunnerRef | ReplayInstance; + instr: TraceRecBase; + info: static[string]; + ): bool = 
+ when desc is ReplayRunnerRef: + let (run, peer) = (desc, "n/a") + when desc is ReplayDaemonRef: + let (run, peer) = (desc.run, "n/a") + when desc is ReplayBuddyRef: + let (run, peer) = (desc.run, desc.peer) + + let + ctx = run.ctx + n = run.instrNumber + serial = instr.serial + var + statesDiffer = false + + # Unprocessed block numbers for header + if instr.hdrUnprChunks != ctx.hdr.unprocessed.chunks().uint: + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + listChunks=ctx.hdr.unprocessed.chunks(), expected=instr.hdrUnprChunks + if 0 < instr.hdrUnprChunks: + if instr.hdrUnprLen != ctx.hdr.unprocessed.total(): + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + listLen=ctx.hdr.unprocessed.total(), expected=instr.hdrUnprLen + let iv = ctx.hdr.unprocessed.le().expect "valid iv" + if instr.hdrUnprLastLen != iv.len: + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + lastIvLen=iv.len, expected=instr.hdrUnprLastLen + if instr.hdrUnprLast != iv.maxPt: + statesDiffer = true + info info & ": unproc headers lists differ", n, serial, peer, + lastIvMax=iv.maxPt, expected=instr.hdrUnprLast + + # Unprocessed block numbers for blocks + if instr.blkUnprChunks != ctx.blk.unprocessed.chunks().uint: + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + listChunks=ctx.blk.unprocessed.chunks(), expected=instr.blkUnprChunks + if 0 < instr.blkUnprChunks: + if instr.blkUnprLen != ctx.blk.unprocessed.total(): + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + listLen=ctx.blk.unprocessed.total(), expected=instr.blkUnprLen + let iv = ctx.blk.unprocessed.ge().expect "valid iv" + if instr.blkUnprLeastLen != iv.len: + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + lastIvLen=iv.len, expected=instr.blkUnprLeastLen + if instr.blkUnprLeast != iv.minPt: + statesDiffer = true + info info & ": unproc blocks lists differ", n, serial, peer, + lastIvMax=iv.maxPt, expected=instr.blkUnprLeast + + return statesDiffer + + +proc peerStatesDifferImpl( + desc: ReplayBuddyRef; + instr: TraceRecBase; + info: static[string]; + ): bool = + let + peer = desc.peer + n = desc.run.instrNumber + serial = instr.serial + var + statesDiffer = false + + if desc.ctrl.state != instr.peerCtrl: + statesDiffer = true + info info & ": peer ctrl states differ", n, serial, peer, + ctrl=desc.ctrl.state, expected=instr.peerCtrl + + if instr.nHdrErrors != desc.only.nRespErrors.hdr: + statesDiffer = true + info info & ": peer header errors differ", n, serial, peer, + nHdrErrors=desc.only.nRespErrors.hdr, expected=instr.nHdrErrors + + if instr.nBlkErrors != desc.only.nRespErrors.blk: + statesDiffer = true + info info & ": peer body errors differ", n, serial, peer, + nBlkErrors=desc.only.nRespErrors.blk, expected=instr.nBlkErrors + + return statesDiffer + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +func iNum*(desc: ReplayInstance|ReplayRunnerRef|BeaconCtxRef): uint = + when desc is ReplayRunnerRef: + desc.instrNumber + elif desc is BeaconCtxRef: + desc.replay.runner.instrNumber + else: + desc.run.instrNumber + +func toStr*(w: BlockHashOrNumber): string = + if w.isHash: w.hash.short else: w.number.bnStr + +func peerStr*(desc: ReplayInstance): string = + when desc is ReplayBuddyRef: + $desc.peer + elif desc is 
ReplayDaemonRef: + "n/a" + +func peerIdStr*(desc: ReplayInstance): string = + when desc is ReplayBuddyRef: + desc.peerID.short + elif desc is ReplayDaemonRef: + "n/a" + +# ----------------- + +proc stopError*(run: ReplayRunnerRef; info: static[string]) = + error info & " -- STOP", n=run.instrNumber + run.stopRunner = false + +proc stopOk*(run: ReplayRunnerRef; info: static[string]) = + info info & " -- STOP", n=run.instrNumber + run.stopRunner = false + +# ----------------- + +proc checkSyncerState*( + desc: ReplayRunnerRef | ReplayInstance; + instr: TraceRecBase; + info: static[string]; + ): bool + {.discardable.} = + ## Check syncer states against all captured state variables of the + ## `instr` argument. + var statesDiffer = false + + if desc.baseStatesDifferImpl(instr, info): + statesDiffer = true + + if desc.unprocListsDifferImpl(instr, info): + statesDiffer = true + + when desc is ReplayBuddyRef: + if desc.peerStatesDifferImpl(instr, info): + statesDiffer = true + + return statesDiffer + +# ------------------------------------------------------------------------------ +# Public functions, peer/daemon descriptor management +# ------------------------------------------------------------------------------ + +proc getPeer*( + run: ReplayRunnerRef; + instr: TraceRecBase; + info: static[string]; + ): Opt[ReplayBuddyRef] = + ## Get peer from peers table (if any) + run.peers.withValue(instr.peerID, buddy): + return ok(buddy[]) + + trace info & ": no peer", n=run.iNum, serial=instr.serial, + peerID=instr.peerID.short + return err() + + +proc newPeer*( + run: ReplayRunnerRef; + instr: TraceSchedStart; + info: static[string]; + ): ReplayBuddyRef = + ## Register a new peer. + ## + return run.newPeerImpl(instr, info) + + +proc getOrNewPeerFrame*( + run: ReplayRunnerRef; + instr: TraceSchedStop|TraceSchedPool|TraceSchedPeerBegin; + info: static[string]; + ): ReplayBuddyRef = + ## Get an existing one or register a new peer and set up `stage[0]`. + ## + var buddy: ReplayBuddyRef + run.peers.withValue(instr.peerID, val): + buddy = val[] + buddy.isNew = false + do: + buddy = run.newPeerImpl(instr, info) + + if buddy.frameID == 0: + buddy.frameID = instr.frameID + elif buddy.frameID != instr.frameID: + error info & ": frame unexpected", n=buddy.iNum, serial=instr.serial, + frameID=buddy.frameID.idStr, expected=instr.frameID.idStr + return move(buddy) + + +proc delPeer*( + buddy: ReplayBuddyRef; + info: static[string]; + ) = + ## Delete peer ID from registry and return the environment for the + ## deleted peer ID. + ## + let run = buddy.run + if run.peers.hasKey(buddy.peerID): + run.peers.del buddy.peerID + else: + if run.noisy: trace info & ": stale peer ignored", n=buddy.iNum, + peer=($buddy.peer), peerID=buddy.peerID.short + +# ----------------- + +proc getDaemon*( + run: ReplayRunnerRef; + info: static[string]; + ): Opt[ReplayDaemonRef] = + ## Similar to `getPeer()` for daemon + if not run.daemon.isNil: + return ok(run.daemon) + + warn info & ": no daemon", n=run.instrNumber + return err() + + +proc newDaemonFrame*( + run: ReplayRunnerRef; + instr: TraceSchedDaemonBegin; + info: static[string]; + ): Opt[ReplayDaemonRef] = + ## Similar to `getOrNewPeerFrame()` for daemon. 
+ if run.daemon.isNil: + run.daemon = ReplayDaemonRef( + run: run, + frameID: instr.frameID) + return ok(run.daemon) + + warn info & ": daemon already registered", n=run.iNum, serial=instr.serial, + frameID=instr.frameID.idStr + return err() + + +proc delDaemon*( + daemon: ReplayDaemonRef; + info: static[string]; + ) = + ## Similar to `delPeer()` for daemon + let run = daemon.run + if run.daemon.isNil: + if run.noisy: trace info & ": stale daemon ignored", n=run.instrNumber + else: + run.daemon = ReplayDaemonRef(nil) + +# ------------------------------------------------------------------------------ +# Public functions, process/handler synchronisation +# ------------------------------------------------------------------------------ + +proc waitForSyncedEnv*( + desc: ReplayInstance; + instr: InstrType; + info: static[string]; + ): Future[ReplayWaitResult] + {.async: (raises: []).} = + ## .. + ## + when desc is ReplayBuddyRef: + # The scheduler (see `sync_sched.nim`) might have disconnected the peer + # already as is captured in the instruction environment. This does not + # apply to `zombie` settings which will be done by the application. + if instr.peerCtrl == Stopped and not desc.ctrl.stopped: + desc.ctrl.stopped = true + + let + run = desc.run + serial {.inject,used.} = instr.serial + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + + if run.noisy: trace info & ": process to be synced", n=desc.iNum, serial, + peer, peerID + + let rc = desc.run.waitForConditionImpl(info): + if count.noisy: # for logging/debugging only + trace info & ": polling for sync", n=desc.iNum, serial, peer, count + desc.checkSyncerState(instr, info) # for logging/debugging only + + desc.syncedEnvCondImpl(instr, info) + + if rc.isErr(): + # Shutdown? + if run.noisy: trace info & ": process sync error", n=desc.iNum, + serial, peer, peerID, name=rc.error.name, msg=rc.error.msg + return err(rc.error) + + if run.noisy: trace info & ": process synced ok", n=desc.iNum, serial, + peer, peerID + desc.checkSyncerState(instr, info) + + return ok() + +# ------------------ + +proc processFinished*( + desc: ReplayInstance; + instr: TraceSchedDaemonBegin|TraceSchedPeerBegin; + info: static[string]; + ) = + ## Register that the process has finished + ## + # Verify that sub-processes did not change the environment + doAssert desc.frameID == instr.frameID + + # Mark the pocess `done` + desc.frameID = 0 + + if desc.run.noisy: trace info & ": terminating", n=desc.iNum, + serial=instr.serial, frameID=instr.frameID.idStr, peer=desc.peerStr + + +template whenProcessFinished*( + desc: ReplayInstance; + instr: TraceSchedDaemonEnd|TraceSchedPeerEnd; + info: static[string]; + ): ReplayWaitResult = + ## Async/template + ## + ## Execude the argument `code` when the process related to the `instr` + ## argument flag has finished. 
The variables and functions available for + ## `code` are: + ## * `error` -- error data, initialised if `instr.isAvailable()` is `false` + ## + var bodyRc = ReplayWaitResult.ok() + block body: + let + run = desc.run + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + serial {.inject,used.} = instr.serial + + if desc.frameID != 0: + doAssert desc.frameID == instr.frameID + + if run.noisy: trace info & ": wait for terminated", n=desc.iNum, serial, + frameID=instr.frameID.idStr, peer, peerID + + bodyRc = desc.run.waitForConditionImpl(info): + desc.frameID == 0 + + if bodyRc.isErr: + break body + + if run.noisy: + trace info & ": terminated OK", n=desc.iNum, serial, + frameID=instr.frameID.idStr, peer + desc.checkSyncerState(instr, info) + + # Synchronise against captured environment + bodyRc = desc.run.waitForConditionImpl(info): + if count.noisy: + trace info & ": polling for sync", n=desc.iNum, serial, + peer, peerID, count + desc.checkSyncerState(instr, info) + + desc.syncedEnvCondImpl(instr, info) + + if bodyRc.isErr: + break body + + if run.noisy: trace info & ": finished", n=desc.iNum, serial, + frameID=instr.frameID.idStr, peer, peerID + # End body + + bodyRc # result + +# ------------------ + +template pushInstr*( + desc: ReplayInstance; + instr: SubInstrType; + info: static[string]; + ): ReplayWaitResult = + ## Async/template + ## + ## Stage session data, then wait for the background process to consume the + ## session data using `withInstr()`. + ## + var bodyRc = ReplayWaitResult.ok() + block: + when instr is TraceFetchHeaders: + type T = ReplayFetchHeadersMsgRef + const dataType {.inject.} = TrtFetchHeaders + elif instr is TraceSyncHeaders: + type T = ReplaySyncHeadersMsgRef + const dataType {.inject.} = TrtSyncHeaders + + elif instr is TraceFetchBodies: + type T = ReplayFetchBodiesMsgRef + const dataType {.inject.} = TrtFetchBodies + elif instr is TraceSyncBodies: + type T = ReplaySyncBodiesMsgRef + const dataType {.inject.} = TrtSyncBodies + + elif instr is TraceImportBlock: + type T = ReplayImportBlockMsgRef + const dataType {.inject.} = TrtImportBlock + elif instr is TraceSyncBlock: + type T = ReplaySyncBlockMsgRef + const dataType {.inject.} = TrtSyncBlock + + # Verify that the stage is based on a proper environment + doAssert desc.frameID != 0 # this is not `instr.frameID` + + let + run = desc.run + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + serial {.inject.} = instr.serial + + doAssert serial == desc.iNum + doAssert desc.message.isNil + + # Stage/push session data + desc.message = T( + recType: dataType, + instr: instr) + + block body: + # Wait for sync # FIXME, really needed? + bodyRc = desc.run.waitForConditionImpl(info): # FIXME, really needed? + if count.noisy: + trace info & ": polling for sync", n=desc.iNum, serial, + peer, peerID, dataType, count + desc.checkSyncerState(instr, info) + + desc.syncedEnvCondImpl(instr, info) # FIXME, really needed? 
+ + if bodyRc.isErr(): + break body + + doAssert serial == desc.iNum + + if desc.run.noisy: trace info & ": sent data", n=desc.iNum, serial, + peer, peerID, dataType + + # Wait for message to be swallowed + bodyRc = desc.run.waitForConditionImpl(info): + if count.noisy: + trace info & ": polling for ackn", n=desc.iNum, serial, + peer, peerID, dataType, count + + desc.message.isNil + + if bodyRc.isErr(): + break body + # End body + + if run.noisy: trace info & ": done", n=desc.iNum, serial, + peer, peerID, dataType + doAssert desc.iNum == serial + + bodyRc # result + + +template withInstr*( + desc: ReplayInstance; + I: type SubInstrType; + info: static[string]; + code: untyped; + ) = + ## Async/template + ## + ## Execude the argument `code` with the data sent by a feeder. The variables + ## and functions available for `code` are: + ## * `instr` -- instruction data, available if `instr.isAvailable()` is `true` + ## * `iError` -- error data, initialised if `instr.isAvailable()` is `false` + ## + block: + when I is TraceFetchHeaders: + const dataType {.inject.} = TrtFetchHeaders + type M = ReplayFetchHeadersMsgRef + elif I is TraceSyncHeaders: + const dataType {.inject.} = TrtSyncHeaders + type M = ReplaySyncHeadersMsgRef + + elif I is TraceFetchBodies: + const dataType {.inject.} = TrtFetchBodies + type M = ReplayFetchBodiesMsgRef + elif I is TraceSyncBodies: + const dataType {.inject.} = TrtSyncBodies + type M = ReplaySyncBodiesMsgRef + + elif I is TraceImportBlock: + const dataType {.inject.} = TrtImportBlock + type M = ReplayImportBlockMsgRef + elif I is TraceSyncBlock: + const dataType {.inject.} = TrtSyncBlock + type M = ReplaySyncBlockMsgRef + + let + run = desc.run + peer {.inject,used.} = desc.peerStr + peerID {.inject,used.} = desc.peerIdStr + + if run.noisy: trace info & ": get data", n=desc.iNum, serial="n/a", + frameID="n/a", peer, dataType + + # Reset flag and wait for staged data to disappear from stack + let rc = run.waitForConditionImpl(info): + if count.noisy: trace info & ": polling for data", n=desc.iNum, + serial="n/a", frameID="n/a", peer, peerID, dataType, count + + not desc.message.isNil + + var + iError {.inject.}: ReplayWaitError + instr {.inject.}: I + + if rc.isOk(): + instr = M(desc.message).instr + doAssert desc.message.recType == dataType + doAssert instr.serial == desc.iNum + + when desc is ReplayBuddyRef: + # The scheduler (see `sync_sched.nim`) might have disconnected the + # peer already which would be captured in the instruction environment. + # This does not apply to `zombie` settings which will be handled by + # the application `code`. 
+ if instr.peerCtrl == Stopped and not desc.ctrl.stopped: + desc.ctrl.stopped = true + else: + iError = rc.error + + template isAvailable(_: typeof instr): bool {.used.} = rc.isOk() + + code + + if rc.isOk(): + doAssert not desc.message.isNil + doAssert desc.message.recType == dataType + doAssert instr.serial == desc.iNum + + desc.checkSyncerState(instr, info) + + if run.noisy: trace info & ": got data", n=desc.iNum, serial=instr.serial, + frameID=instr.frameID.idStr, peer, peerID, dataType + + desc.message = M(nil) + + discard # no-op, visual alignment + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_sched.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_sched.nim new file mode 100644 index 0000000000..471959b183 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_sched.nim @@ -0,0 +1,209 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + std/tables, + pkg/[chronicles, chronos, eth/common], + ../../replay_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Private helper +# ------------------------------------------------------------------------------ + +proc schedDaemonProcessImpl( + daemon: ReplayDaemonRef; + instr: TraceSchedDaemonBegin; + info: static[string]; + ) {.async: (raises: []).} = + ## Run the task `schedDaemon()`. This function has to be run background + ## process (using `asyncSpawn`.) + ## + let run = daemon.run + if run.noisy: trace info & ": begin", n=run.iNum, serial=instr.serial, + frameID=instr.frameID.idStr, syncState=instr.syncState + + discard await run.worker.schedDaemon(run.ctx) + daemon.processFinished(instr, info) + + if run.noisy: trace info & ": end", n=run.iNum, serial=instr.serial, + frameID=instr.frameID.idStr, syncState=instr.syncState + + +proc schedPeerProcessImpl( + buddy: ReplayBuddyRef; + instr: TraceSchedPeerBegin; + info: static[string]; + ) {.async: (raises: []).} = + ## Run the task `schedPeer()`. This function has to be run background + ## process (using `asyncSpawn`.) 
+  ##
+  let run = buddy.run
+  if run.noisy: trace info & ": begin", n=run.iNum, serial=instr.serial,
+    frameID=instr.frameID.idStr, peer=($buddy.peer), peerID=buddy.peerID.short,
+    syncState=instr.syncState
+
+  # Activate peer
+  buddy.run.nPeers.inc
+
+  discard await run.worker.schedPeer(buddy)
+  buddy.processFinished(instr, info)
+
+  if run.noisy: trace info & ": end", n=run.iNum, serial=instr.serial,
+    frameID=instr.frameID.idStr, peer=($buddy.peer), peerID=buddy.peerID.short,
+    syncState=instr.syncState
+
+# ------------------------------------------------------------------------------
+# Public dispatcher handlers
+# ------------------------------------------------------------------------------
+
+proc schedDaemonBegin*(
+    run: ReplayRunnerRef;
+    instr: TraceSchedDaemonBegin;
+    info: static[string];
+      ) {.async: (raises: []).} =
+  ## Run the `schedDaemon()` task.
+  ##
+  # Synchronise against captured environment and start process
+  let daemon = run.newDaemonFrame(instr, info).valueOr: return
+  discard await daemon.waitForSyncedEnv(instr, info)
+  asyncSpawn daemon.schedDaemonProcessImpl(instr, info)
+
+
+proc schedDaemonEnd*(
+    run: ReplayRunnerRef;
+    instr: TraceSchedDaemonEnd;
+    info: static[string];
+      ) {.async: (raises: []).} =
+  ## Clean up (in foreground) after `schedDaemon()` process has terminated.
+  ##
+  let daemon = run.getDaemon(info).valueOr: return
+  daemon.whenProcessFinished(instr, info).isErrOr:
+    daemon.delDaemon(info) # Clean up
+
+
+proc schedStartWorker*(
+    run: ReplayRunnerRef;
+    instr: TraceSchedStart;
+    info: static[string];
+      ) =
+  ## Runs `schedStart()` in the foreground.
+  ##
+  let
+    buddy = run.newPeer(instr, info)
+    accept = run.worker.schedStart(buddy)
+
+  if run.noisy: trace info & ": begin", n=run.iNum, serial=instr.serial,
+    frameID=instr.frameID.idStr, peer=($buddy.peer), peerID=buddy.peerID.short
+
+  if accept != instr.accept:
+    warn info & ": result argument differs", n=run.iNum, serial=instr.serial,
+      peer=buddy.peer, expected=instr.accept, result=accept
+
+  # Syncer state was captured when leaving the `schedStart()` handler.
+  buddy.checkSyncerState(instr, info)
+
+  if not accept:
+    buddy.delPeer(info) # Clean up
+
+  if run.noisy: trace info & ": end", n=run.iNum, serial=instr.serial,
+    frameID=instr.frameID.idStr, peer=($buddy.peer), peerID=buddy.peerID.short
+
+
+proc schedStopWorker*(
+    run: ReplayRunnerRef;
+    instr: TraceSchedStop;
+    info: static[string];
+      ) =
+  ## Runs `schedStop()` in the foreground.
+  ##
+  let buddy = run.getOrNewPeerFrame(instr, info)
+  run.worker.schedStop(buddy)
+
+  # As the `schedStop()` function environment was captured only after the
+  # syncer was activated, there might still be some unregistered peers hanging
+  # around. So it is perfectly OK to see the peer here for the first time,
+  # with its descriptor sort of uninitialised (relative to `instr`.)
+  if not buddy.isNew:
+    # Syncer state was captured when leaving the `schedStop()` handler.
+    if instr.peerCtrl == Stopped and not buddy.ctrl.stopped:
+      buddy.ctrl.stopped = true
+    buddy.checkSyncerState(instr, info)
+
+  # Clean up
+  buddy.delPeer(info)
+
+  info info & ": done", n=run.iNum, serial=instr.serial,
+    frameID=instr.frameID.idStr, peer=($buddy.peer), peerID=buddy.peerID.short
+
+
+proc schedPoolWorker*(
+    run: ReplayRunnerRef;
+    instr: TraceSchedPool;
+    info: static[string];
+      ) =
+  ## Runs `schedPool()` in the foreground.
+ ## + let buddy = run.getOrNewPeerFrame(instr, info) + + if 0 < run.nPeers: + warn info & ": no active peers allowed", n=run.iNum, serial=instr.serial, + peer=buddy.peer, nPeers=run.nPeers, expected=0 + + # The scheduler will reset the `poolMode` flag before starting the + # `schedPool()` function. + run.ctx.poolMode = false + + discard run.worker.schedPool(buddy, instr.last, instr.laps.int) + + # Syncer state was captured when leaving the `schedPool()` handler. + buddy.checkSyncerState(instr, info) + + # Reset frame data + buddy.frameID = 0 + + info info & ": done", n=run.iNum, serial=instr.serial, + frameID=instr.frameID.idStr, peer=($buddy.peer), peerID=buddy.peerID.short + + +proc schedPeerBegin*( + run: ReplayRunnerRef; + instr: TraceSchedPeerBegin; + info: static[string]; + ) {.async: (raises: []).} = + ## Run the `schedPeer()` task. + ## + # Synchronise against captured environment and start process + let buddy = run.getOrNewPeerFrame(instr, info) + discard await buddy.waitForSyncedEnv(instr, info) + asyncSpawn buddy.schedPeerProcessImpl(instr, info) + + +proc schedPeerEnd*( + run: ReplayRunnerRef; + instr: TraceSchedPeerEnd; + info: static[string]; + ) {.async: (raises: []).} = + ## Clean up (in foreground) after `schedPeer()` process has terminated. + ## + let buddy = run.getPeer(instr, info).valueOr: return + buddy.whenProcessFinished(instr, info).isErrOr: + buddy.run.nPeers.dec # peer is not active, anymore + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_sync.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_sync.nim new file mode 100644 index 0000000000..b0ccfb8e63 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_sync.nim @@ -0,0 +1,91 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
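# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the `schedDaemonBegin()`/
# `schedDaemonEnd()` pair above starts the handler in the background via
# `asyncSpawn` and later polls until the task has cleared its frame marker.
# Below is a self-contained model of that begin/poll/end shape with chronos;
# the type and proc names are invented.

import pkg/chronos

type DemoFrame = ref object
  frameID: uint64                  # non-zero while the background task runs

proc demoTask(frame: DemoFrame) {.async: (raises: []).} =
  # stand-in for the spawned `sched..ProcessImpl()` worker
  try:
    await sleepAsync(5.milliseconds)
  except CancelledError:
    discard
  frame.frameID = 0                # completion signal, like `processFinished()`

proc demoAwaitEnd(frame: DemoFrame) {.async: (raises: []).} =
  # stand-in for the `whenProcessFinished()` polling loop
  while frame.frameID != 0:
    try:
      await sleepAsync(1.milliseconds)
    except CancelledError:
      break

when isMainModule:
  let frame = DemoFrame(frameID: 1)
  asyncSpawn demoTask(frame)       # begin: run the worker in the background
  waitFor demoAwaitEnd(frame)      # end: block until the worker has finished
# ----------------------------------------------------------------------------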
+ +## Replay runner + +{.push raises:[].} + +import + pkg/chronicles, + ../../../../../core/chain, + ../../replay_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc syncActvFailedWorker*( + run: ReplayRunnerRef; + instr: TraceSyncActvFailed; + info: static[string]; + ) = + trace info, n=run.iNum, serial=instr.serial + + +proc syncActivateWorker*( + run: ReplayRunnerRef; + instr: TraceSyncActivated; + info: static[string]) = + let + serial = instr.serial + ctx = run.ctx + + if not ctx.hibernate: + warn info & ": already activated", n=run.iNum, serial + return + + var activationOK = true + if ctx.chain.baseNumber != instr.baseNum: + error info & ": cannot activate (bases must match)", n=run.iNum, serial, + base=ctx.chain.baseNumber.bnStr, expected=instr.baseNum.bnStr + activationOK = false + + if activationOK: + ctx.hdrCache.headTargetUpdate(instr.head, instr.finHash) + + # Set the number of active buddies (avoids some moaning.) + run.ctx.pool.nBuddies = instr.nPeers.int + run.checkSyncerState(instr, info) + + if ctx.hibernate or not activationOK: + if run.noisy: trace "=ActvFailed", n=run.iNum, serial + run.stopError(info & ": activation failed") + else: + # No need for scheduler noise (e.g. disconnect messages.) + ctx.noisyLog = false + trace "=Activated", n=run.iNum, serial + + +proc syncSuspendWorker*( + run: ReplayRunnerRef; + instr: TraceSyncHibernated; + info: static[string]; + ) = + let ctx = run.ctx + if ctx.hibernate: + run.stopError(info & ": suspend failed") + return + + run.checkSyncerState(instr, info) + if run.noisy: trace "=Suspended", n=run.iNum, serial=instr.serial + + # Shutdown if there are no remaining sessions left + if 1 < run.nSessions: + run.nSessions.dec + else: + run.stopOk(info & ": session(s) terminated") + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_version.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_version.nim new file mode 100644 index 0000000000..2b69b08bdc --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_dispatch/dispatch_version.nim @@ -0,0 +1,75 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
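# ----------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): `syncSuspendWorker()` above
# ends the replay once the configured number of sessions (activation to
# suspension cycles) has been consumed. Below is a minimal model of that
# countdown; the object and field names are invented.

type DemoRun = object
  nSessions: int                   # sessions still to be replayed
  stopped: bool

proc demoOnSuspend(run: var DemoRun) =
  # mirror of the countdown: keep going while sessions remain, else stop
  if 1 < run.nSessions:
    dec run.nSessions
  else:
    run.stopped = true

when isMainModule:
  var run = DemoRun(nSessions: 2)
  demoOnSuspend(run)               # first session suspended, keep replaying
  doAssert not run.stopped
  demoOnSuspend(run)               # last session suspended, shut down
  doAssert run.stopped
# ----------------------------------------------------------------------------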
+ +## Replay runner + +{.push raises:[].} + +import + pkg/chronicles, + ../../../../../core/chain, + ../../replay_desc, + ./dispatch_helpers + +logScope: + topics = "replay runner" + +# ------------------------------------------------------------------------------ +# Public dispatcher handlers +# ------------------------------------------------------------------------------ + +proc versionInfoWorker*( + run: ReplayRunnerRef; + instr: TraceVersionInfo; + info: static[string]; + ) = + let + serial = instr.serial + ctx = run.ctx + var + versionOK = true + + if serial != 1: + error info & ": not the first record", serial, expected=1 + versionOK = false + + if run.instrNumber != 1: + error info & ": record count mismatch", n=run.instrNumber, expected=1 + versionOK = false + + if instr.version != TraceVersionID: + error info & ": wrong version", serial, + traceLayoutVersion=instr.version, expected=TraceVersionID + versionOK = false + + if instr.networkId != ctx.chain.com.networkId: + error info & ": wrong network", serial, + networkId=instr.networkId, expected=ctx.chain.com.networkId + versionOK = false + + if ctx.chain.baseNumber < instr.baseNum: + error info & ": cannot start (base too low)", serial, + base=ctx.chain.baseNumber.bnStr, replayBase=instr.baseNum.bnStr + versionOK = false + + if not ctx.hibernate: + error info & ": syncer must not be activated, yet", serial + versionOK = false + + if not versionOK: + run.stopError(info & ": version match failed") + return + + chronicles.info info, TraceVersionID, serial + run.checkSyncerState(instr, info) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_runner/runner_init.nim b/execution_chain/sync/beacon/replay/replay_runner/runner_init.nim new file mode 100644 index 0000000000..36dcd48ab9 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_runner/runner_init.nim @@ -0,0 +1,86 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + pkg/chronos, + ../../../../networking/[p2p, p2p_peers, peer_pool], + ../../../wire_protocol, + ../replay_desc + +logScope: + topics = "replay" + +# ------------------------------------------------------------------------------ +# Private helper(s) +# ------------------------------------------------------------------------------ + +proc getDispatcher(): Dispatcher = + ## Return a list of all known protocols and pretend all are supported + var po = PeerObserver() + po.addProtocol eth68 + po.addProtocol eth69 + + var q: array[MAX_PROTOCOLS,Opt[uint64]] + q[0] = Opt.none(uint64) + q[1] = Opt.some(16'u64) + for n in 2 .. 
po.protocols.len: + q[n] = Opt.some(q[n-1].value + po.protocols[n-1].messages[^1].id) + + Dispatcher(protocolOffsets: q) + + +proc getProtocolStates(): array[MAX_PROTOCOLS,RootRef] = + ## Pretend that all `getDispatcher()` list items are initialised + var q: typeof(result) + q[0] = RootRef(nil) + q[1] = EthPeerState(initialized: true) + q[2] = Eth69PeerState(initialized: true) + q + +# ------------------------------------------------------------------------------ +# Public constructor(s) +# ------------------------------------------------------------------------------ + +proc init*(T: type ReplayEthState): T = + ## For ethxx compatibility + T(capa: getDispatcher(), + prots: getProtocolStates()) + +proc init*( + T: type ReplayRunnerRef; + ctx: BeaconCtxRef; + startNoisy: uint; + fakeImport: bool; + ): T = + ## .. + # Enable protocols in dispatcher + const info = "ReplayRunnerRef(): " + if 10 <= ctx.handler.version: + fatal info & "Need original handlers version", + handlerVersion=ctx.handler.version + quit(QuitFailure) + + T(ctx: ctx, + worker: ctx.pool.handlers, + ethState: ReplayEthState.init(), + startNoisy: startNoisy, + fakeImport: fakeImport) + + +proc destroy*(run: ReplayRunnerRef) = + discard + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_setup.nim b/execution_chain/sync/beacon/replay/replay_setup.nim new file mode 100644 index 0000000000..09c84f1e1e --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_setup.nim @@ -0,0 +1,97 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Trace environment setup & destroy + +{.push raises:[].} + +import + std/streams, + pkg/chronicles, + ./replay_desc + +logScope: + topics = "beacon reaplay" + +const + DontQuit = low(int) + ## To be used with `onCloseException()` + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +template onException( + info: static[string]; + quitCode: static[int]; + code: untyped) = + try: + code + except CatchableError as e: + const blurb = info & "Replay stream exception -- STOP" + when quitCode == DontQuit: + error blurb, error=($e.name), msg=e.msg + else: + fatal blurb, error=($e.name), msg=e.msg + quit(quitCode) + +# ------------------------------------------------------------------------------ +# Public constructor/destructor +# ------------------------------------------------------------------------------ + +proc replaySetup*( + ctx: BeaconCtxRef; + fileName: string; + startNoisy: uint; + ): bool = + ## .. 
+ const info = "replaySetup(): " + + if ctx.handler.version != 0: + error info & "Overlay session handlers activated already", + ID=ctx.handler.version + return false + + let strm = fileName.newFileStream fmRead + if strm.isNil: + error info & "Cannot open trace file for reading", fileName + return false + + ctx.pool.handlers = ReplayBaseHandlersRef( + version: ReplayBaseHandlersID, + strm: strm, + startNoisy: startNoisy, + activate: ctx.handler.activate, + suspend: ctx.handler.suspend, + schedDaemon: ctx.handler.schedDaemon, + schedStart: ctx.handler.schedStart, + schedStop: ctx.handler.schedStop, + schedPool: ctx.handler.schedPool, + schedPeer: ctx.handler.schedPeer, + getBlockHeaders: ctx.handler.getBlockHeaders, + syncBlockHeaders: ctx.handler.syncBlockHeaders, + getBlockBodies: ctx.handler.getBlockBodies, + syncBlockBodies: ctx.handler.syncBlockBodies, + importBlock: ctx.handler.importBlock, + syncImportBlock: ctx.handler.syncImportBlock) + + true + +proc replayRelease*(ctx: BeaconCtxRef) = + const info = "replayRelease(): " + + if ctx.handler.version == ReplayBaseHandlersID: + info.onException(DontQuit): + ctx.handler.ReplayBaseHandlersRef.strm.close() + ctx.pool.handlers.version = 0 + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_start_stop.nim b/execution_chain/sync/beacon/replay/replay_start_stop.nim new file mode 100644 index 0000000000..016178ee79 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_start_stop.nim @@ -0,0 +1,121 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Replay runner + +{.push raises:[].} + +import + std/streams, + pkg/[chronicles, chronos], + ./replay_reader/reader_init, + ./replay_runner/runner_init, + ./replay_start_stop/handlers/[blocks, headers, sched, sync], + ./[replay_desc, replay_runner] + +logScope: + topics = "replay" + +const + startInfo = "replayStart(): " + +# ------------------------------------------------------------------------------ +# Private helper(s) +# ------------------------------------------------------------------------------ + +proc destroy(rpl: ReplayRef; info: static[string]) = + info info & "Terminating .." 
+  rpl.reader.destroy()
+  rpl.runner.destroy()
+
+proc unlessStop(rpl: ReplayRef; info: static[string]): ReplayStopRunnnerFn =
+  let ctx = rpl.runner.ctx
+  proc(): bool =
+    if ctx.handler.version != ReplayOverlayHandlersID or
+       rpl.runner.stopRunner:
+      rpl.destroy info
+      return true
+    false
+
+# ------------------------------------------------------------------------------
+# Private function(s)
+# ------------------------------------------------------------------------------
+
+proc phoneySched(ctx: BeaconCtxRef) {.async: (raises: []).} =
+  const info = "phoneySched(): "
+  let rpl = ctx.replay
+  if rpl.isNil:
+    warn info & "No usable syncer environment",
+      handlerVersion=ctx.handler.version
+  else:
+    await rpl.runner.runDispatcher(rpl.reader, stopIf=rpl.unlessStop info)
+
+# ------------------------------------------------------------------------------
+# Public constructor/destructor
+# ------------------------------------------------------------------------------
+
+proc replayStop*(ctx: BeaconCtxRef) =
+  let rpl = ctx.replay
+  if not rpl.isNil:
+    # Signals shutdown to `phoneySched()`
+    ctx.pool.handlers = rpl.backup
+
+
+proc replayStart*(
+    ctx: BeaconCtxRef;
+    strm: Stream;
+    startNoisy: uint;
+    fakeImport: bool) =
+  ## Start replay emulator
+  ##
+  if ctx.handler.version in {0, ReplayBaseHandlersID}:
+    ctx.pool.handlers = ReplayRef(
+
+      # Install new Overlay handler descriptor
+      reader: ReplayReaderRef.init(strm),
+      backup: ctx.pool.handlers,
+      runner: ReplayRunnerRef.init(ctx, startNoisy, fakeImport),
+
+      # Set up redirect handlers for the replay
+      version: ReplayOverlayHandlersID,
+      activate: activateReplay,
+      suspend: suspendReplay,
+      schedDaemon: schedDaemonMuted,
+      schedStart: schedStartMuted,
+      schedStop: schedStopMuted,
+      schedPool: schedPoolMuted,
+      schedPeer: schedPeerMuted,
+      getBlockHeaders: fetchHeadersReplay,
+      syncBlockHeaders: syncHeadersMuted,
+      getBlockBodies: fetchBodiesReplay,
+      syncBlockBodies: syncBodiesMuted,
+      importBlock: importBlockReplay,
+      syncImportBlock: syncBlockMuted)
+
+    # Start fake scheduler
+    asyncSpawn ctx.phoneySched()
+
+  elif ctx.handler.version != ReplayOverlayHandlersID:
+    fatal startInfo & "Overlay session handlers activated already",
+      ID=ctx.handler.version
+    quit(QuitFailure)
+
+
+proc replayStart*(ctx: BeaconCtxRef) =
+  ## Variant of `replayStart()` for pre-initialised handlers (see
+  ## `replaySetup()`.)
+  ##
+  if ctx.handler.version == ReplayBaseHandlersID:
+    let hdl = ctx.handler.ReplayBaseHandlersRef
+    ctx.replayStart(hdl.strm, hdl.startNoisy, hdl.fakeImport)
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/execution_chain/sync/beacon/replay/replay_start_stop/handlers/blocks.nim b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/blocks.nim
new file mode 100644
index 0000000000..e36471b192
--- /dev/null
+++ b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/blocks.nim
@@ -0,0 +1,54 @@
+# Nimbus
+# Copyright (c) 2025 Status Research & Development GmbH
+# Licensed and distributed under either of
+#  * MIT license (license terms in the root directory or at
+#      https://opensource.org/licenses/MIT).
+#  * Apache v2 license (license terms in the root directory or at
+#      https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+ +## Overlay handler for replay environment + +{.push raises:[].} + +import + pkg/chronos, + ../../../../wire_protocol/types, + ../../replay_runner/runner_dispatch/dispatch_blocks, + ../../replay_desc + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc fetchBodiesReplay*( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Future[Result[FetchBodiesData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockBodies()` handler. + await buddy.fetchBodiesHandler(req) + +proc syncBodiesMuted*(buddy: BeaconBuddyRef) = + ## Replacement for `syncBlockBodies()` handler. + discard + + +proc importBlockReplay*( + ctx: BeaconCtxRef; + maybePeer: Opt[BeaconBuddyRef]; + ethBlock: EthBlock; + effPeerID: Hash; + ): Future[Result[Duration,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `importBlock()` handler. + await ctx.importBlockHandler(maybePeer, ethBlock, effPeerID) + +proc syncBlockMuted*(ctx: BeaconCtxRef; maybePeer: Opt[BeaconBuddyRef]) = + ## Replacement for `syncImportBlock()` handler. + discard + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_start_stop/handlers/headers.nim b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/headers.nim new file mode 100644 index 0000000000..87a87ade29 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/headers.nim @@ -0,0 +1,39 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for replay environment + +{.push raises:[].} + +import + pkg/chronos, + ../../../../wire_protocol, + ../../replay_runner/runner_dispatch/dispatch_headers, + ../../replay_desc + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc fetchHeadersReplay*( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Future[Result[FetchHeadersData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockHeaders()` handler. + await buddy.fetchHeadersHandler(req) + +proc syncHeadersMuted*(buddy: BeaconBuddyRef) = + ## Replacement for `syncBlockHeaders()` handler. 
+ discard + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_start_stop/handlers/sched.nim b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/sched.nim new file mode 100644 index 0000000000..3527828016 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/sched.nim @@ -0,0 +1,51 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for replay environment + +{.push raises:[].} + +import + pkg/chronos, + ../../replay_desc + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc schedDaemonMuted*( + ctx: BeaconCtxRef; + ): Future[Duration] + {.async: (raises: []).} = + ## Replacement for `schedDaemon()` handler. + return replayWaitMuted + +proc schedStartMuted*(buddy: BeaconBuddyRef): bool = + ## Similar to `schedDaemonMuted()` + false + +proc schedStopMuted*(buddy: BeaconBuddyRef) = + ## Similar to `schedDaemonMuted()` + discard + +proc schedPoolMuted*(buddy: BeaconBuddyRef; last: bool; laps: int): bool = + ## Similar to `schedDaemonMuted()` + true + +proc schedPeerMuted*( + buddy: BeaconBuddyRef; + ):Future[Duration] + {.async: (raises: []).} = + ## Similar to `schedDaemonMuted()` + return replayWaitMuted + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/replay/replay_start_stop/handlers/sync.nim b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/sync.nim new file mode 100644 index 0000000000..c9c51d3d04 --- /dev/null +++ b/execution_chain/sync/beacon/replay/replay_start_stop/handlers/sync.nim @@ -0,0 +1,32 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for replay environment + +{.push raises:[].} + +import + ../../replay_desc + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc activateReplay*(ctx: BeaconCtxRef) = + ## Replacement for `activate()` handler. + ctx.replay.backup.activate(ctx) + +proc suspendReplay*(ctx: BeaconCtxRef) = + ## Replacement for `suspend()` handler. 
+  ctx.replay.backup.suspend(ctx)
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/execution_chain/sync/beacon/trace.nim b/execution_chain/sync/beacon/trace.nim
new file mode 100644
index 0000000000..ba947cd057
--- /dev/null
+++ b/execution_chain/sync/beacon/trace.nim
@@ -0,0 +1,22 @@
+# Nimbus
+# Copyright (c) 2025 Status Research & Development GmbH
+# Licensed and distributed under either of
+#  * MIT license (license terms in the root directory or at
+#      https://opensource.org/licenses/MIT).
+#  * Apache v2 license (license terms in the root directory or at
+#      https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+## Trace environment
+
+{.push raises:[].}
+
+import
+  ./trace/[trace_setup, trace_start_stop]
+
+export
+  trace_setup,
+  trace_start_stop
+
+# End
diff --git a/execution_chain/sync/beacon/trace/README.md b/execution_chain/sync/beacon/trace/README.md
new file mode 100644
index 0000000000..2dd8870d77
--- /dev/null
+++ b/execution_chain/sync/beacon/trace/README.md
@@ -0,0 +1,48 @@
+Beacon sync tracer
+==================
+
+For the *nimbus_execution_client* binary, data from syncer sessions can
+be captured into a file **(capture)** along with system state information
+via
+
+    nimbus_execution_client \
+      --beacon-sync-trace-file=(capture) \
+      ...
+
+where **...** stands for all other options that might be useful for running
+an execution layer session.
+
+The capture file **(capture)** will hold enough data for replaying the
+*nimbus_execution_client* session(s).
+
+By default, the captured syncer session starts with the first syncer
+activation (when *Activating syncer* is logged) and ends when the syncer is
+suspended (when *Suspending syncer* is logged).
+
+The trace file **(capture)** is organised as an ASCII text file where each
+line holds one data capture record. The line format is
+
+    <type> <data>
+
+where `<type>` is a single alphanumeric letter and `<data>` is a base64
+representation of an rlp-encoded data capture structure.
+
+Due to the base64 representation, the trace data is about four times the
+size of the captured data, which leads to huge files, e.g. some 30GiB for
+the last 120k blocks synchronised on *mainnet*.
+
+The capture file may be gzipped after the dump has finished, which reduces
+its size roughly to 1/3. So altogether, the gzipped trace file is about 4/3
+the size of the captured data (mainly downloaded block headers and bodies).
+
+The captured data might be further processed (e.g. inspection or replay) in
+its gzipped form.
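+
+For a quick plausibility check, the record framing can be inspected without
+decoding the rlp payloads (full decoding is what the replay reader is for.)
+The following stand-alone sketch -- an illustration only, assuming the
+`stew/base64` decoder matching the encoder used by the tracer -- tallies the
+records per type letter and sums up the decoded payload sizes:
+
+    # capture_stats.nim -- illustrative sketch, not part of the tracer
+    import std/[streams, strutils, tables], pkg/stew/base64
+
+    proc captureStats(fileName: string) =
+      let strm = fileName.newFileStream fmRead
+      if strm.isNil:
+        echo "cannot open ", fileName
+        return
+      var
+        count: CountTable[string]          # records per type letter
+        nBytes = 0                         # sum of decoded payload sizes
+      for line in strm.lines:
+        let cols = line.splitWhitespace(1) # "<type> <data>"
+        if cols.len != 2:
+          continue                         # ignore malformed lines
+        count.inc cols[0]
+        nBytes += Base64.decode(cols[1]).len
+      strm.close()
+      for typ, n in count.pairs:
+        echo typ, ": ", n, " record(s)"
+      echo "payload: ", nBytes, " bytes"
+
+    captureStats("capture.dump")       # the (capture) file from above
+
+For a gzipped capture, the same sketch can be fed via `stdin` (e.g. behind
+`zcat`) by replacing the file stream with `newFileStream(stdin)`.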
diff --git a/execution_chain/sync/beacon/trace/trace_desc.nim b/execution_chain/sync/beacon/trace/trace_desc.nim new file mode 100644 index 0000000000..d96313a02f --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_desc.nim @@ -0,0 +1,219 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Trace environment descriptor and helpers +## +## TODO: +## * n/a +## + +{.push raises:[].} + +import + std/[net, streams], + pkg/[chronos, eth/common], + ../../wire_protocol, + ../worker_desc + +export + worker_desc + +const + TraceVersionID* = 20250730 + + TraceBaseHandlersID* = 1 + TraceOverlayHandlersID* = 10 + +type + StopIfEosHdl* = proc(trc: TraceRef) {.gcsafe, raises: [].} + ## Terminate trace if the number of sessions is exhausted + + TraceBaseHandlersRef* = ref object of BeaconHandlersRef + ## Extension for caching state so that the tracer start can be + ## synchronised with, e.g. after the syncer has started + strm*: Stream + nSessions*: int + + TraceRef* = ref object of BeaconHandlersRef + ## Overlay handlers extended by descriptor data + ctx*: BeaconCtxRef ## Parent context + outStream*: Stream ## Dump file for ethxx data packets + backup*: BeaconHandlersRef ## Can restore previous handlers + started*: Moment ## Start time + sessions*: int ## Initial number of sessions + remaining*: int ## Number of sessions left to run + stopIfEos*: StopIfEosHdl ## Auto-disable trace when needed + serial: uint ## Unique record ID + + # ------------- + + TraceRecType* = enum + TrtOops = 0 + TrtVersionInfo = 1 + + TrtSyncActvFailed + TrtSyncActivated + TrtSyncHibernated + + TrtSchedDaemonBegin + TrtSchedDaemonEnd + TrtSchedStart + TrtSchedStop + TrtSchedPool + TrtSchedPeerBegin + TrtSchedPeerEnd + + TrtFetchHeaders + TrtSyncHeaders + + TrtFetchBodies + TrtSyncBodies + + TrtImportBlock + TrtSyncBlock + + TraceRecBase* = object of RootObj + ## Trace context applicable with and without known peer + time*: Duration ## Relative to `TraceRef.started` + serial*: uint ## Increasing serial number + frameID*: uint ## Begin/end frame + nPeers*: uint + syncState*: SyncState + chainMode*: HeaderChainMode + poolMode*: bool + + baseNum*: BlockNumber ## Max finalised number from `FC` module + latestNum*: BlockNumber ## Number of latest branch head + antecedent*: BlockNumber ## Lower end of header chain cache + + hdrUnprLen*: uint64 ## # unprocessed header entries + hdrUnprChunks*: uint ## # unprocessed header iv segments + hdrUnprLast*: BlockNumber ## last avail block number + hdrUnprLastLen*: uint64 ## size of last block number interval + + blkUnprLen*: uint64 ## # unprocessed block entries + blkUnprChunks*: uint ## # unprocessed block iv segments + blkUnprLeast*: BlockNumber ## least avail block number + blkUnprLeastLen*: uint64 ## size of first interval + + stateAvail*: int ## Bitmask: 1=peerCtrl, 2=peerID, etc. + peerCtrl*: BuddyRunState ## 1) Rlp encoded `Opt[seq[xxx]]` would + peerID*: Hash ## 2) .. need manual decoder/reader + nHdrErrors*: uint8 ## 4) # header comm. errors + nBlkErrors*: uint8 ## 8) # body comm. 
errors
+    slowPeer*: Hash                 ## 16) Registered slow peer
+
+
+  TraceVersionInfo* = object of TraceRecBase
+    version*: uint
+    networkId*: NetworkId
+
+  # -------------
+
+  TraceSyncActvFailed* = object of TraceRecBase
+
+  TraceSyncActivated* = object of TraceRecBase
+    head*: Header                   ## Part of environment
+    finHash*: Hash32                ## Part of environment
+
+  TraceSyncHibernated* = object of TraceRecBase
+
+  # -------------
+
+  TraceSchedDaemonBegin* = object of TraceRecBase
+    ## Environment is captured before the daemon handler body is executed.
+
+  TraceSchedDaemonEnd* = object of TraceRecBase
+    ## Environment is captured when leaving the daemon handler.
+    idleTime*: Duration             ## Suggested idle time
+
+  TraceSchedStart* = object of TraceRecBase
+    ## Environment is captured when leaving the sched start handler.
+    peerIP*: IpAddress              ## Descriptor argument
+    peerPort*: Port                 ## Descriptor argument
+    accept*: bool                   ## Result/return code
+
+  TraceSchedStop* = object of TraceRecBase
+    ## Environment is captured when leaving the sched stop handler.
+    peerIP*: IpAddress              ## Descriptor argument
+    peerPort*: Port                 ## Descriptor argument
+
+  TraceSchedPool* = object of TraceRecBase
+    ## Environment is captured when leaving the pool handler.
+    peerIP*: IpAddress              ## Descriptor argument
+    peerPort*: Port                 ## Descriptor argument
+    last*: bool                     ## Request argument
+    laps*: uint                     ## Request argument
+    stop*: bool                     ## Result/return code
+
+  TraceSchedPeerBegin* = object of TraceRecBase
+    ## Environment is captured before the peer handler body is executed.
+    peerIP*: IpAddress              ## Descriptor argument
+    peerPort*: Port                 ## Descriptor argument
+
+  TraceSchedPeerEnd* = object of TraceRecBase
+    ## Environment is captured when leaving the peer handler.
+    idleTime*: Duration             ## Suggested idle time
+
+  # -------------
+
+  TraceFetchHeaders* = object of TraceRecBase
+    ## Environment is captured after the `getBlockHeaders()` handler is run.
+    req*: BlockHeadersRequest       ## Fetch request
+    ivReq*: BnRange                 ## Request as interval of block numbers
+    fieldAvail*: uint               ## Bitmask: 1=fetched, 2=error
+    fetched*: FetchHeadersData      ## If downloaded successfully
+    error*: BeaconError
+
+  TraceSyncHeaders* = object of TraceRecBase
+    ## Environment is captured when the `syncBlockHeaders()` handler is run.
+
+
+  TraceFetchBodies* = object of TraceRecBase
+    ## Environment is captured after the `getBlockBodies()` handler is run.
+    req*: BlockBodiesRequest        ## Fetch request
+    ivReq*: BnRange                 ## Request as interval of block numbers
+    fieldAvail*: uint               ## Bitmask: 1=fetched, 2=error
+    fetched*: FetchBodiesData       ## If downloaded successfully
+    error*: BeaconError
+
+  TraceSyncBodies* = object of TraceRecBase
+    ## Environment is captured when the `syncBlockBodies()` handler is run.
+
+
+  TraceImportBlock* = object of TraceRecBase
+    ## Environment is captured after the `importBlock()` handler is run.
+    ethBlock*: EthBlock             ## Request argument
+    effPeerID*: Hash                ## Request argument
+    fieldAvail*: uint               ## Bitmask: 1=elapsed, 2=error
+    elapsed*: Duration              ## Processing time on success
+    error*: BeaconError
+
+  TraceSyncBlock* = object of TraceRecBase
+    ## Environment is captured after the `syncImportBlock()` handler is run.
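+
+  # Note (illustration only, not relied upon by this module): for the
+  # fetch/import records above, the `fieldAvail` bitmask tells which of the
+  # optional result fields the writer has filled in, i.e. bit 1 selects the
+  # success field (`fetched` resp. `elapsed`) and bit 2 selects `error`.
+  # A reader would branch along the lines of
+  #
+  #   if (rec.fieldAvail and 1) != 0: ..use rec.fetched..
+  #   elif (rec.fieldAvail and 2) != 0: ..use rec.error..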
+
+# ------------------------------------------------------------------------------
+# Public helpers
+# ------------------------------------------------------------------------------
+
+func trace*(ctx: BeaconCtxRef): TraceRef =
+  ## Getter, get trace descriptor (if any)
+  if ctx.handler.version == TraceOverlayHandlersID:
+    return ctx.handler.TraceRef
+
+func newSerial*(trc: TraceRef): uint64 =
+  trc.serial.inc
+  if trc.serial == 0:
+    trc.serial.inc
+  trc.serial
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/execution_chain/sync/beacon/trace/trace_setup.nim b/execution_chain/sync/beacon/trace/trace_setup.nim
new file mode 100644
index 0000000000..fdf623561f
--- /dev/null
+++ b/execution_chain/sync/beacon/trace/trace_setup.nim
@@ -0,0 +1,109 @@
+# Nimbus
+# Copyright (c) 2025 Status Research & Development GmbH
+# Licensed and distributed under either of
+#  * MIT license (license terms in the root directory or at
+#      https://opensource.org/licenses/MIT).
+#  * Apache v2 license (license terms in the root directory or at
+#      https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+## Trace environment setup & destroy
+
+{.push raises:[].}
+
+import
+  std/[os, streams, syncio],
+  pkg/chronicles,
+  ./trace_desc
+
+logScope:
+  topics = "beacon trace"
+
+const
+  DontQuit = low(int)
+    ## To be used with `onException()`
+
+# ------------------------------------------------------------------------------
+# Private helpers
+# ------------------------------------------------------------------------------
+
+template onException(
+    info: static[string];
+    quitCode: static[int];
+    code: untyped) =
+  try:
+    code
+  except CatchableError as e:
+    const blurb = info & "Trace stream exception"
+    when quitCode == DontQuit:
+      error blurb, error=($e.name), msg=e.msg
+    else:
+      fatal blurb & " -- STOP", error=($e.name), msg=e.msg
+      quit(quitCode)
+
+# ------------------------------------------------------------------------------
+# Public constructor/destructor
+# ------------------------------------------------------------------------------
+
+proc traceSetup*(
+    ctx: BeaconCtxRef;
+    fileName: string;
+    nSessions: int;
+      ): bool =
+  ## Open the trace output file `fileName` and pre-install the base session
+  ## handlers so that the tracer can be started later (see `traceStart()`.)
+  const info = "traceSetup(): "
+
+  if ctx.handler.version != 0:
+    error info & "Overlay session handlers activated already",
+      ID=ctx.handler.version
+    return false
+
+  if fileName.fileExists:      # File must not exist yet
+    error info & "Unsafe, please delete file first", fileName
+    return false
+
+  var strm = Stream(nil)
+  info.onException(DontQuit):
+    # Note that there is a race condition. The proper open mode should be
+    # `fmReadWriteExisting` (sort of resembling `O_CREATE|O_EXCL`) but it
+    # does not work with the current nim version `2.2.4`.
+ var fd: File + if fd.open(fileName, fmWrite): + strm = fd.newFileStream() + + if strm.isNil: + error info & "Cannot open trace file for writing", fileName + return false + + ctx.pool.handlers = TraceBaseHandlersRef( + version: TraceBaseHandlersID, + strm: strm, + nSessions: nSessions, + activate: ctx.handler.activate, + suspend: ctx.handler.suspend, + schedDaemon: ctx.handler.schedDaemon, + schedStart: ctx.handler.schedStart, + schedStop: ctx.handler.schedStop, + schedPool: ctx.handler.schedPool, + schedPeer: ctx.handler.schedPeer, + getBlockHeaders: ctx.handler.getBlockHeaders, + syncBlockHeaders: ctx.handler.syncBlockHeaders, + getBlockBodies: ctx.handler.getBlockBodies, + syncBlockBodies: ctx.handler.syncBlockBodies, + importBlock: ctx.handler.importBlock, + syncImportBlock: ctx.handler.syncImportBlock) + + true + +proc traceRelease*(ctx: BeaconCtxRef) = + const info = "traceRelease(): " + + if ctx.handler.version == TraceBaseHandlersID: + info.onException(DontQuit): + ctx.handler.TraceBaseHandlersRef.strm.close() + ctx.pool.handlers.version = 0 + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/trace/trace_start_stop.nim b/execution_chain/sync/beacon/trace/trace_start_stop.nim new file mode 100644 index 0000000000..96f892530c --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_start_stop.nim @@ -0,0 +1,142 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+
+## Trace environment start & stop
+
+{.push raises:[].}
+
+import
+  std/streams,
+  pkg/[chronicles, chronos],
+  ./trace_start_stop/handlers/[blocks, headers, helpers, sched, sync],
+  ./[trace_desc, trace_write]
+
+logScope:
+  topics = "beacon trace"
+
+const
+  DontQuit = low(int)
+    ## To be used with `onException()`
+
+  stopInfo = "traceStop(): "
+  startInfo = "traceStart(): "
+
+# ------------------------------------------------------------------------------
+# Private helpers
+# ------------------------------------------------------------------------------
+
+template onException(
+    info: static[string];
+    quitCode: static[int];
+    code: untyped) =
+  try:
+    code
+  except CatchableError as e:
+    const blurb = info & "Trace stream exception"
+    when quitCode == DontQuit:
+      error blurb, error=($e.name), msg=e.msg
+    else:
+      fatal blurb & " -- STOP", error=($e.name), msg=e.msg
+      quit(quitCode)
+
+# -----------
+
+proc traceStop(trc: TraceRef) =
+  if not trc.isNil:
+    trc.ctx.pool.handlers = trc.backup
+
+    stopInfo.onException(DontQuit):
+      trc.outStream.flush()
+      trc.outStream.close()
+
+proc stopIfEos(trc: TraceRef) =
+  trc.remaining.dec
+  if trc.remaining <= 0:
+    info stopInfo & "Number of sessions exhausted", nSessions=trc.sessions
+    trc.traceStop()
+
+proc writeVersion(ctx: BeaconCtxRef) =
+  var tRec: TraceVersionInfo
+  tRec.init ctx
+  tRec.version = TraceVersionID
+  tRec.networkId = ctx.chain.com.networkId
+  ctx.traceWrite tRec
+  trace "=Version", TraceVersionID, serial=tRec.serial
+
+# ------------------------------------------------------------------------------
+# Public constructor/destructor
+# ------------------------------------------------------------------------------
+
+proc traceStop*(ctx: BeaconCtxRef) =
+  ctx.trace.traceStop()
+
+
+proc traceStart*(ctx: BeaconCtxRef; strm: Stream; nSessions: int) =
+  ## Start trace session
+  let nSessions = max(1, nSessions)    # run at least one session
+
+  if ctx.handler.version in {0, TraceBaseHandlersID}:
+    ctx.pool.handlers = TraceRef(
+
+      # Install new Overlay handler descriptor
+      ctx: ctx,
+      outStream: strm,
+      backup: ctx.pool.handlers,
+      started: Moment.now(),
+      sessions: nSessions,
+      remaining: nSessions,
+      stopIfEos: stopIfEos,
+
+      # Set up redirect handlers for tracing
+      version: TraceOverlayHandlersID,
+      activate: activateTrace,
+      suspend: suspendTrace,
+      schedDaemon: schedDaemonTrace,
+      schedStart: schedStartTrace,
+      schedStop: schedStopTrace,
+      schedPool: schedPoolTrace,
+      schedPeer: schedPeerTrace,
+      getBlockHeaders: fetchHeadersTrace,
+      syncBlockHeaders: syncHeadersTrace,
+      getBlockBodies: fetchBodiesTrace,
+      syncBlockBodies: syncBodiesTrace,
+      importBlock: importBlockTrace,
+      syncImportBlock: syncBlockTrace)
+
+    # Write version as first record
+    ctx.writeVersion()
+
+  elif ctx.handler.version == TraceOverlayHandlersID:
+    # Install new output file
+    let trc = ctx.handler.TraceRef
+    startInfo.onException(QuitFailure):
+      trc.outStream.flush()
+      trc.outStream.close()
+    trc.outStream = strm
+    trc.sessions = nSessions
+    trc.remaining = nSessions
+
+  else:
+    fatal startInfo & "Overlay session handlers activated already",
+      ID=ctx.handler.version
+    quit(QuitFailure)
+
+
+proc traceStart*(ctx: BeaconCtxRef) =
+  ## Variant of `traceStart()` for pre-initialised handlers (see
+  ## `traceSetup()`.)
+ ## + if ctx.handler.version == TraceBaseHandlersID: + let hdl = ctx.handler.TraceBaseHandlersRef + ctx.traceStart(hdl.strm, hdl.nSessions) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/trace/trace_start_stop/handlers/blocks.nim b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/blocks.nim new file mode 100644 index 0000000000..aea08b4e35 --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/blocks.nim @@ -0,0 +1,139 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/[chronicles, chronos, stew/interval_set], + ../../../../../networking/p2p, + ../../../../wire_protocol/types, + ../../[trace_desc, trace_write], + ./helpers + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc toBnRange( + ctx: BeaconCtxRef; + lst: openArray[Hash32]; + info: static[string]; + ): BnRange = + ## Resolve block hashes as interval of block numbers + let rs = BnRangeSet.init() + for w in lst: + let h = ctx.hdrCache.get(w).valueOr: + raiseAssert info & ": Cannot resolve" & + ", hash=" & w.short + if rs.merge(h.number,h.number) != 1: + raiseAssert info & ": dulplicate hash" & + ", hash=" & w.short & ", number=" & h.bnStr + rs.ge().expect "valid BnRange" + + +proc toPeerStr(maybePeer: Opt[BeaconBuddyRef]): string = + if maybePeer.isOk(): $maybePeer.value.peer else: "n/a" + +proc toPeerIdStr(maybePeer: Opt[BeaconBuddyRef]): string = + if maybePeer.isOk(): maybePeer.value.peerID.short else: "n/a" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc fetchBodiesTrace*( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Future[Result[FetchBodiesData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockBodies()` handler which in addition writes data + ## to the output stream for tracing. + ## + let + ivReq = buddy.ctx.toBnRange(req.blockHashes, "fetchBodiesTrace") + data = await buddy.ctx.trace.backup.getBlockBodies(buddy, req) + + var tRec: TraceFetchBodies + tRec.init buddy + tRec.req = req + tRec.ivReq = ivReq + if data.isOk: + tRec.fieldAvail = 1 + tRec.fetched = data.value + else: + tRec.fieldAvail = 2 + tRec.error = data.error + buddy.traceWrite tRec + + trace "=BodiesFetch", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, ivReq=ivReq.bnStr + return data + +proc syncBodiesTrace*( + buddy: BeaconBuddyRef; + ) = + ## Replacement for `syncBlockBodies()` handler. 
+ var tRec: TraceSyncBodies + tRec.init buddy + buddy.traceWrite tRec + + trace "=BodiesSync", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial + + +proc importBlockTrace*( + ctx: BeaconCtxRef; + maybePeer: Opt[BeaconBuddyRef]; + ethBlock: EthBlock; + effPeerID: Hash; + ): Future[Result[Duration,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `importBlock()` handler which in addition writes data to + ## the output stream for tracing. + ## + let data = await ctx.trace.backup.importBlock( + ctx, maybePeer, ethBlock, effPeerID) + + var tRec: TraceImportBlock + tRec.init(ctx, maybePeer) + tRec.ethBlock = ethBlock + tRec.effPeerID = effPeerID + if data.isOk: + tRec.fieldAvail = 1 + tRec.elapsed = data.value + else: + tRec.fieldAvail = 2 + tRec.error = data.error + ctx.traceWrite tRec + + trace "=BlockImport", peer=maybePeer.toPeerStr, peerID=maybePeer.toPeerIdStr, + effPeerID=tRec.peerID.short, serial=tRec.serial + return data + +proc syncBlockTrace*( + ctx: BeaconCtxRef; + maybePeer: Opt[BeaconBuddyRef]; + ) = + ## Replacement for `syncImportBlock()` handler. + var tRec: TraceSyncBlock + tRec.init(ctx, maybePeer) + ctx.traceWrite tRec + + trace "=BlockSync", peer=maybePeer.toPeerStr, peerID=maybePeer.toPeerIdStr, + effPeerID=tRec.peerID.short, serial=tRec.serial + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/trace/trace_start_stop/handlers/headers.nim b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/headers.nim new file mode 100644 index 0000000000..7123ff9706 --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/headers.nim @@ -0,0 +1,68 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/[chronicles, chronos, stew/interval_set], + ../../../../../networking/p2p, + ../../../../wire_protocol, + ../../[trace_desc, trace_write], + ./helpers + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc fetchHeadersTrace*( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Future[Result[FetchHeadersData,BeaconError]] + {.async: (raises: []).} = + ## Replacement for `getBlockHeaders()` handler which in addition writes data + ## to the output stream for tracing. 
+  ##
+  let data = await buddy.ctx.trace.backup.getBlockHeaders(buddy, req)
+
+  var tRec: TraceFetchHeaders
+  tRec.init buddy
+  tRec.req = req
+  if data.isOk:
+    tRec.fieldAvail = 1
+    tRec.fetched = data.value
+  else:
+    tRec.fieldAvail = 2
+    tRec.error = data.error
+  buddy.traceWrite tRec
+
+  trace "=HeadersFetch", peer=($buddy.peer), peerID=buddy.peerID.short,
+    serial=tRec.serial
+  return data
+
+proc syncHeadersTrace*(
+    buddy: BeaconBuddyRef;
+      ) =
+  ## Replacement for `syncBlockHeaders()` handler.
+  ##
+  var tRec: TraceSyncHeaders
+  tRec.init buddy
+  buddy.traceWrite tRec
+
+  trace "=HeadersSync", peer=($buddy.peer), peerID=buddy.peerID.short,
+    serial=tRec.serial
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/execution_chain/sync/beacon/trace/trace_start_stop/handlers/helpers.nim b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/helpers.nim
new file mode 100644
index 0000000000..8ad05b021c
--- /dev/null
+++ b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/helpers.nim
@@ -0,0 +1,97 @@
+# Nimbus
+# Copyright (c) 2025 Status Research & Development GmbH
+# Licensed and distributed under either of
+#  * MIT license (license terms in the root directory or at
+#      https://opensource.org/licenses/MIT).
+#  * Apache v2 license (license terms in the root directory or at
+#      https://www.apache.org/licenses/LICENSE-2.0).
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+{.push raises:[].}
+
+import
+  std/[strformat, strutils],
+  pkg/[chronos, stew/interval_set],
+  ../../../worker/helpers as worker_helpers,
+  ../../trace_desc
+
+export
+  worker_helpers
+
+# ------------------------------------------------------------------------------
+# Public context capture initialisation
+# ------------------------------------------------------------------------------
+
+proc init*(tb: var TraceRecBase; ctx: BeaconCtxRef) =
+  ## Initialise a new trace record. This function does nothing if
+  ## there is no active trace.
+ let trc = ctx.trace + if not trc.isNil: + tb.serial = trc.newSerial + tb.time = Moment.now() - trc.started + tb.syncState = ctx.pool.lastState + tb.nPeers = ctx.pool.nBuddies.uint + tb.chainMode = ctx.hdrCache.state + tb.poolMode = ctx.poolMode + tb.baseNum = ctx.chain.baseNumber + tb.latestNum = ctx.chain.latestNumber + tb.antecedent = ctx.hdrCache.antecedent.number + + tb.hdrUnprChunks = ctx.hdr.unprocessed.chunks().uint + if 0 < tb.hdrUnprChunks: + tb.hdrUnprLen = ctx.hdr.unprocessed.total() + let iv = ctx.hdr.unprocessed.le().expect "valid iv" + tb.hdrUnprLast = iv.maxPt + tb.hdrUnprLastLen = iv.len + + tb.blkUnprChunks = ctx.blk.unprocessed.chunks().uint + if 0 < tb.blkUnprChunks: + tb.blkUnprLen = ctx.blk.unprocessed.total() + let iv = ctx.blk.unprocessed.ge().expect "valid iv" + tb.blkUnprLeast = iv.minPt + tb.blkUnprLeastLen = iv.len + + if ctx.pool.lastSlowPeer.isOk(): + tb.stateAvail = 16 + tb.slowPeer = ctx.pool.lastSlowPeer.value + else: + tb.stateAvail = 0 + +proc init*(tb: var TraceRecBase; buddy: BeaconBuddyRef) = + ## Variant of `init()` for `buddy` rather than `ctx` + let + ctx = buddy.ctx + trc = ctx.trace + if not trc.isNil: + tb.init ctx + tb.stateAvail += 15 + tb.peerCtrl = buddy.ctrl.state + tb.peerID = buddy.peerID + tb.nHdrErrors = buddy.only.nRespErrors.hdr + tb.nBlkErrors = buddy.only.nRespErrors.blk + +proc init*( + tb: var TraceRecBase; + ctx: BeaconCtxRef; + maybePeer: Opt[BeaconBuddyRef]; + ) = + ## Variant of `init()` + let trc = ctx.trace + if not trc.isNil: + if maybePeer.isSome: + tb.init maybePeer.value + else: + tb.init ctx + +# -------------- + +func short*(w: Hash): string = + w.toHex(8).toLowerAscii # strips leading 8 bytes + +func idStr*(w: uint64): string = + &"{w:x}" + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/trace/trace_start_stop/handlers/sched.nim b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/sched.nim new file mode 100644 index 0000000000..6bcd5911d4 --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/sched.nim @@ -0,0 +1,177 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/[chronicles, chronos], + ../../../../../networking/p2p, + ../../[trace_desc, trace_write], + ./helpers + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc getIP(buddy: BeaconBuddyRef): IpAddress = + buddy.peer.remote.node.address.ip + +proc getPort(buddy: BeaconBuddyRef): Port = + let peer = buddy.peer + if peer.remote.node.address.tcpPort != Port(0): + peer.remote.node.address.tcpPort + else: + peer.remote.node.address.udpPort + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc schedDaemonTrace*( + ctx: BeaconCtxRef; + ): Future[Duration] + {.async: (raises: []).} = + ## Replacement for `schedDaemon()` handler which in addition + ## write data to the output stream for tracing. + ## + var tBeg: TraceSchedDaemonBegin + tBeg.init ctx + tBeg.frameID = tBeg.serial + ctx.traceWrite tBeg + + trace "+Daemon", serial=tBeg.serial, frameID=tBeg.frameID.idStr, + syncState=tBeg.syncState + + let idleTime = await ctx.trace.backup.schedDaemon ctx + + var tEnd: TraceSchedDaemonEnd + tEnd.init ctx + tEnd.frameID = tBeg.serial # refers back to `tBeg` capture + tEnd.idleTime = idleTime + ctx.traceWrite tEnd + + if 0 < tEnd.serial: + trace "-Daemon", serial=tEnd.serial, frameID=tEnd.frameID.idStr, + syncState=tBeg.syncState, idleTime=idleTime.toStr + else: + trace "-Daemon (blind)", serial="n/a", frameID=tEnd.frameID.idStr, + syncState=tBeg.syncState, idleTime=idleTime.toStr + + return idleTime + + +proc schedStartTrace*(buddy: BeaconBuddyRef): bool = + ## Similar to `schedDaemonTrace()` + ## + let + ctx = buddy.ctx + acceptOk = ctx.trace.backup.schedStart(buddy) + + if not ctx.hibernate: + var tRec: TraceSchedStart + tRec.init buddy + tRec.frameID = tRec.serial + tRec.peerIP = buddy.getIP() + tRec.peerPort = buddy.getPort() + tRec.accept = acceptOk + buddy.traceWrite tRec + + trace "=StartPeer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, frameID=tRec.frameID.idStr, + syncState=tRec.syncState + + acceptOk + + +proc schedStopTrace*(buddy: BeaconBuddyRef) = + ## Similar to `schedDaemonTrace()` + ## + let ctx = buddy.ctx + + ctx.trace.backup.schedStop(buddy) + + if not ctx.hibernate: + var tRec: TraceSchedStop + tRec.init buddy + tRec.frameID = tRec.serial + tRec.peerIP = buddy.getIP() + tRec.peerPort = buddy.getPort() + buddy.traceWrite tRec + + trace "=StopPeer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, frameID=tRec.frameID.idStr, + syncState=tRec.syncState + + +proc schedPoolTrace*(buddy: BeaconBuddyRef; last: bool; laps: int): bool = + ## Similar to `schedDaemonTrace()` + ## + let stopOk = buddy.ctx.trace.backup.schedPool(buddy, last, laps) + + var tRec: TraceSchedPool + tRec.init buddy + tRec.frameID = tRec.serial + tRec.peerIP = buddy.getIP() + tRec.peerPort = buddy.getPort() + tRec.last = last + tRec.laps = laps.uint + tRec.stop = stopOk + buddy.traceWrite tRec + + trace "=Pool", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tRec.serial, frameID=tRec.frameID.idStr + + stopOk + + +proc schedPeerTrace*( + buddy: BeaconBuddyRef; + ): Future[Duration] + {.async: (raises: []).} = + ## Similar to `schedDaemonTrace()` + ## + let + ctx = buddy.ctx + 
noisy = not ctx.hibernate + + var tBeg: TraceSchedPeerBegin + if noisy: + tBeg.init buddy + tBeg.frameID = tBeg.serial + tBeg.peerIP = buddy.getIP() + tBeg.peerPort = buddy.getPort() + buddy.traceWrite tBeg + + trace "+Peer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tBeg.serial, frameID=tBeg.frameID.idStr, syncState=tBeg.syncState + + let idleTime = await ctx.trace.backup.schedPeer(buddy) + + if noisy: + var tEnd: TraceSchedPeerEnd + tEnd.init buddy + tEnd.frameID = tBeg.serial # refers back to `tBeg` capture + tEnd.idleTime = idleTime + buddy.traceWrite tEnd + + trace "-Peer", peer=($buddy.peer), peerID=buddy.peerID.short, + serial=tEnd.serial, frameID=tEnd.frameID.idStr, syncState=tBeg.syncState, + idleTime=idleTime.toStr + + return idleTime + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/trace/trace_start_stop/handlers/sync.nim b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/sync.nim new file mode 100644 index 0000000000..74516b1619 --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_start_stop/handlers/sync.nim @@ -0,0 +1,71 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +## Overlay handler for trace environment + +{.push raises:[].} + +import + pkg/chronicles, + ../../[trace_desc, trace_write], + ./helpers + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc activateTrace*(ctx: BeaconCtxRef) = + ## Replacement for `activate()` handler which in addition + ## write data to the output stream for tracing. + ## + let hdl = ctx.trace.backup + hdl.activate ctx + + if ctx.hibernate: + var tRec: TraceSyncActvFailed + tRec.init ctx + ctx.traceWrite tRec + + trace "=ActvFailed", serial=tRec.serial + + else: + let chn = ctx.chain + var tRec: TraceSyncActivated + tRec.init ctx + tRec.head = ctx.hdrCache.head + tRec.finHash = chn.finHash + ctx.traceWrite tRec + + trace "=Activated", serial=tRec.serial + + +proc suspendTrace*(ctx: BeaconCtxRef) = + ## Replacement for `suspend()` handler which in addition writes + ## data to the output stream for tracing. 
+ ## + let hdl = ctx.trace.backup + hdl.suspend ctx + + var tRec: TraceSyncHibernated + tRec.init ctx + ctx.traceWrite tRec + + trace "=Suspended", serial=tRec.serial + + let trc = ctx.trace + if not trc.isNil: + trc.stopIfEos(trc) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/trace/trace_write.nim b/execution_chain/sync/beacon/trace/trace_write.nim new file mode 100644 index 0000000000..c62e30cd91 --- /dev/null +++ b/execution_chain/sync/beacon/trace/trace_write.nim @@ -0,0 +1,151 @@ +# Nimbus +# Copyright (c) 2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +{.push raises:[].} + +import + std/[net, streams, typetraits], + pkg/[chronicles, chronos, eth/common, stew/base64], + ./trace_desc + +logScope: + topics = "beacon trace" + +# ------------------------------------------------------------------------------ +# Private mixin helpers for RLP encoder +# ------------------------------------------------------------------------------ + +proc append(w: var RlpWriter, h: Hash) = + when sizeof(h) != sizeof(uint): + # `castToUnsigned()` is defined in `std/private/bitops_utils` and + # included by `std/bitops` but not exported (as of nim 2.2.4) + {.error: "Expected that Hash is based on int".} + w.append(cast[uint](h).uint64) + +proc append(w: var RlpWriter, d: chronos.Duration) = + w.append(cast[uint64](d.nanoseconds)) + +proc append(w: var RlpWriter, p: Port) = + w.append(distinctBase p) + +# ------------------------------------------------------------------------------ +# Private helpers +# ------------------------------------------------------------------------------ + +proc toTypeInx(w: TraceRecType): string = + if w.ord < 10: + $w.ord + else: + $chr(w.ord + 'A'.ord - 10) + + +proc toStream( + buddy: BeaconBuddyRef; + trp: TraceRecType; + blob: seq[byte]; + flush = false; + ) = + ## Write tracet data to output stream + let trc = buddy.ctx.trace + if trc.isNil: + debug "Trace output stopped while collecting", + peer=($buddy.peer), recType=trp + else: + try: + trc.outStream.writeLine trp.toTypeInx & " " & Base64.encode(blob) + trc.outStream.flush() + except CatchableError as e: + warn "Error writing trace data", peer=($buddy.peer), recType=trp, + recSize=blob.len, error=($e.name), msg=e.msg + +proc toStream( + ctx: BeaconCtxRef; + trp: TraceRecType; + blob: seq[byte]; + flush = false; + ) = + ## Variant of `toStream()` for `ctx` rather than `buddy` + let trc = ctx.trace + if trc.isNil: + debug "Trace output stopped while collecting", recType=trp + else: + try: + trc.outStream.writeLine trp.toTypeInx & " " & Base64.encode(blob) + trc.outStream.flush() + except CatchableError as e: + warn "Error writing trace data", recType=trp, + recSize=blob.len, error=($e.name), msg=e.msg + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceVersionInfo) = + ctx.toStream(TrtVersionInfo, rlp.encode w) + +# 
------------- + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceSyncActvFailed) = + ctx.toStream(TrtSyncActvFailed, rlp.encode w) + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceSyncActivated) = + ctx.toStream(TrtSyncActivated, rlp.encode w) + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceSyncHibernated) = + ctx.toStream(TrtSyncHibernated, rlp.encode w) + +# ------------- + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceSchedDaemonBegin) = + ctx.toStream(TrtSchedDaemonBegin, rlp.encode w) + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceSchedDaemonEnd) = + ctx.toStream(TrtSchedDaemonEnd, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSchedStart) = + buddy.toStream(TrtSchedStart, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSchedStop) = + buddy.toStream(TrtSchedStop, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSchedPool) = + buddy.toStream(TrtSchedPool, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSchedPeerBegin) = + buddy.toStream(TrtSchedPeerBegin, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSchedPeerEnd) = + buddy.toStream(TrtSchedPeerEnd, rlp.encode w) + +# ------------- + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceFetchHeaders) = + buddy.toStream(TrtFetchHeaders, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSyncHeaders) = + buddy.toStream(TrtSyncHeaders, rlp.encode w) + + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceFetchBodies) = + buddy.toStream(TrtFetchBodies, rlp.encode w) + +proc traceWrite*(buddy: BeaconBuddyRef; w: TraceSyncBodies) = + buddy.toStream(TrtSyncBodies, rlp.encode w) + + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceImportBlock) = + ctx.toStream(TrtImportBlock, rlp.encode w) + +proc traceWrite*(ctx: BeaconCtxRef; w: TraceSyncBlock) = + ctx.toStream(TrtSyncBlock, rlp.encode w) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim b/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim index 7effe8429c..b4b05d0140 100644 --- a/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim +++ b/execution_chain/sync/beacon/worker/blocks/blocks_blocks.nim @@ -17,13 +17,29 @@ import ../../../../networking/p2p, ../../../wire_protocol/types, ../../worker_desc, - ../update, - ./[blocks_fetch, blocks_helpers, blocks_import, blocks_unproc] + ./[blocks_fetch, blocks_helpers, blocks_unproc] + +import + ./blocks_debug # ------------------------------------------------------------------------------ # Private helpers # ------------------------------------------------------------------------------ +template importBlock( + ctx: BeaconCtxRef; + maybePeer: Opt[BeaconBuddyRef]; + blk: EthBlock; + effPeerID: Hash; + ): Result[Duration,BeaconError] = + ## Async/template + ## + ## Wrapper around `importBlock()` handler + ## + let rc = await ctx.handler.importBlock(ctx, maybePeer, blk, effPeerID) + ctx.handler.syncImportBlock(ctx, maybePeer) # debugging, trace, replay + rc + proc getNthHash(ctx: BeaconCtxRef; blocks: seq[EthBlock]; n: int): Hash32 = ctx.hdrCache.getHash(blocks[n].header.number).valueOr: return zeroHash32 @@ -188,16 +204,17 @@ template blocksImport*( let iv {.inject.} = BnRange.new(blocks[0].header.number, blocks[^1].header.number) doAssert iv.len == blocks.len.uint64 + doAssert ctx.blk.verify() var isError = false block loop: trace info & ": Start importing 
blocks", peer=maybePeer.toStr, iv, nBlocks=iv.len, base=ctx.chain.baseNumber.bnStr, - head=ctx.chain.latestNumber.bnStr + head=ctx.chain.latestNumber.bnStr, blk=ctx.blk.bnStr for n in 0 ..< blocks.len: let nBn = blocks[n].header.number - discard (await ctx.importBlock(maybePeer, blocks[n], peerID)).valueOr: + ctx.importBlock(maybePeer, blocks[n], peerID).isOkOr: if error.excp != ECancelledError: isError = true @@ -245,7 +262,8 @@ template blocksImport*( nBlocks=(ctx.subState.top - iv.minPt + 1), nFailed=(iv.maxPt - ctx.subState.top), base=ctx.chain.baseNumber.bnStr, head=ctx.chain.latestNumber.bnStr, - target=ctx.subState.head.bnStr, targetHash=ctx.subState.headHash.short + target=ctx.subState.head.bnStr, targetHash=ctx.subState.headHash.short, + blk=ctx.blk.bnStr discard diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_debug.nim b/execution_chain/sync/beacon/worker/blocks/blocks_debug.nim new file mode 100644 index 0000000000..57c38bf006 --- /dev/null +++ b/execution_chain/sync/beacon/worker/blocks/blocks_debug.nim @@ -0,0 +1,67 @@ +# Nimbus +# Copyright (c) 2023-2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +{.push raises:[].} + +import + std/[sequtils, strutils], + pkg/[chronicles, chronos], + pkg/eth/common, + pkg/stew/[interval_set, sorted_set], + ../../worker_desc + +# ------------------------------------------------------------------------------ +# Public logging functions +# ------------------------------------------------------------------------------ + +func bnStr*(w: BnRangeSet): string = + "{" & w.increasing.toSeq.mapIt(it.bnStr).join(",") & "}" + +func bnStr*(w: StagedBlocksQueue): string = + result = "{" + var rc = w.ge(0) + while rc.isOk: + result &= rc.value.data.blocks.bnStr & "," + rc = w.gt(rc.value.key) + if result[^1] == ',': + result[^1] = '}' + else: + result &= "}" + +func bnStr*(w: BlocksFetchSync): string = + "(" & w.unprocessed.bnStr & + "," & w.borrowed.bnStr & + "," & w.staged.bnStr & + ")" + +proc verify*(blk: BlocksFetchSync): bool = + # Unprocessed intervals must not overlap + for iv in blk.borrowed.increasing: + if 0 < blk.unprocessed.covered(iv): + trace "verify: borrowed and unprocessed overlap", blk=blk.bnStr + return false + # Check stashed against unprocessed intervals + var rc = blk.staged.ge(0) + while rc.isOk: + let + minPt = rc.value.data.blocks[0].header.number + maxPt = rc.value.data.blocks[^1].header.number + if 0 < blk.unprocessed.covered(minPt, maxPt): + trace "verify: staged and unprocessed overlap", blk=blk.bnStr + return false + if 0 < blk.borrowed.covered(minPt, maxPt): + trace "verify: staged and borrowed overlap", blk=blk.bnStr + return false + rc = blk.staged.gt(rc.value.key) + true + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim b/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim index 8aec6c587a..b0cb5b3a78 100644 --- a/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim +++ b/execution_chain/sync/beacon/worker/blocks/blocks_fetch.nim @@ -18,11 +18,30 @@ 
import ../../worker_desc, ./blocks_helpers +logScope: + topics = "beacon sync" + +# ------------------------------------------------------------------------------ +# Private helper +# ----------------------------------------------------------------------------- + +template getBlockBodies( + buddy: BeaconBuddyRef; + req: BlockBodiesRequest; + ): Result[FetchBodiesData,BeaconError] = + ## Async/template + ## + ## Wrapper around `getBlockBodies()` handler + ## + let rc = await buddy.ctx.handler.getBlockBodies(buddy, req) + buddy.ctx.handler.syncBlockBodies(buddy) # debugging, sync, replay + rc + # ------------------------------------------------------------------------------ -# Private helpers +# Public handler # ----------------------------------------------------------------------------- -proc getBlockBodies( +proc getBlockBodiesCB*( buddy: BeaconBuddyRef; req: BlockBodiesRequest; ): Future[Result[FetchBodiesData,BeaconError]] @@ -70,7 +89,7 @@ template fetchBodies*( trace trEthSendSendingGetBlockBodies, peer, nReq, bdyErrors=buddy.bdyErrors - let rc = await buddy.getBlockBodies(request) + let rc = buddy.getBlockBodies(request) var elapsed: Duration if rc.isOk: elapsed = rc.value.elapsed diff --git a/execution_chain/sync/beacon/worker/blocks/blocks_import.nim b/execution_chain/sync/beacon/worker/blocks/blocks_import.nim index c8e56c0c98..aa5a43afb9 100644 --- a/execution_chain/sync/beacon/worker/blocks/blocks_import.nim +++ b/execution_chain/sync/beacon/worker/blocks/blocks_import.nim @@ -17,11 +17,14 @@ import ../../worker_desc, ./blocks_helpers +logScope: + topics = "beacon sync" + # ------------------------------------------------------------------------------ -# Public function +# Public handler # ------------------------------------------------------------------------------ -proc importBlock*( +proc importBlockCB*( ctx: BeaconCtxRef; maybePeer: Opt[BeaconBuddyRef]; blk: EthBlock; diff --git a/execution_chain/sync/beacon/worker/headers.nim b/execution_chain/sync/beacon/worker/headers.nim index 91e1257ae6..b360909b6a 100644 --- a/execution_chain/sync/beacon/worker/headers.nim +++ b/execution_chain/sync/beacon/worker/headers.nim @@ -21,6 +21,9 @@ import export headers_queue, headers_unproc +import + ./headers/headers_debug + # ------------------------------------------------------------------------------ # Public functions # ------------------------------------------------------------------------------ @@ -50,6 +53,9 @@ template headersCollect*(buddy: BeaconBuddyRef; info: static[string]) = if ctx.headersUnprocIsEmpty() or ctx.hdrCache.state != collecting: + trace info & ": nothing to do", peer, + unprocEmpty=ctx.headersUnprocIsEmpty(), nStagedQ=ctx.hdr.staged.len, + syncState=($buddy.syncState), nSyncPeers=ctx.pool.nBuddies break body # no action, return var @@ -126,6 +132,7 @@ template headersCollect*(buddy: BeaconBuddyRef; info: static[string]) = # Continue opportunistically fetching by block number rather than hash. # The fetched headers need to be staged and checked/serialised later. 
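The `getBlockBodies()` wrapper above (and its headers twin further down) is a template rather than an async proc, so the `await` is expanded into the async caller while the post-call `sync...` hook runs immediately after the handler returns. A generic sketch of the same pattern, with all names (`fetchFoo`, `syncFoo`, `FooRequest`, `FooData`) hypothetical:

    template fetchFoo(buddy: BeaconBuddyRef; req: FooRequest): Result[FooData,BeaconError] =
      ## Only usable inside an async proc: the `await` belongs to the caller.
      let rc = await buddy.ctx.handler.fetchFoo(buddy, req)
      buddy.ctx.handler.syncFoo(buddy)   # debugging/sync/replay hook
      rc

One plausible reason for the template form is that no extra `Future` or closure is created and the hook is guaranteed to run on the caller's task, right after the network round trip.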
if ctx.hdr.staged.len+ctx.hdr.reserveStaged < headersStagedQueueLengthMax: + doAssert ctx.hdr.verify() # Fetch headers ctx.hdr.reserveStaged.inc # Book a slot on `staged` @@ -150,6 +157,8 @@ template headersCollect*(buddy: BeaconBuddyRef; info: static[string]) = # End block: `fetchHeadersBody` + doAssert ctx.hdr.verify() + if nStored == 0 and nQueued == 0: if not ctx.pool.seenData and buddy.peerID notin ctx.pool.failedPeers and @@ -167,7 +176,7 @@ template headersCollect*(buddy: BeaconBuddyRef; info: static[string]) = unprocTop=(if ctx.hdrSessionStopped(): "n/a" else: ctx.headersUnprocAvailTop.bnStr), nQueued, nStored, nStagedQ=ctx.hdr.staged.len, - nSyncPeers=ctx.pool.nBuddies + nSyncPeers=ctx.pool.nBuddies, hdr=(ctx.hdr.bnStr) discard @@ -204,7 +213,8 @@ proc headersUnstage*(buddy: BeaconBuddyRef; info: static[string]): bool = if maxNum + 1 < dangling: trace info & ": gap, serialisation postponed", peer, qItem=qItem.data.revHdrs.bnStr, D=dangling.bnStr, nStored, - nStagedQ=ctx.hdr.staged.len, nSyncPeers=ctx.pool.nBuddies + nStagedQ=ctx.hdr.staged.len, nSyncPeers=ctx.pool.nBuddies, + hdr=(ctx.hdr.bnStr) switchPeer = true # there is a gap -- come back later break @@ -226,12 +236,12 @@ proc headersUnstage*(buddy: BeaconBuddyRef; info: static[string]): bool = if 0 < nStored: info "Headers serialised and stored", D=ctx.hdrCache.antecedent.bnStr, nStored, nStagedQ=ctx.hdr.staged.len, nSyncPeers=ctx.pool.nBuddies, - switchPeer + switchPeer, hdr=(ctx.hdr.bnStr) elif 0 < ctx.hdr.staged.len and not switchPeer: trace info & ": no headers processed", peer, D=ctx.hdrCache.antecedent.bnStr, nStagedQ=ctx.hdr.staged.len, - nSyncPeers=ctx.pool.nBuddies + nSyncPeers=ctx.pool.nBuddies, hdr=(ctx.hdr.bnStr) not switchPeer diff --git a/execution_chain/sync/beacon/worker/headers/headers_debug.nim b/execution_chain/sync/beacon/worker/headers/headers_debug.nim new file mode 100644 index 0000000000..c65292168a --- /dev/null +++ b/execution_chain/sync/beacon/worker/headers/headers_debug.nim @@ -0,0 +1,75 @@ +# Nimbus +# Copyright (c) 2023-2025 Status Research & Development GmbH +# Licensed and distributed under either of +# * MIT license (license terms in the root directory or at +# https://opensource.org/licenses/MIT). +# * Apache v2 license (license terms in the root directory or at +# https://www.apache.org/licenses/LICENSE-2.0). +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
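The new `hdr=...` and `blk=...` log fields rely on the `bnStr` pretty-printers from the debug modules, which render the interval sets as brace-enclosed lists. A rough, self-contained illustration, assuming stew's `IntervalSetRef.init()` and `merge()` are available for `BnRangeSet`:

    let unproc = BnRangeSet.init()         # BnRangeSet = IntervalSetRef[BlockNumber,uint64]
    discard unproc.merge(100u64, 199u64)   # block numbers #100..#199 still to fetch
    discard unproc.merge(300u64, 300u64)   # plus the single block #300
    # unproc.bnStr now renders as one brace-enclosed list, roughly "{#100..#199,#300}"
    # (the exact interval formatting comes from the existing bnStr overloads)
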
+ +{.push raises:[].} + +import + std/[sequtils, strutils], + pkg/[chronicles, chronos], + pkg/eth/common, + pkg/stew/[interval_set, sorted_set], + ../../worker_desc + +# ------------------------------------------------------------------------------ +# Private logging helpers +# ------------------------------------------------------------------------------ + +func bnStr(w: seq[Header]): string = + ## Pretty print reverse sequence of headers as interval + if w.len == 0: "n/a" else: (w[^1].number,w[0].number).bnStr + +# ------------------------------------------------------------------------------ +# Public logging functions +# ------------------------------------------------------------------------------ + +func bnStr*(w: BnRangeSet): string = + "{" & w.increasing.toSeq.mapIt(it.bnStr).join(",") & "}" + +func bnStr*(w: StagedHeaderQueue): string = + result = "{" + var rc = w.ge(0) + while rc.isOk: + result &= rc.value.data.revHdrs.bnStr & "," + rc = w.gt(rc.value.key) + if result[^1] == ',': + result[^1] = '}' + else: + result &= "}" + +func bnStr*(w: HeaderFetchSync): string = + "(" & w.unprocessed.bnStr & + "," & w.borrowed.bnStr & + "," & w.staged.bnStr & + ")" + +proc verify*(hdr: HeaderFetchSync): bool = + # Unprocessed intervals must not overlap + for iv in hdr.borrowed.increasing: + if 0 < hdr.unprocessed.covered(iv): + trace "verify: borrowed and unprocessed overlap", hdr=hdr.bnStr + return false + # Check stashed against unprocessed intervals + var rc = hdr.staged.ge(0) + while rc.isOk: + let + minPt = rc.value.data.revHdrs[^1].number + maxPt = rc.value.data.revHdrs[0].number + if 0 < hdr.unprocessed.covered(minPt, maxPt): + trace "verify: staged and unprocessed overlap", hdr=hdr.bnStr + return false + if 0 < hdr.borrowed.covered(minPt, maxPt): + trace "verify: staged and borrowed overlap", hdr=hdr.bnStr + return false + rc = hdr.staged.gt(rc.value.key) + true + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/execution_chain/sync/beacon/worker/headers/headers_fetch.nim b/execution_chain/sync/beacon/worker/headers/headers_fetch.nim index 18b4e5d40f..2595c2504f 100644 --- a/execution_chain/sync/beacon/worker/headers/headers_fetch.nim +++ b/execution_chain/sync/beacon/worker/headers/headers_fetch.nim @@ -18,11 +18,33 @@ import ../../worker_desc, ./headers_helpers +import + ./headers_debug + +logScope: + topics = "beacon sync" + # ------------------------------------------------------------------------------ # Private helpers +# ----------------------------------------------------------------------------- + +template getBlockHeaders( + buddy: BeaconBuddyRef; + req: BlockHeadersRequest; + ): Result[FetchHeadersData,BeaconError] = + ## Async/template + ## + ## Wrapper around `getBlockHeaders()` handler + ## + let rc = await buddy.ctx.handler.getBlockHeaders(buddy, req) + buddy.ctx.handler.syncBlockHeaders(buddy) # debugging, sync, replay + rc + +# ------------------------------------------------------------------------------ +# Public handler # ------------------------------------------------------------------------------ -proc getBlockHeaders( +proc getBlockHeadersCB*( buddy: BeaconBuddyRef; req: BlockHeadersRequest; ): Future[Result[FetchHeadersData,BeaconError]] @@ -86,9 +108,10 @@ template fetchHeadersReversed*( number: ivReq.maxPt)) trace trEthSendSendingGetBlockHeaders & " reverse", peer, req=ivReq, - nReq=req.maxResults, hash=topHash.toStr, 
hdrErrors=buddy.hdrErrors + nReq=req.maxResults, hash=topHash.toStr, hdrErrors=buddy.hdrErrors, + hdr=(buddy.ctx.hdr.bnStr) - let rc = await buddy.getBlockHeaders(req) + let rc = buddy.getBlockHeaders(req) var elapsed: Duration if rc.isOk: elapsed = rc.value.elapsed @@ -107,7 +130,8 @@ template fetchHeadersReversed*( chronicles.info trEthRecvReceivedBlockHeaders & ": error", peer, req=ivReq, nReq=req.maxResults, hash=topHash.toStr, elapsed=rc.error.elapsed.toStr, syncState=($buddy.syncState), - error=rc.error.name, msg=rc.error.msg, hdrErrors=buddy.hdrErrors + error=rc.error.name, msg=rc.error.msg, hdrErrors=buddy.hdrErrors, + hdr=(buddy.ctx.hdr.bnStr) break body # return err() # Evaluate result @@ -115,7 +139,8 @@ template fetchHeadersReversed*( buddy.hdrFetchRegisterError() trace trEthRecvReceivedBlockHeaders, peer, nReq=req.maxResults, hash=topHash.toStr, nResp=0, elapsed=elapsed.toStr, - syncState=($buddy.syncState), hdrErrors=buddy.hdrErrors + syncState=($buddy.syncState), hdrErrors=buddy.hdrErrors, + hdr=(buddy.ctx.hdr.bnStr) break body # return err() let h = rc.value.packet.headers @@ -123,7 +148,8 @@ template fetchHeadersReversed*( buddy.hdrFetchRegisterError() trace trEthRecvReceivedBlockHeaders, peer, nReq=req.maxResults, hash=topHash.toStr, nResp=h.len, elapsed=elapsed.toStr, - syncState=($buddy.syncState), hdrErrors=buddy.hdrErrors + syncState=($buddy.syncState), hdrErrors=buddy.hdrErrors, + hdr=(buddy.ctx.hdr.bnStr) break body # return err() # Verify that first block number matches @@ -132,7 +158,8 @@ template fetchHeadersReversed*( trace trEthRecvReceivedBlockHeaders, peer, nReq=req.maxResults, hash=topHash.toStr, reqMinPt=ivReq.minPt.bnStr, respMinPt=h[^1].bnStr, nResp=h.len, elapsed=elapsed.toStr, - syncState=($buddy.syncState), hdrErrors=buddy.hdrErrors + syncState=($buddy.syncState), hdrErrors=buddy.hdrErrors, + hdr=(buddy.ctx.hdr.bnStr) break body # Ban an overly slow peer for a while when seen in a row. 
Also there is a @@ -147,7 +174,7 @@ template fetchHeadersReversed*( trace trEthRecvReceivedBlockHeaders, peer, nReq=req.maxResults, hash=topHash.toStr, ivResp=BnRange.new(h[^1].number,h[0].number), nResp=h.len, elapsed=elapsed.toStr, syncState=($buddy.syncState), - hdrErrors=buddy.hdrErrors + hdrErrors=buddy.hdrErrors, hdr=(buddy.ctx.hdr.bnStr) bodyRc = Opt[seq[Header]].ok(h) diff --git a/execution_chain/sync/beacon/worker/headers/headers_headers.nim b/execution_chain/sync/beacon/worker/headers/headers_headers.nim index 3b8d8b7ec8..91d7f3a795 100644 --- a/execution_chain/sync/beacon/worker/headers/headers_headers.nim +++ b/execution_chain/sync/beacon/worker/headers/headers_headers.nim @@ -18,6 +18,9 @@ import ../../worker_desc, ./[headers_fetch, headers_helpers, headers_unproc] +import + ./headers_debug + # ------------------------------------------------------------------------------ # Public helper functions # ------------------------------------------------------------------------------ @@ -133,7 +136,8 @@ proc headersStashOnDisk*( elif revHdrs[^1].number <= dBottom: (dBottom - revHdrs[^1].number) else: revHdrs.len.uint64), base=ctx.chain.baseNumber.bnStr, head=ctx.chain.latestNumber.bnStr, - target=ctx.subState.head.bnStr, targetHash=ctx.subState.headHash.short + target=ctx.subState.head.bnStr, targetHash=ctx.subState.headHash.short, + hdr=ctx.hdr.bnStr ctx.resetHdrProcErrors peerID # reset error count true diff --git a/execution_chain/sync/beacon/worker/start_stop.nim b/execution_chain/sync/beacon/worker/start_stop.nim index af4933449b..3dc3f616e4 100644 --- a/execution_chain/sync/beacon/worker/start_stop.nim +++ b/execution_chain/sync/beacon/worker/start_stop.nim @@ -15,7 +15,7 @@ import ../../../networking/p2p, ../../wire_protocol, ../worker_desc, - ./[blocks, headers, update] + ./[blocks, headers] type SyncStateData = tuple @@ -60,8 +60,8 @@ proc setupServices*(ctx: BeaconCtxRef; info: static[string]) = # Set up the notifier informing when a new syncer session has started. ctx.hdrCache.start proc() = - # Activates the syncer. Work will be picked up by peers when available. - ctx.updateActivateSyncer() + # This directive captures `ctx` for calling the activation handler. + ctx.handler.activate(ctx) # Manual first run? if 0 < ctx.pool.clReq.consHead.number: diff --git a/execution_chain/sync/beacon/worker/update.nim b/execution_chain/sync/beacon/worker/update.nim index 1edd5968a2..46ac92a9b6 100644 --- a/execution_chain/sync/beacon/worker/update.nim +++ b/execution_chain/sync/beacon/worker/update.nim @@ -15,8 +15,11 @@ import pkg/[chronicles, chronos, metrics], pkg/eth/common, ../worker_desc, - ./blocks/blocks_unproc, - ./headers + ./[blocks, headers] + +import + ./blocks/[blocks_debug, blocks_queue], + ./headers/[headers_debug, headers_queue] logScope: topics = "beacon sync" @@ -31,24 +34,6 @@ declareGauge nec_sync_head, "" & # Private functions, state handler helpers # ------------------------------------------------------------------------------ -proc updateSuspendSyncer(ctx: BeaconCtxRef) = - ## Clean up sync target buckets, stop syncer activity, and and get ready - ## for awaiting a new request from the `CL`. 
-  ##
-  ctx.hdrCache.clear()
-
-  ctx.pool.clReq.reset
-  ctx.pool.failedPeers.clear()
-  ctx.pool.seenData = false
-
-  ctx.hibernate = true
-
-  metrics.set(nec_sync_last_block_imported, 0)
-  metrics.set(nec_sync_head, 0)
-
-  info "Suspending syncer", base=ctx.chain.baseNumber.bnStr,
-    head=ctx.chain.latestNumber.bnStr, nSyncPeers=ctx.pool.nBuddies
-
 proc commitCollectHeaders(ctx: BeaconCtxRef; info: static[string]): bool =
   ## Link header chain into `FC` module. Gets ready for block import.
   ##
@@ -65,6 +50,28 @@ proc commitCollectHeaders(ctx: BeaconCtxRef; info: static[string]): bool =
 proc setupProcessingBlocks(ctx: BeaconCtxRef; info: static[string]) =
   ## Prepare for blocks processing
   ##
+  if not ctx.blocksUnprocIsEmpty() or
+     not ctx.blocksStagedQueueIsEmpty() or
+     not ctx.headersUnprocIsEmpty() or
+     not ctx.headersStagedQueueIsEmpty() or
+     ctx.subState.top != 0 or
+     ctx.subState.head != 0 or
+     ctx.subState.cancelRequest:
+    error "updateSuspendCB: Oops", blk=ctx.blk.bnStr, hdr=ctx.hdr.bnStr,
+      syncState=($ctx.syncState)
+  doAssert ctx.blocksUnprocIsEmpty()
+  doAssert ctx.blocksStagedQueueIsEmpty()
+  doAssert ctx.headersUnprocIsEmpty()
+  doAssert ctx.headersStagedQueueIsEmpty()
+  doAssert ctx.subState.top == 0
+  doAssert ctx.subState.head == 0
+  doAssert not ctx.subState.cancelRequest
+
+  #ctx.headersUnprocClear()
+  #ctx.blocksUnprocClear()
+  #ctx.headersStagedQueueClear()
+  #ctx.blocksStagedQueueClear()
+
   # Reset for useles block download detection (to avoid deadlock)
   ctx.pool.failedPeers.clear()
   ctx.pool.seenData = false
@@ -228,7 +235,7 @@ proc updateSyncState*(ctx: BeaconCtxRef; info: static[string]) =

   # Final sync scrum layout reached or inconsistent/impossible state
   if newState == idle:
-    ctx.updateSuspendSyncer()
+    ctx.handler.suspend(ctx)


 proc updateLastBlockImported*(ctx: BeaconCtxRef; bn: BlockNumber) =
@@ -239,7 +246,7 @@ proc updateLastBlockImported*(ctx: BeaconCtxRef; bn: BlockNumber) =
 # Public functions, call-back handler ready
 # ------------------------------------------------------------------------------

-proc updateActivateSyncer*(ctx: BeaconCtxRef) =
+proc updateActivateCB*(ctx: BeaconCtxRef) =
   ## If in hibernate mode, accept a cache session and activate syncer
   ##
   if ctx.hibernate:
@@ -268,6 +275,25 @@ proc updateActivateSyncer*(ctx: BeaconCtxRef) =
     debug "Syncer activation rejected", base=ctx.chain.baseNumber.bnStr,
       head=ctx.chain.latestNumber.bnStr, state=ctx.hdrCache.state

+
+proc updateSuspendCB*(ctx: BeaconCtxRef) =
+  ## Clean up sync target buckets, stop syncer activity, and get ready
+  ## for a new sync request from the `CL`.
+  ##
+  ctx.hdrCache.clear()
+
+  ctx.pool.clReq.reset
+  ctx.pool.failedPeers.clear()
+  ctx.pool.seenData = false
+
+  ctx.hibernate = true
+
+  metrics.set(nec_sync_last_block_imported, 0)
+  metrics.set(nec_sync_head, 0)
+
+  info "Suspending syncer", base=ctx.chain.baseNumber.bnStr,
+    head=ctx.chain.latestNumber.bnStr, nSyncPeers=ctx.pool.nBuddies
+
 # ------------------------------------------------------------------------------
 # End
 # ------------------------------------------------------------------------------
diff --git a/execution_chain/sync/beacon/worker_const.nim b/execution_chain/sync/beacon/worker_const.nim
index 63bf86f22a..55383480fd 100644
--- a/execution_chain/sync/beacon/worker_const.nim
+++ b/execution_chain/sync/beacon/worker_const.nim
@@ -23,7 +23,7 @@ type SyncState* = enum
     blocksFinish                       ## get ready for `idle`

 const
-  enableTicker* = false
+  enableTicker* = false or true
    ## Log regular status updates similar to metrics. Great for debugging.

# ----------------------
diff --git a/execution_chain/sync/beacon/worker_desc.nim b/execution_chain/sync/beacon/worker_desc.nim
index ed068eeb20..89bc2ae6fc 100644
--- a/execution_chain/sync/beacon/worker_desc.nim
+++ b/execution_chain/sync/beacon/worker_desc.nim
@@ -55,6 +55,66 @@ type

   # -------------------

+  ActivateSyncerHdl* =
+    proc(ctx: BeaconCtxRef) {.gcsafe, raises: [].}
+      ## Syncer activation function run when notified by header chain cache.
+
+  SuspendSyncerHdl* = proc(ctx: BeaconCtxRef) {.gcsafe, raises: [].}
+    ## Syncer hibernate function run when the current session has finished.
+
+  SchedDaemonHdl* =
+    proc(ctx: BeaconCtxRef): Future[Duration] {.async: (raises: []).}
+      ## See `runDaemon()` described in `sync_sched.nim`
+
+  SchedStartHdl* =
+    proc(buddy: BeaconBuddyRef): bool {.gcsafe, raises: [].}
+      ## See `runStart()` described in `sync_sched.nim`
+
+  SchedStopHdl* =
+    proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].}
+      ## See `runStop()` described in `sync_sched.nim`
+
+  SchedPoolHdl* =
+    proc(buddy: BeaconBuddyRef; last: bool; laps: int):
+      bool {.gcsafe, raises: [].}
+        ## See `runPool()` described in `sync_sched.nim`
+
+  SchedPeerHdl* =
+    proc(buddy: BeaconBuddyRef): Future[Duration] {.async: (raises: []).}
+      ## See `runPeer()` described in `sync_sched.nim`
+
+  GetBlockHeadersHdl* =
+    proc(buddy: BeaconBuddyRef; req: BlockHeadersRequest):
+      Future[Result[FetchHeadersData,BeaconError]] {.async: (raises: []).}
+        ## From the ethXX peer implied by `buddy`, fetch a list of
+        ## headers.
+
+  SyncBlockHeadersHdl* =
+    proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].}
+      ## Status of syncer after `GetBlockHeadersHdl`
+
+  GetBlockBodiesHdl* =
+    proc(buddy: BeaconBuddyRef; request: BlockBodiesRequest):
+      Future[Result[FetchBodiesData,BeaconError]] {.async: (raises: []).}
+        ## Fetch bodies from the network.
+
+  SyncBlockBodiesHdl* =
+    proc(buddy: BeaconBuddyRef) {.gcsafe, raises: [].}
+      ## Status of syncer after `GetBlockBodiesHdl`
+
+  ImportBlockHdl* =
+    proc(ctx: BeaconCtxRef; maybePeer: Opt[BeaconBuddyRef]; blk: EthBlock;
+         effPeerID: Hash):
+      Future[Result[Duration,BeaconError]] {.async: (raises: []).}
+        ## Import a single block into `FC` module.
+
+  SyncImportBlockHdl* =
+    proc(ctx: BeaconCtxRef; maybePeer: Opt[BeaconBuddyRef])
+      {.gcsafe, raises: [].}
+        ## Status of syncer after `ImportBlockHdl`
+
+  # -------------------
+
   BnRangeSet* = IntervalSetRef[BlockNumber,uint64]
     ## Disjunct sets of block number intervals

@@ -127,6 +187,25 @@ type
     ## Local descriptor data extension
     nRespErrors*: BuddyError      ## Number of errors/slow responses in a row

+
+  BeaconHandlersRef* = ref object of RootRef
+    ## Selected handlers that can be replaced for tracing. The version number
+    ## allows identifying overlays.
+    version*: int                 ## Overlay version unless 0 (i.e. base=0)
+    activate*: ActivateSyncerHdl  ## Allows for redirect (e.g. tracing)
+    suspend*: SuspendSyncerHdl    ## Ditto
+    schedDaemon*: SchedDaemonHdl  ## ...
+ schedStart*: SchedStartHdl + schedStop*: SchedStopHdl + schedPool*: SchedPoolHdl + schedPeer*: SchedPeerHdl + getBlockHeaders*: GetBlockHeadersHdl + syncBlockHeaders*: SyncBlockHeadersHdl + getBlockBodies*: GetBlockBodiesHdl + syncBlockBodies*: SyncBlockBodiesHdl + importBlock*: ImportBlockHdl + syncImportBlock*: SyncImportBlockHdl + BeaconCtxData* = object ## Globally shared data extension nBuddies*: int ## Number of active workers @@ -139,6 +218,7 @@ type chain*: ForkedChainRef ## Core database, FCU support hdrCache*: HeaderChainRef ## Currently in tandem with `chain` + handlers*: BeaconHandlersRef ## Allows for redirect (e.g. tracing) # Info, debugging, and error handling stuff nProcError*: Table[Hash,BuddyError] ## Per peer processing error @@ -176,6 +256,10 @@ func hdrCache*(ctx: BeaconCtxRef): HeaderChainRef = ## Shortcut ctx.pool.hdrCache +func handler*(ctx: BeaconCtxRef): BeaconHandlersRef = + ## Shortcut + ctx.pool.handlers + # ----- func hibernate*(ctx: BeaconCtxRef): bool = diff --git a/execution_chain/sync/sync_desc.nim b/execution_chain/sync/sync_desc.nim index ffeb14ae13..0b14c4f0b7 100644 --- a/execution_chain/sync/sync_desc.nim +++ b/execution_chain/sync/sync_desc.nim @@ -30,7 +30,7 @@ type ## Control and state settings runState: BuddyRunState ## Access with getters - BuddyRef*[S,W] = ref object + BuddyRef*[S,W] = ref object of RootRef ## Worker peer state descriptor. ctx*: CtxRef[S] ## Shared data descriptor back reference peer*: Peer ## Reference to eth `p2p` protocol entry
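The `BeaconHandlersRef` callback table introduced in `worker_desc.nim` is what makes the tracer and replay front-ends pluggable: they can install an overlay that wraps selected handlers and forwards to the originals. A minimal sketch of such an overlay; `installTraceHandlers` and the capture comment are hypothetical, the real wiring lives in the trace/replay modules outside this excerpt:

    proc installTraceHandlers(ctx: BeaconCtxRef) =
      ## Hypothetical example: wrap one handler, keep the rest unchanged.
      let base = ctx.pool.handlers            # currently installed callback table
      let overlay = BeaconHandlersRef()
      overlay[] = base[]                      # field-wise copy of the base table
      overlay.version = base.version + 1      # non-zero version marks an overlay
      overlay.getBlockHeaders =
        proc(buddy: BeaconBuddyRef; req: BlockHeadersRequest):
            Future[Result[FetchHeadersData,BeaconError]] {.async: (raises: []).} =
          # a tracer would capture `req` and the result here before forwarding
          return await base.getBlockHeaders(buddy, req)
      ctx.pool.handlers = overlay

Restoring `base` into `ctx.pool.handlers` undoes the overlay; the `version` field lets callers detect whether the base table (version 0) or an overlay is currently active.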