
Commit fc0f484

Merge pull request #6013 from oasisprotocol/martin/feature/show-history-reindex-round
go/control: Show runtime block history indexer status
2 parents: 7f5edbd + 2a6fedc

5 files changed: +159 −43 lines

.changelog/5998.feature.md

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+go/oasis-node: Display runtime block history indexer status
+
+A new field `indexer` has been added to the `oasis-node control status`
+output under the runtime status section. Unless the runtime is a keymanager
+runtime, this field displays:
+
+1. The status of the runtime block history indexer.
+2. The last indexed round.
+
+Additionally, if a history reindex is in progress, it also includes:
+
+- The batch size used during reindex.
+- The last consensus height that was indexed.
+- The start and end heights of the reindex range.
+- An ETA field, which gives the expected time of reindex completion.
+
+This is useful for node operators, as it lets them estimate when their
+node will be ready to accept runtime work.
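
For illustration only (not part of the commit), the new field could look roughly like
this in the `oasis-node control status` output while a reindex is in progress. The keys
follow the JSON tags added in go/runtime/history/indexer.go; all values are made up:

	"indexer": {
	  "status": "reindexing",
	  "last_round": 0,
	  "reindex_status": {
	    "batch_size": 1000,
	    "last_height": 12000,
	    "start_height": 8000,
	    "end_height": 25000,
	    "eta": "2025-01-01T12:34:56Z"
	  }
	}

The `status` value is one of the constants defined in indexer.go (`started`,
`reindexing`, `indexing`, `stopped`), and `reindex_status` is omitted outside of a
history reindex.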

go/control/api/api.go

Lines changed: 4 additions & 2 deletions
@@ -18,6 +18,7 @@ import (
 	registry "github.com/oasisprotocol/oasis-core/go/registry/api"
 	block "github.com/oasisprotocol/oasis-core/go/roothash/api/block"
 	"github.com/oasisprotocol/oasis-core/go/runtime/bundle/component"
+	"github.com/oasisprotocol/oasis-core/go/runtime/history"
 	storage "github.com/oasisprotocol/oasis-core/go/storage/api"
 	upgrade "github.com/oasisprotocol/oasis-core/go/upgrade/api"
 	commonWorker "github.com/oasisprotocol/oasis-core/go/worker/common/api"

@@ -184,13 +185,14 @@ type RuntimeStatus struct {
 	// LastRetainedHash is the hash of the oldest retained block.
 	LastRetainedHash hash.Hash `json:"last_retained_hash"`

-	// Committee contains the runtime worker status in case this node is a (candidate) member of a
-	// runtime committee.
+	// Committee contains the runtime common committee worker status.
 	Committee *commonWorker.Status `json:"committee"`
 	// Executor contains the executor worker status in case this node is an executor node.
 	Executor *executorWorker.Status `json:"executor,omitempty"`
 	// Storage contains the storage worker status in case this node is a storage node.
 	Storage *storageWorker.Status `json:"storage,omitempty"`
+	// Indexer contains the runtime history indexer status in case this runtime has a block indexer.
+	Indexer *history.IndexerStatus `json:"indexer,omitempty"`

 	// Provisioner is the name of the runtime provisioner.
 	Provisioner string `json:"provisioner,omitempty"`
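
As a side note (not part of the commit), a monitoring tool could pick the new field out
of the control status output by mirroring the JSON tags above. The structs and the input
fragment below are illustrative only, not the real oasis-core types:

	package main

	import (
		"encoding/json"
		"fmt"
		"time"
	)

	// Local mirrors of the new status types; they only follow the JSON tags above.
	type reindexStatus struct {
		BatchSize   uint16    `json:"batch_size"`
		LastHeight  int64     `json:"last_height"`
		StartHeight int64     `json:"start_height"`
		EndHeight   int64     `json:"end_height"`
		ETA         time.Time `json:"eta"`
	}

	type indexerStatus struct {
		Status        string         `json:"status"`
		LastRound     uint64         `json:"last_round"`
		ReindexStatus *reindexStatus `json:"reindex_status,omitempty"`
	}

	func main() {
		// Hypothetical `indexer` fragment from `oasis-node control status` output.
		raw := []byte(`{"status": "reindexing", "last_round": 0,
			"reindex_status": {"batch_size": 1000, "last_height": 12000,
			"start_height": 8000, "end_height": 25000, "eta": "2025-01-01T12:34:56Z"}}`)

		var st indexerStatus
		if err := json.Unmarshal(raw, &st); err != nil {
			panic(err)
		}

		fmt.Println("indexer status:", st.Status, "| last round:", st.LastRound)
		if rs := st.ReindexStatus; rs != nil {
			fmt.Printf("reindexing heights %d..%d, ETA %s\n", rs.StartHeight, rs.EndHeight, rs.ETA)
		}
	}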

go/oasis-node/cmd/node/node_control.go

Lines changed: 16 additions & 33 deletions
@@ -208,6 +208,8 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 			continue
 		}

+		logger := n.logger.With("runtime_id", rt.ID())
+
 		var status control.RuntimeStatus

 		// Fetch runtime registry descriptor. Do not wait too long for the descriptor to become

@@ -221,10 +223,7 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 		case context.DeadlineExceeded:
 			// The descriptor may not yet be available. It is fine if we use nil in this case.
 		default:
-			n.logger.Error("failed to fetch registry descriptor",
-				"err", err,
-				"runtime_id", rt.ID(),
-			)
+			logger.Error("failed to fetch registry descriptor", "err", err)
 		}

 		// Fetch latest block as seen by this node.

@@ -244,10 +243,7 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 				Hash: blk.Header.StateRoot,
 			}
 		default:
-			n.logger.Error("failed to fetch latest runtime block",
-				"err", err,
-				"runtime_id", rt.ID(),
-			)
+			logger.Error("failed to fetch latest runtime block", "err", err)
 		}

 		// Fetch latest genesis block as seen by this node.

@@ -260,10 +256,7 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 			status.GenesisRound = blk.Header.Round
 			status.GenesisHash = blk.Header.EncodedHash()
 		default:
-			n.logger.Error("failed to fetch genesis runtime block",
-				"err", err,
-				"runtime_id", rt.ID(),
-			)
+			logger.Error("failed to fetch genesis runtime block", "err", err)
 		}

 		// Fetch the oldest retained block.

@@ -273,20 +266,15 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 			status.LastRetainedRound = blk.Header.Round
 			status.LastRetainedHash = blk.Header.EncodedHash()
 		default:
-			n.logger.Error("failed to fetch last retained runtime block",
-				"err", err,
-				"runtime_id", rt.ID(),
-			)
+			logger.Error("failed to fetch last retained runtime block", "err", err)
 		}

 		// Take storage into account for last retained round.
 		if config.GlobalConfig.Mode.HasLocalStorage() {
 			lsb, ok := rt.Storage().(storage.LocalBackend)
 			switch ok {
 			case false:
-				n.logger.Error("local storage backend expected",
-					"runtime_id", rt.ID(),
-				)
+				logger.Error("local storage backend expected")
 			default:
 				// Update last retained round if storage earliest round is higher.
 				if earliest := lsb.NodeDB().GetEarliestVersion(); earliest > status.LastRetainedRound {

@@ -296,10 +284,9 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 					status.LastRetainedRound = blk.Header.Round
 					status.LastRetainedHash = blk.Header.EncodedHash()
 				default:
-					n.logger.Error("failed to fetch runtime block",
+					logger.Error("failed to fetch runtime block",
 						"err", err,
 						"round", earliest,
-						"runtime_id", rt.ID(),
 					)
 				}

@@ -312,35 +299,31 @@ func (n *Node) getRuntimeStatus(ctx context.Context) (map[common.Namespace]contr
 		if rtNode := n.CommonWorker.GetRuntime(rt.ID()); rtNode != nil {
 			status.Committee, err = rtNode.GetStatus()
 			if err != nil {
-				n.logger.Error("failed to fetch common committee worker status",
-					"err", err,
-					"runtime_id", rt.ID(),
-				)
+				logger.Error("failed to fetch common committee worker status", "err", err)
 			}
 		}

 		// Fetch executor worker status.
 		if execNode := n.ExecutorWorker.GetRuntime(rt.ID()); execNode != nil {
 			status.Executor, err = execNode.GetStatus()
 			if err != nil {
-				n.logger.Error("failed to fetch executor worker status",
-					"err", err,
-					"runtime_id", rt.ID(),
-				)
+				logger.Error("failed to fetch executor worker status", "err", err)
 			}
 		}

 		// Fetch storage worker status.
 		if storageNode := n.StorageWorker.GetRuntime(rt.ID()); storageNode != nil {
 			status.Storage, err = storageNode.GetStatus(ctx)
 			if err != nil {
-				n.logger.Error("failed to fetch storage worker status",
-					"err", err,
-					"runtime_id", rt.ID(),
-				)
+				logger.Error("failed to fetch storage worker status", "err", err)
 			}
 		}

+		// Fetch history indexer status.
+		if indexer, ok := n.RuntimeRegistry.Indexer(rt.ID()); ok {
+			status.Indexer = indexer.Status()
+		}
+
 		// Fetch provisioner type.
 		status.Provisioner = n.Provisioner.Name()

go/runtime/history/indexer.go

Lines changed: 106 additions & 5 deletions
@@ -15,19 +15,61 @@ import (
 	roothash "github.com/oasisprotocol/oasis-core/go/roothash/api"
 )

+const (
+	statusStarted    = "started"
+	statusReindexing = "reindexing"
+	statusIndexing   = "indexing"
+	statusStopped    = "stopped"
+)
+
+// IndexerStatus is the status of runtime history indexer.
+type IndexerStatus struct {
+	// Status is the concise status of runtime history indexer state.
+	Status string `json:"status"`
+
+	// LastRound is the last runtime round that was indexed.
+	LastRound uint64 `json:"last_round"`
+
+	// ReindexStatus is history reindex status.
+	//
+	// It is nil unless during history reindex.
+	ReindexStatus *ReindexStatus `json:"reindex_status,omitempty"`
+}
+
+type ReindexStatus struct {
+	// BatchSize is the number of blocks to reindex in a single batch.
+	BatchSize uint16 `json:"batch_size"`
+	// LastHeight is the last consensus height that was indexed.
+	LastHeight int64 `json:"last_height"`
+	// StartHeight is the first height of history reindex interval.
+	StartHeight int64 `json:"start_height"`
+	// EndHeight is the last height of history reindex interval.
+	EndHeight int64 `json:"end_height"`
+	// ETA is expected time of history reindex completion.
+	ETA time.Time `json:"eta"`
+}
+
 const (
 	maxPendingBlocks = 10
 )

 // BlockIndexer is responsible for indexing and committing finalized
 // runtime blocks from the consensus into the runtime history.
 type BlockIndexer struct {
+	mu       sync.RWMutex
 	startOne cmSync.One

 	consensus consensus.Backend
 	history   History
 	batchSize uint16

+	status      string
+	lastHeight  int64
+	startHeight int64
+	endHeight   int64
+	lastRound   uint64
+	started     time.Time
+
 	logger *logging.Logger
 }

@@ -54,9 +96,41 @@ func (bi *BlockIndexer) Stop() {
 	bi.startOne.TryStop()
 }

+// Status returns runtime block history indexer status.
+func (bi *BlockIndexer) Status() *IndexerStatus {
+	bi.mu.RLock()
+	defer bi.mu.RUnlock()
+
+	status := &IndexerStatus{
+		Status:    bi.status,
+		LastRound: bi.lastRound,
+	}
+
+	if bi.status != statusReindexing {
+		return status
+	}
+
+	elapsed := time.Since(bi.started).Milliseconds()
+	remaining := elapsed * (bi.endHeight - bi.lastHeight) / max((bi.lastHeight-bi.startHeight+1), 1)
+	eta := time.Now().Add(time.Duration(remaining) * time.Millisecond)
+	status.ReindexStatus = &ReindexStatus{
+		BatchSize:   bi.batchSize,
+		LastHeight:  bi.lastHeight,
+		StartHeight: bi.startHeight,
+		EndHeight:   bi.endHeight,
+		ETA:         eta,
+	}
+
+	return status
+}
+
 func (bi *BlockIndexer) run(ctx context.Context) {
 	bi.logger.Info("starting")

+	bi.mu.Lock()
+	bi.status = statusStarted
+	bi.mu.Unlock()
+
 	// Subscribe to new runtime blocks.
 	blkCh, blkSub, err := bi.consensus.RootHash().WatchBlocks(ctx, bi.history.RuntimeID())
 	if err != nil {
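
The ETA reported by Status above is a simple linear extrapolation: the time spent so far
is scaled by the ratio of heights still to be covered to heights already reindexed. A
standalone sketch of the same arithmetic with made-up numbers (not part of the commit):

	package main

	import (
		"fmt"
		"time"
	)

	// estimateETA mirrors the extrapolation in Status:
	// remaining time = elapsed time * (heights left / heights already reindexed).
	func estimateETA(started time.Time, startHeight, lastHeight, endHeight int64) time.Time {
		elapsed := time.Since(started).Milliseconds()
		indexed := max(lastHeight-startHeight+1, 1) // guard against division by zero
		remaining := elapsed * (endHeight - lastHeight) / indexed
		return time.Now().Add(time.Duration(remaining) * time.Millisecond)
	}

	func main() {
		// Pretend the reindex started 10 minutes ago, spans heights 1000..9000,
		// and has reached height 3000 so far.
		started := time.Now().Add(-10 * time.Minute)
		fmt.Println("estimated completion:", estimateETA(started, 1000, 3000, 9000))
	}

With these numbers roughly a quarter of the range is done after 10 minutes, so the ETA
comes out about 30 minutes from now. The max(..., 1) guard matches the one in Status and
avoids a division by zero right at the start of a reindex.
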
@@ -86,11 +160,19 @@ func (bi *BlockIndexer) run(ctx context.Context) {
 	// Index new blocks.
 	bi.index(ctx, blkCh)
 	bi.logger.Info("stopping")
+
+	bi.mu.Lock()
+	bi.status = statusStopped
+	bi.mu.Unlock()
 }

 func (bi *BlockIndexer) index(ctx context.Context, blkCh <-chan *roothash.AnnotatedBlock) {
 	bi.logger.Debug("indexing")

+	bi.mu.Lock()
+	bi.status = statusIndexing
+	bi.mu.Unlock()
+
 	retry := time.Duration(math.MaxInt64)
 	boff := cmnBackoff.NewExponentialBackOff()
 	boff.Reset()
@@ -216,23 +298,33 @@ func (bi *BlockIndexer) reindexTo(ctx context.Context, height int64) error {
 		)
 		return fmt.Errorf("failed to get last indexed height: %w", err)
 	}
-	lastHeight++ // +1 since we want the last non-seen height.
+	startHeight := lastHeight + 1 // +1 since we want the last non-seen height.

 	lastRetainedHeight, err := bi.consensus.GetLastRetainedHeight(ctx)
 	if err != nil {
 		return fmt.Errorf("failed to get last retained height: %w", err)
 	}

-	if lastHeight < lastRetainedHeight {
+	if startHeight < lastRetainedHeight {
 		bi.logger.Debug("skipping pruned heights",
 			"last_retained_height", lastRetainedHeight,
-			"last_height", lastHeight,
+			"start_height", startHeight,
 		)
-		lastHeight = lastRetainedHeight
+		startHeight = lastRetainedHeight
+	}
+
+	bi.mu.Lock()
+	bi.status = statusReindexing
+	bi.endHeight = height
+	if bi.startHeight == 0 {
+		bi.lastHeight = lastHeight
+		bi.startHeight = startHeight
+		bi.started = time.Now()
 	}
+	bi.mu.Unlock()

 	batchSize := int64(bi.batchSize)
-	for start := lastHeight; start <= height; start += batchSize {
+	for start := startHeight; start <= height; start += batchSize {
 		end := min(start+batchSize-1, height)
 		if err = bi.reindexRange(ctx, start, end); err != nil {
 			return fmt.Errorf("failed to reindex batch: %w", err)
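
The loop above walks the missing consensus heights in fixed-size batches of bi.batchSize.
A small sketch (illustrative numbers only, not part of the commit) of how the inclusive
batch bounds fall out:

	package main

	import "fmt"

	// Reproduce the batching of the reindex loop for a toy height range.
	func main() {
		var (
			startHeight int64 = 8000 // first height that still needs indexing
			height      int64 = 8010 // reindex target (inclusive)
			batchSize   int64 = 4    // stands in for bi.batchSize
		)
		for start := startHeight; start <= height; start += batchSize {
			end := min(start+batchSize-1, height)
			fmt.Printf("reindex batch %d..%d\n", start, end)
		}
		// Prints batches 8000..8003, 8004..8007 and 8008..8010.
	}

Each batch is handed to reindexRange, which in turn advances the lastHeight field used by
the ETA estimate in Status.
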
@@ -291,6 +383,10 @@ func (bi *BlockIndexer) reindexRange(ctx context.Context, start int64, end int64
 		return err
 	}

+	bi.mu.Lock()
+	bi.lastHeight = end
+	bi.mu.Unlock()
+
 	bi.logger.Debug("block reindex completed")
 	return nil
 }
@@ -312,6 +408,11 @@ func (bi *BlockIndexer) commitBlocks(blocks []*roothash.AnnotatedBlock) error {
 		return fmt.Errorf("failed to commit blocks: %w", err)
 	}

+	bi.mu.Lock()
+	defer bi.mu.Unlock()
+	lastBlk := blocks[len(blocks)-1]
+	bi.lastRound = lastBlk.Block.Header.Round
+
 	return nil
 }