Skip to content

Commit 5bef208

Browse files
committed
triedb/pathdb: optimize history indexing efficiency
1 parent 8d1b1c2 commit 5bef208

File tree

6 files changed

+335
-86
lines changed

6 files changed

+335
-86
lines changed

triedb/pathdb/history_index.go

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -163,12 +163,15 @@ type indexWriter struct {
163163
db ethdb.KeyValueReader
164164
}
165165

166-
// newIndexWriter constructs the index writer for the specified state.
167-
func newIndexWriter(db ethdb.KeyValueReader, state stateIdent) (*indexWriter, error) {
166+
// newIndexWriter constructs the index writer for the specified state. Additionally,
167+
// it takes an element ID and prunes all existing elements above that ID. It's
168+
// essential as a recovery mechanism after an unclean shutdown during history
169+
// indexing.
170+
func newIndexWriter(db ethdb.KeyValueReader, state stateIdent, lastID uint64) (*indexWriter, error) {
168171
blob := readStateIndex(state, db)
169172
if len(blob) == 0 {
170173
desc := newIndexBlockDesc(0)
171-
bw, _ := newBlockWriter(nil, desc)
174+
bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */)
172175
return &indexWriter{
173176
descList: []*indexBlockDesc{desc},
174177
bw: bw,
@@ -180,15 +183,26 @@ func newIndexWriter(db ethdb.KeyValueReader, state stateIdent) (*indexWriter, er
180183
if err != nil {
181184
return nil, err
182185
}
186+
// Trim trailing blocks whose elements all exceed the limit.
187+
for i := len(descList) - 1; i > 0; i-- {
188+
if descList[i].max <= lastID {
189+
break
190+
}
191+
if descList[i-1].max >= lastID {
192+
descList = descList[:i]
193+
}
194+
}
195+
196+
// Take the last block for appending new elements
183197
lastDesc := descList[len(descList)-1]
184198
indexBlock := readStateIndexBlock(state, db, lastDesc.id)
185-
bw, err := newBlockWriter(indexBlock, lastDesc)
199+
bw, err := newBlockWriter(indexBlock, lastDesc, lastID)
186200
if err != nil {
187201
return nil, err
188202
}
189203
return &indexWriter{
190204
descList: descList,
191-
lastID: lastDesc.max,
205+
lastID: bw.last(),
192206
bw: bw,
193207
state: state,
194208
db: db,
@@ -221,7 +235,7 @@ func (w *indexWriter) rotate() error {
221235
desc = newIndexBlockDesc(w.bw.desc.id + 1)
222236
)
223237
w.frozen = append(w.frozen, w.bw)
224-
w.bw, err = newBlockWriter(nil, desc)
238+
w.bw, err = newBlockWriter(nil, desc, 0 /* useless if the block is empty */)
225239
if err != nil {
226240
return err
227241
}
@@ -271,13 +285,13 @@ type indexDeleter struct {
271285
}
272286

273287
// newIndexDeleter constructs the index deleter for the specified state.
274-
func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent) (*indexDeleter, error) {
288+
func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent, lastID uint64) (*indexDeleter, error) {
275289
blob := readStateIndex(state, db)
276290
if len(blob) == 0 {
277291
// TODO(rjl493456442) we can probably return an error here,
278292
// deleter with no data is meaningless.
279293
desc := newIndexBlockDesc(0)
280-
bw, _ := newBlockWriter(nil, desc)
294+
bw, _ := newBlockWriter(nil, desc, 0 /* useless if the block is empty */)
281295
return &indexDeleter{
282296
descList: []*indexBlockDesc{desc},
283297
bw: bw,
@@ -289,15 +303,26 @@ func newIndexDeleter(db ethdb.KeyValueReader, state stateIdent) (*indexDeleter,
289303
if err != nil {
290304
return nil, err
291305
}
306+
// Trim trailing blocks whose elements all exceed the limit.
307+
for i := len(descList) - 1; i > 0; i-- {
308+
if descList[i].max <= lastID {
309+
break
310+
}
311+
if descList[i-1].max >= lastID {
312+
descList = descList[:i]
313+
}
314+
}
315+
316+
// Take the last block, from which elements will be deleted
292317
lastDesc := descList[len(descList)-1]
293318
indexBlock := readStateIndexBlock(state, db, lastDesc.id)
294-
bw, err := newBlockWriter(indexBlock, lastDesc)
319+
bw, err := newBlockWriter(indexBlock, lastDesc, lastID)
295320
if err != nil {
296321
return nil, err
297322
}
298323
return &indexDeleter{
299324
descList: descList,
300-
lastID: lastDesc.max,
325+
lastID: bw.last(),
301326
bw: bw,
302327
state: state,
303328
db: db,
@@ -337,7 +362,7 @@ func (d *indexDeleter) pop(id uint64) error {
337362
// Open the previous block writer for deleting
338363
lastDesc := d.descList[len(d.descList)-1]
339364
indexBlock := readStateIndexBlock(d.state, d.db, lastDesc.id)
340-
bw, err := newBlockWriter(indexBlock, lastDesc)
365+
bw, err := newBlockWriter(indexBlock, lastDesc, lastDesc.max)
341366
if err != nil {
342367
return err
343368
}

triedb/pathdb/history_index_block.go

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,15 @@ import (
2121
"errors"
2222
"fmt"
2323
"math"
24+
25+
"github.com/ethereum/go-ethereum/log"
2426
)
2527

2628
const (
27-
indexBlockDescSize = 14 // The size of index block descriptor
28-
indexBlockEntriesCap = 4096 // The maximum number of entries can be grouped in a block
29-
indexBlockRestartLen = 256 // The restart interval length of index block
30-
historyIndexBatch = 512 * 1024 // The number of state history indexes for constructing or deleting as batch
29+
indexBlockDescSize = 14 // The size of index block descriptor
30+
indexBlockEntriesCap = 4096 // The maximum number of entries can be grouped in a block
31+
indexBlockRestartLen = 256 // The restart interval length of index block
32+
historyIndexBatch = 8 * 1024 * 1024 // The number of state history indexes for constructing or deleting as batch
3133
)
3234

3335
// indexBlockDesc represents a descriptor for an index block, which contains a
@@ -180,7 +182,11 @@ type blockWriter struct {
180182
data []byte // Aggregated encoded data slice
181183
}
182184

183-
func newBlockWriter(blob []byte, desc *indexBlockDesc) (*blockWriter, error) {
185+
// newBlockWriter constructs a block writer. In addition to the existing data
186+
// and block description, it takes an element ID and prunes all existing elements
187+
// above that ID. It's essential as a recovery mechanism after an unclean shutdown
188+
// during the history indexing.
189+
func newBlockWriter(blob []byte, desc *indexBlockDesc, lastID uint64) (*blockWriter, error) {
184190
if len(blob) == 0 {
185191
return &blockWriter{
186192
desc: desc,
@@ -191,11 +197,22 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc) (*blockWriter, error) {
191197
if err != nil {
192198
return nil, err
193199
}
194-
return &blockWriter{
200+
writer := &blockWriter{
195201
desc: desc,
196202
restarts: restarts,
197203
data: data, // safe to own the slice
198-
}, nil
204+
}
205+
var trimmed int
206+
for !writer.empty() && writer.last() > lastID {
207+
if err := writer.pop(writer.last()); err != nil {
208+
return nil, err
209+
}
210+
trimmed += 1
211+
}
212+
if trimmed > 0 {
213+
log.Debug("Truncated extraneous elements", "count", trimmed, "lastID", lastID)
214+
}
215+
return writer, nil
199216
}
200217

201218
// append adds a new element to the block. The new element must be greater than
@@ -271,6 +288,7 @@ func (b *blockWriter) sectionLast(section int) uint64 {
271288

272289
// sectionSearch looks up the specified value in the given section,
273290
// the position and the preceding value will be returned if found.
291+
// It assumes that the preceding element exists in the section.
274292
func (b *blockWriter) sectionSearch(section int, n uint64) (found bool, prev uint64, pos int) {
275293
b.scanSection(section, func(v uint64, p int) bool {
276294
if n == v {
@@ -295,7 +313,6 @@ func (b *blockWriter) pop(id uint64) error {
295313
}
296314
// If there is only one entry left, the entire block should be reset
297315
if b.desc.entries == 1 {
298-
//b.desc.min = 0
299316
b.desc.max = 0
300317
b.desc.entries = 0
301318
b.restarts = nil
@@ -331,6 +348,15 @@ func (b *blockWriter) full() bool {
331348
return b.desc.full()
332349
}
333350

351+
// last returns the last element in the block. It should only be called when
352+
// writer is not empty; for an empty writer it returns 0, which is meaningless.
353+
func (b *blockWriter) last() uint64 {
354+
if b.empty() {
355+
return 0
356+
}
357+
return b.desc.max
358+
}
359+
334360
// finish finalizes the index block encoding by appending the encoded restart points
335361
// and the restart counter to the end of the block.
336362
//

triedb/pathdb/history_index_block_test.go

Lines changed: 84 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ func TestBlockReaderBasic(t *testing.T) {
2828
elements := []uint64{
2929
1, 5, 10, 11, 20,
3030
}
31-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
31+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
3232
for i := 0; i < len(elements); i++ {
3333
bw.append(elements[i])
3434
}
@@ -66,7 +66,7 @@ func TestBlockReaderLarge(t *testing.T) {
6666
}
6767
slices.Sort(elements)
6868

69-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
69+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
7070
for i := 0; i < len(elements); i++ {
7171
bw.append(elements[i])
7272
}
@@ -95,19 +95,21 @@ func TestBlockReaderLarge(t *testing.T) {
9595
}
9696

9797
func TestBlockWriterBasic(t *testing.T) {
98-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
98+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
9999
if !bw.empty() {
100100
t.Fatal("expected empty block")
101101
}
102102
bw.append(2)
103103
if err := bw.append(1); err == nil {
104104
t.Fatal("out-of-order insertion is not expected")
105105
}
106+
var maxElem uint64
106107
for i := 0; i < 10; i++ {
107108
bw.append(uint64(i + 3))
109+
maxElem = uint64(i + 3)
108110
}
109111

110-
bw, err := newBlockWriter(bw.finish(), newIndexBlockDesc(0))
112+
bw, err := newBlockWriter(bw.finish(), newIndexBlockDesc(0), maxElem)
111113
if err != nil {
112114
t.Fatalf("Failed to construct the block writer, %v", err)
113115
}
@@ -119,8 +121,71 @@ func TestBlockWriterBasic(t *testing.T) {
119121
bw.finish()
120122
}
121123

124+
func TestBlockWriterWithLimit(t *testing.T) {
125+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
126+
127+
var maxElem uint64
128+
for i := 0; i < indexBlockRestartLen*2; i++ {
129+
bw.append(uint64(i + 1))
130+
maxElem = uint64(i + 1)
131+
}
132+
133+
suites := []struct {
134+
limit uint64
135+
expMax uint64
136+
}{
137+
// nothing to truncate
138+
{
139+
maxElem, maxElem,
140+
},
141+
// truncate the last element
142+
{
143+
maxElem - 1, maxElem - 1,
144+
},
145+
// truncation around the restart boundary
146+
{
147+
uint64(indexBlockRestartLen + 1),
148+
uint64(indexBlockRestartLen + 1),
149+
},
150+
// truncation around the restart boundary
151+
{
152+
uint64(indexBlockRestartLen),
153+
uint64(indexBlockRestartLen),
154+
},
155+
{
156+
uint64(1), uint64(1),
157+
},
158+
// truncate the entire block; in theory this is invalid
159+
{
160+
uint64(0), uint64(0),
161+
},
162+
}
163+
for i, suite := range suites {
164+
desc := *bw.desc
165+
block, err := newBlockWriter(bw.finish(), &desc, suite.limit)
166+
if err != nil {
167+
t.Fatalf("Failed to construct the block writer, %v", err)
168+
}
169+
if block.desc.max != suite.expMax {
170+
t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, block.desc.max, suite.expMax)
171+
}
172+
173+
// Re-fill the elements
174+
var maxElem uint64
175+
for elem := suite.limit + 1; elem < indexBlockRestartLen*4; elem++ {
176+
if err := block.append(elem); err != nil {
177+
t.Fatalf("Failed to append value %d: %v", elem, err)
178+
}
179+
maxElem = elem
180+
}
181+
if block.desc.max != maxElem {
182+
t.Fatalf("Test %d, unexpected max value, got %d, want %d", i, block.desc.max, maxElem)
183+
}
184+
}
185+
}
186+
122187
func TestBlockWriterDelete(t *testing.T) {
123-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
188+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
124189
for i := 0; i < 10; i++ {
125190
bw.append(uint64(i + 1))
126191
}
@@ -147,7 +212,7 @@ func TestBlcokWriterDeleteWithData(t *testing.T) {
147212
elements := []uint64{
148213
1, 5, 10, 11, 20,
149214
}
150-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
215+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
151216
for i := 0; i < len(elements); i++ {
152217
bw.append(elements[i])
153218
}
@@ -158,7 +223,7 @@ func TestBlcokWriterDeleteWithData(t *testing.T) {
158223
max: 20,
159224
entries: 5,
160225
}
161-
bw, err := newBlockWriter(bw.finish(), desc)
226+
bw, err := newBlockWriter(bw.finish(), desc, elements[len(elements)-1])
162227
if err != nil {
163228
t.Fatalf("Failed to construct block writer %v", err)
164229
}
@@ -201,15 +266,18 @@ func TestBlcokWriterDeleteWithData(t *testing.T) {
201266
}
202267

203268
func TestCorruptedIndexBlock(t *testing.T) {
204-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
269+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
270+
271+
var maxElem uint64
205272
for i := 0; i < 10; i++ {
206273
bw.append(uint64(i + 1))
274+
maxElem = uint64(i + 1)
207275
}
208276
buf := bw.finish()
209277

210278
// Mutate the buffer manually
211279
buf[len(buf)-1]++
212-
_, err := newBlockWriter(buf, newIndexBlockDesc(0))
280+
_, err := newBlockWriter(buf, newIndexBlockDesc(0), maxElem)
213281
if err == nil {
214282
t.Fatal("Corrupted index block data is not detected")
215283
}
@@ -218,7 +286,7 @@ func TestCorruptedIndexBlock(t *testing.T) {
218286
// BenchmarkParseIndexBlock benchmarks the performance of parseIndexBlock.
219287
func BenchmarkParseIndexBlock(b *testing.B) {
220288
// Generate a realistic index block blob
221-
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0))
289+
bw, _ := newBlockWriter(nil, newIndexBlockDesc(0), 0)
222290
for i := 0; i < 4096; i++ {
223291
bw.append(uint64(i * 2))
224292
}
@@ -238,13 +306,15 @@ func BenchmarkBlockWriterAppend(b *testing.B) {
238306
b.ReportAllocs()
239307
b.ResetTimer()
240308

241-
desc := newIndexBlockDesc(0)
242-
writer, _ := newBlockWriter(nil, desc)
309+
var blockID uint32
310+
desc := newIndexBlockDesc(blockID)
311+
writer, _ := newBlockWriter(nil, desc, 0)
243312

244313
for i := 0; i < b.N; i++ {
245314
if writer.full() {
246-
desc = newIndexBlockDesc(0)
247-
writer, _ = newBlockWriter(nil, desc)
315+
blockID += 1
316+
desc = newIndexBlockDesc(blockID)
317+
writer, _ = newBlockWriter(nil, desc, 0)
248318
}
249319
if err := writer.append(writer.desc.max + 1); err != nil {
250320
b.Error(err)

0 commit comments

Comments
 (0)