diff --git a/triedb/pathdb/history_index.go b/triedb/pathdb/history_index.go index 5b4c91d7e61..87b6e377afe 100644 --- a/triedb/pathdb/history_index.go +++ b/triedb/pathdb/history_index.go @@ -20,7 +20,6 @@ import ( "errors" "fmt" "math" - "sort" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" @@ -119,30 +118,34 @@ func (r *indexReader) refresh() error { return nil } +// newIterator creates an iterator for traversing the index entries. +func (r *indexReader) newIterator() *indexIterator { + return newIndexIterator(r.descList, func(id uint32) (*blockReader, error) { + br, ok := r.readers[id] + if !ok { + var err error + br, err = newBlockReader(readStateIndexBlock(r.state, r.db, id)) + if err != nil { + return nil, err + } + r.readers[id] = br + } + return br, nil + }) +} + // readGreaterThan locates the first element that is greater than the specified // id. If no such element is found, MaxUint64 is returned. func (r *indexReader) readGreaterThan(id uint64) (uint64, error) { - index := sort.Search(len(r.descList), func(i int) bool { - return id < r.descList[i].max - }) - if index == len(r.descList) { - return math.MaxUint64, nil + it := r.newIterator() + found := it.SeekGT(id) + if err := it.Error(); err != nil { + return 0, err } - desc := r.descList[index] - - br, ok := r.readers[desc.id] - if !ok { - var err error - blob := readStateIndexBlock(r.state, r.db, desc.id) - br, err = newBlockReader(blob) - if err != nil { - return 0, err - } - r.readers[desc.id] = br + if !found { + return math.MaxUint64, nil } - // The supplied ID is not greater than block.max, ensuring that an element - // satisfying the condition can be found. 
- return br.readGreaterThan(id) + return it.ID(), nil } // indexWriter is responsible for writing index data for a specific state (either diff --git a/triedb/pathdb/history_index_block.go b/triedb/pathdb/history_index_block.go index 5abdee682ad..7b59c8e8821 100644 --- a/triedb/pathdb/history_index_block.go +++ b/triedb/pathdb/history_index_block.go @@ -21,7 +21,6 @@ import ( "errors" "fmt" "math" - "sort" ) const ( @@ -164,58 +163,15 @@ func newBlockReader(blob []byte) (*blockReader, error) { // readGreaterThan locates the first element in the block that is greater than // the specified value. If no such element is found, MaxUint64 is returned. func (br *blockReader) readGreaterThan(id uint64) (uint64, error) { - var err error - index := sort.Search(len(br.restarts), func(i int) bool { - item, n := binary.Uvarint(br.data[br.restarts[i]:]) - if n <= 0 { - err = fmt.Errorf("failed to decode item at restart %d", br.restarts[i]) - } - return item > id - }) - if err != nil { + it := newBlockIterator(br.data, br.restarts) + found := it.SeekGT(id) + if err := it.Error(); err != nil { return 0, err } - if index == 0 { - item, _ := binary.Uvarint(br.data[br.restarts[0]:]) - return item, nil - } - var ( - start int - limit int - result uint64 - ) - if index == len(br.restarts) { - // The element being searched falls within the last restart section, - // there is no guarantee such element can be found. - start = int(br.restarts[len(br.restarts)-1]) - limit = len(br.data) - } else { - // The element being searched falls within the non-last restart section, - // such element can be found for sure. - start = int(br.restarts[index-1]) - limit = int(br.restarts[index]) - } - pos := start - for pos < limit { - x, n := binary.Uvarint(br.data[pos:]) - if pos == start { - result = x - } else { - result += x - } - if result > id { - return result, nil - } - pos += n - } - // The element which is greater than specified id is not found. 
- if index == len(br.restarts) { + if !found { return math.MaxUint64, nil } - // The element which is the first one greater than the specified id - // is exactly the one located at the restart point. - item, _ := binary.Uvarint(br.data[br.restarts[index]:]) - return item, nil + return it.ID(), nil } type blockWriter struct { diff --git a/triedb/pathdb/history_index_iterator.go b/triedb/pathdb/history_index_iterator.go new file mode 100644 index 00000000000..1ccb39ad097 --- /dev/null +++ b/triedb/pathdb/history_index_iterator.go @@ -0,0 +1,359 @@ +// Copyright 2025 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see id + }) + if err != nil { + it.setErr(err) + return false + } + if index == 0 { + item, n := binary.Uvarint(it.data[it.restarts[0]:]) + + // If the restart size is 1, then the restart pointer shouldn't be 0. + // It's not practical and should be denied in the first place. + it.set(int(it.restarts[0])+n, 0, item) + return true + } + var ( + start int + limit int + restartIndex int // The restart section being searched below + ) + if index == len(it.restarts) { + // The element being searched falls within the last restart section, + // there is no guarantee such element can be found. 
+ start = int(it.restarts[len(it.restarts)-1]) + limit = len(it.data) + restartIndex = len(it.restarts) - 1 + } else { + // The element being searched falls within the non-last restart section, + // such element can be found for sure. + start = int(it.restarts[index-1]) + limit = int(it.restarts[index]) + restartIndex = index - 1 + } + var ( + result uint64 + pos = start + ) + for pos < limit { + x, n := binary.Uvarint(it.data[pos:]) + if n <= 0 { + it.setErr(fmt.Errorf("failed to decode item at pos %d", pos)) + return false + } + if pos == start { + result = x + } else { + result += x + } + pos += n + + if result > id { + if pos == limit { + it.set(pos, restartIndex+1, result) + } else { + it.set(pos, restartIndex, result) + } + return true + } + } + // The element which is greater than specified id is not found. + if index == len(it.restarts) { + it.reset() + return false + } + // The element which is the first one greater than the specified id + // is exactly the one located at the restart point. + item, n := binary.Uvarint(it.data[it.restarts[index]:]) + it.set(int(it.restarts[index])+n, index, item) + return true +} + +func (it *blockIterator) init() { + if it.dataPtr != -1 { + return + } + it.dataPtr = 0 + it.restartPtr = 0 +} + +// Next implements the HistoryIndexIterator, moving the iterator to the next +// element. If the iterator has been exhausted, and boolean with false should +// be returned. 
+func (it *blockIterator) Next() bool {
+	// Nothing can be yielded once the block is drained or a failure was recorded.
+	if it.err != nil || it.exhausted {
+		return false
+	}
+	it.init()
+
+	// Read the varint stored at the current data pointer.
+	cur := it.dataPtr
+	raw, size := binary.Uvarint(it.data[cur:])
+	if size <= 0 {
+		it.setErr(fmt.Errorf("failed to decode item at pos %d", cur))
+		return false
+	}
+	end := cur + size
+
+	// The entry located exactly at the restart offset is used as is, every
+	// other entry is added on top of the id the iterator currently holds.
+	id := raw
+	if cur != int(it.restarts[it.restartPtr]) {
+		id += it.id
+	}
+
+	// Step into the following restart section if the decoded entry ends
+	// right at the offset where that section begins.
+	section := it.restartPtr
+	if last := len(it.restarts) - 1; section < last && end == int(it.restarts[section+1]) {
+		section++
+	}
+	it.set(end, section, id)
+	return true
+}
+
+// ID implements HistoryIndexIterator and reports the id of the element the
+// iterator currently points at.
+func (it *blockIterator) ID() uint64 { return it.id }
+
+// Error implements HistoryIndexIterator and reports the error recorded by the
+// iterator, if any. Running out of elements does not count as an error.
+func (it *blockIterator) Error() error {
+	return it.err
+}
+
+// blockLoader is the callback used to obtain the reader of the index block
+// with the given id.
+type blockLoader func(id uint32) (*blockReader, error)
+
+// indexIterator is an iterator to traverse the history indices belonging to the
+// specific state entry.
+type indexIterator struct {
+	// immutable fields
+	descList []*indexBlockDesc // descriptors of the index blocks to traverse
+	loader   blockLoader       // resolves a block id into its block reader
+
+	// mutable fields
+	blockIt   *blockIterator // iterator over the opened block, nil if none is opened
+	blockPtr  int            // position in descList of the opened block, -1 if none
+	exhausted bool           // set once there is nothing left to iterate
+	err       error          // first error recorded by this iterator
+}
+
+// newIndexIterator creates an iterator over the index blocks described by
+// descList, using loader to fetch the corresponding block readers. The
+// returned iterator is not positioned on any element yet.
+func newIndexIterator(descList []*indexBlockDesc, loader blockLoader) *indexIterator {
+	it := &indexIterator{
+		descList: descList,
+		loader:   loader,
+	}
+	it.reset()
+	return it
+}
+
+// setErr records the given error unless an earlier one is already held.
+func (it *indexIterator) setErr(err error) {
+	if it.err != nil {
+		return
+	}
+	it.err = err
+}
+
+// reset drops the opened block and the recorded error, bringing the iterator
+// back to its initial, unpositioned state. An iterator without any block
+// descriptor is regarded as exhausted right away.
+func (it *indexIterator) reset() {
+	it.blockIt = nil
+	it.blockPtr = -1
+	it.exhausted = false
+	it.err = nil
+
+	if len(it.descList) == 0 {
+		it.exhausted = true
+	}
+}
+
+// open loads the block referenced by descList[blockPtr] and installs a fresh
+// block iterator on top of it. The iterator state is left untouched if the
+// block cannot be loaded.
+func (it *indexIterator) open(blockPtr int) error {
+	id := it.descList[blockPtr].id
+	br, err := it.loader(id)
+	if err != nil {
+		return err
+	}
+	it.blockIt = newBlockIterator(br.data, br.restarts)
+	it.blockPtr = blockPtr
+	return nil
+}
+
+// SeekGT moves the iterator to the first element whose id is greater than the
+// given number. It returns whether such element exists.
+//
+// Note, when a candidate block is found this operation unsets the exhausted
+// status and subsequent traversal is allowed. When no block has a max value
+// greater than the given number, the iterator is marked as exhausted, so that
+// a following Next does not resume from whatever position was held before.
+func (it *indexIterator) SeekGT(id uint64) bool {
+	if it.err != nil {
+		return false
+	}
+	// Locate the first block whose largest element exceeds the target.
+	index := sort.Search(len(it.descList), func(i int) bool {
+		return id < it.descList[i].max
+	})
+	if index == len(it.descList) {
+		// Seeking beyond the last element leaves nothing to traverse.
+		it.exhausted = true
+		return false
+	}
+	it.exhausted = false
+
+	// Reuse the opened block iterator if it already covers the target block.
+	if it.blockIt == nil || it.blockPtr != index {
+		if err := it.open(index); err != nil {
+			it.setErr(err)
+			return false
+		}
+	}
+	return it.blockIt.SeekGT(id)
+}
+
+// init opens the first block if no block has been opened so far.
+func (it *indexIterator) init() error {
+	if it.blockIt != nil {
+		return nil
+	}
+	return it.open(0)
+}
+
+// Next implements the HistoryIndexIterator, moving the iterator to the next
+// element. If the iterator has been exhausted, false is returned.
+func (it *indexIterator) Next() bool {
+	if it.exhausted || it.err != nil {
+		return false
+	}
+	// Open the first block if the iterator has not been positioned yet.
+	if err := it.init(); err != nil {
+		it.setErr(err)
+		return false
+	}
+
+	if it.blockIt.Next() {
+		return true
+	}
+	// The block iterator also stops when it fails to decode an entry. Keep
+	// that failure instead of silently moving on to the next block, which
+	// would discard the error and skip the rest of the current block.
+	if err := it.blockIt.Error(); err != nil {
+		it.setErr(err)
+		return false
+	}
+	// The current block is drained, stop if it was the last one.
+	if it.blockPtr == len(it.descList)-1 {
+		it.exhausted = true
+		return false
+	}
+	// Continue with the first element of the following block.
+	if err := it.open(it.blockPtr + 1); err != nil {
+		it.setErr(err)
+		return false
+	}
+	return it.blockIt.Next()
+}
+
+// Error implements HistoryIndexIterator, returning any accumulated error,
+// either recorded by this iterator or by the block iterator it currently
+// holds. Exhausting all the elements is not considered to be an error.
+func (it *indexIterator) Error() error {
+	if it.err != nil {
+		return it.err
+	}
+	if it.blockIt != nil {
+		return it.blockIt.Error()
+	}
+	return nil
+}
+
+// ID implements HistoryIndexIterator, returning the id of the element where the
+// iterator is positioned at. Zero is returned if no block has been opened yet,
+// namely before the first Next or SeekGT call.
+func (it *indexIterator) ID() uint64 {
+	if it.blockIt == nil {
+		return 0
+	}
+	return it.blockIt.ID()
+}
diff --git a/triedb/pathdb/history_index_iterator_test.go b/triedb/pathdb/history_index_iterator_test.go
new file mode 100644
index 00000000000..da60dc6e8f2
--- /dev/null
+++ b/triedb/pathdb/history_index_iterator_test.go
@@ -0,0 +1,297 @@
+// Copyright 2025 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library.
If not, see input + }) + var ( + exp bool + expVal uint64 + remains []uint64 + ) + if index == len(elements) { + exp = false + } else { + exp = true + expVal = elements[index] + if index < len(elements) { + remains = elements[index+1:] + } + } + if err := checkSeekGT(it, input, exp, expVal); err != nil { + t.Fatal(err) + } + if exp { + if err := checkNext(it, remains); err != nil { + t.Fatal(err) + } + } + } +} + +func TestIndexIteratorSeekGT(t *testing.T) { + ident := newAccountIdent(common.Hash{0x1}) + + dbA := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1)) + + dbB := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap)) + + dbC := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1)) + + dbD := rawdb.NewMemoryDatabase() + testIndexIterator(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1)) +} + +func testIndexIterator(t *testing.T, stateIdent stateIdent, db ethdb.Database, elements []uint64) { + ir, err := newIndexReader(db, stateIdent) + if err != nil { + t.Fatalf("Failed to open the index reader, %v", err) + } + it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) { + return newBlockReader(readStateIndexBlock(stateIdent, db, id)) + }) + + for i := 0; i < 128; i++ { + var input uint64 + if rand.Intn(2) == 0 { + input = elements[rand.Intn(len(elements))] + } else { + input = uint64(rand.Uint32()) + } + index := sort.Search(len(elements), func(i int) bool { + return elements[i] > input + }) + var ( + exp bool + expVal uint64 + remains []uint64 + ) + if index == len(elements) { + exp = false + } else { + exp = true + expVal = elements[index] + if index < len(elements) { + remains = elements[index+1:] + } + } + if err := checkSeekGT(it, input, exp, expVal); err != nil { + t.Fatal(err) + } + if exp { + if err := checkNext(it, remains); err != nil { + 
t.Fatal(err) + } + } + } +} + +func TestBlockIteratorTraversal(t *testing.T) { + /* 0-size index block is not allowed + + data, elements := makeTestIndexBlock(0) + testBlockIterator(t, data, elements) + */ + + data, elements := makeTestIndexBlock(1) + testBlockIteratorTraversal(t, data, elements) + + data, elements = makeTestIndexBlock(indexBlockRestartLen) + testBlockIteratorTraversal(t, data, elements) + + data, elements = makeTestIndexBlock(3 * indexBlockRestartLen) + testBlockIteratorTraversal(t, data, elements) + + data, elements = makeTestIndexBlock(indexBlockEntriesCap) + testBlockIteratorTraversal(t, data, elements) +} + +func testBlockIteratorTraversal(t *testing.T, data []byte, elements []uint64) { + br, err := newBlockReader(data) + if err != nil { + t.Fatalf("Failed to open the block for reading, %v", err) + } + it := newBlockIterator(br.data, br.restarts) + + if err := checkNext(it, elements); err != nil { + t.Fatal(err) + } +} + +func TestIndexIteratorTraversal(t *testing.T) { + ident := newAccountIdent(common.Hash{0x1}) + + dbA := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbA, makeTestIndexBlocks(dbA, ident, 1)) + + dbB := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbB, makeTestIndexBlocks(dbB, ident, 3*indexBlockEntriesCap)) + + dbC := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbC, makeTestIndexBlocks(dbC, ident, indexBlockEntriesCap-1)) + + dbD := rawdb.NewMemoryDatabase() + testIndexIteratorTraversal(t, ident, dbD, makeTestIndexBlocks(dbD, ident, indexBlockEntriesCap+1)) +} + +func testIndexIteratorTraversal(t *testing.T, stateIdent stateIdent, db ethdb.KeyValueReader, elements []uint64) { + ir, err := newIndexReader(db, stateIdent) + if err != nil { + t.Fatalf("Failed to open the index reader, %v", err) + } + it := newIndexIterator(ir.descList, func(id uint32) (*blockReader, error) { + return newBlockReader(readStateIndexBlock(stateIdent, db, id)) + }) + if err := checkNext(it, 
elements); err != nil { + t.Fatal(err) + } +}