Skip to content

Commit fe66dd9

Browse files
committed
documentation extensions and polishing
1 parent 39ff4a5 commit fe66dd9

File tree

3 files changed

+39
-13
lines changed

3 files changed

+39
-13
lines changed

storage/chunk_data_packs_stored.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type StoredChunkDataPack struct {
3838
ChunkID flow.Identifier
3939
StartState flow.StateCommitment
4040
Proof flow.StorageProof
41-
CollectionID flow.Identifier
41+
CollectionID flow.Identifier // flow.ZeroID for system chunks
4242
ExecutionDataRoot flow.BlockExecutionDataRoot
4343
}
4444

@@ -58,6 +58,7 @@ func NewStoredChunkDataPack(
5858
}
5959
}
6060

61+
// IsSystemChunk returns true if this chunk data pack is for a system chunk.
6162
func (s *StoredChunkDataPack) IsSystemChunk() bool {
6263
return s.CollectionID == flow.ZeroID
6364
}
@@ -67,7 +68,6 @@ func ToStoredChunkDataPack(c *flow.ChunkDataPack) *StoredChunkDataPack {
6768
if c.Collection != nil {
6869
collectionID = c.Collection.ID()
6970
}
70-
7171
return NewStoredChunkDataPack(
7272
c.ChunkID,
7373
c.StartState,
@@ -77,7 +77,10 @@ func ToStoredChunkDataPack(c *flow.ChunkDataPack) *StoredChunkDataPack {
7777
)
7878
}
7979

80-
func ToStoredChunkDataPacks(cs []*flow.ChunkDataPack) []*StoredChunkDataPack { // ToStoredChunkDataPack converts the given ChunkDataPacks to their reduced representation,
80+
// ToStoredChunkDataPacks converts the given Chunk Data Packs to their reduced representation.
81+
// This is useful for reducing storage consumption, by avoiding repeated storage of the full collections
82+
// (stored individually anyway).
83+
func ToStoredChunkDataPacks(cs []*flow.ChunkDataPack) []*StoredChunkDataPack {
8184
scs := make([]*StoredChunkDataPack, 0, len(cs))
8285
for _, c := range cs {
8386
scs = append(scs, ToStoredChunkDataPack(c))

storage/operation/chunk_data_packs.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ import (
1010
"github.com/onflow/flow-go/storage"
1111
)
1212

13-
// IndexChunkDataPackByChunkID inserts a mapping from chunk ID to stored chunk data pack ID.
14-
// It requires the [storage.LockInsertOwnReceipt] lock to be held by the caller.
13+
// IndexChunkDataPackByChunkID inserts a mapping from chunk ID to stored chunk data pack ID. It requires
14+
// the [storage.LockInsertOwnReceipt] lock to be acquired by the caller and held until the write batch has been committed.
1515
// Returns [storage.ErrDataMismatch] if a different chunk data pack ID already exists for the given chunk ID.
1616
func IndexChunkDataPackByChunkID(lctx lockctx.Proof, rw storage.ReaderBatchWriter, chunkID flow.Identifier, chunkDataPackID flow.Identifier) error {
1717
if !lctx.HoldsLock(storage.LockInsertOwnReceipt) {
@@ -35,7 +35,7 @@ func IndexChunkDataPackByChunkID(lctx lockctx.Proof, rw storage.ReaderBatchWrite
3535
}
3636

3737
// RetrieveChunkDataPackID retrieves the stored chunk data pack ID for a given chunk ID.
38-
// Returns [storage.ErrNotFound] if no mapping exists for the given chunk ID.
38+
// Returns [storage.ErrNotFound] if no chunk data pack has been indexed as the result for the given chunk ID.
3939
func RetrieveChunkDataPackID(r storage.Reader, chunkID flow.Identifier, chunkDataPackID *flow.Identifier) error {
4040
return RetrieveByKey(r, MakePrefix(codeIndexChunkDataPackByChunkID, chunkID), chunkDataPackID)
4141
}
@@ -47,14 +47,18 @@ func RemoveChunkDataPackID(w storage.Writer, chunkID flow.Identifier) error {
4747
}
4848

4949
// InsertStoredChunkDataPack inserts a [storage.StoredChunkDataPack] into the database, keyed by its own ID.
50-
// The caller must ensure the chunkDataPackID is the same as c.ID().
50+
//
51+
// CAUTION: The caller must ensure `storeChunkDataPackID` is the same as `c.ID()`, i.e. a collision-resistant
52+
// hash of the chunk data pack! This method silently overrides existing data, which is safe only if, for the
53+
// same key, we always write the same value.
54+
//
5155
// No error returns expected during normal operations.
5256
func InsertStoredChunkDataPack(rw storage.ReaderBatchWriter, storeChunkDataPackID flow.Identifier, c *storage.StoredChunkDataPack) error {
5357
return UpsertByKey(rw.Writer(), MakePrefix(codeChunkDataPack, storeChunkDataPackID), c)
5458
}
5559

5660
// RetrieveStoredChunkDataPack retrieves a chunk data pack by stored chunk data pack ID.
57-
// It returns [storage.ErrNotFound] if the chunk data pack is not found
61+
// It returns [storage.ErrNotFound] if no chunk data pack with the given ID is known.
5862
func RetrieveStoredChunkDataPack(r storage.Reader, storeChunkDataPackID flow.Identifier, c *storage.StoredChunkDataPack) error {
5963
return RetrieveByKey(r, MakePrefix(codeChunkDataPack, storeChunkDataPackID), c)
6064
}

storage/store/chunk_data_packs.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,30 @@ import (
1414
"github.com/onflow/flow-go/storage/operation"
1515
)
1616

17+
// ChunkDataPacks manages storage and retrieval of ChunkDataPacks, primarily serving the use case of EXECUTION NODES persisting
18+
// and indexing chunk data packs for their OWN RESULTS. Essentially, the chunk describes a batch of work to be done, and the
19+
// chunk data pack describes the result of that work. The storage of chunk data packs is segregated across different
20+
// storage components for efficiency and modularity reasons:
21+
// 0. Usually (ignoring the system chunk for a moment), the batch of work is given by the collection referenced in the chunk
22+
// data pack. For any chunk data pack being stored, we assume that the executed collection has *previously* been persisted
23+
// in [storage.Collections]. It is useful to persist the collections individually, so we can individually retrieve them.
24+
// 1. The actual chunk data pack itself is stored in a dedicated storage component `cdpStorage`. Note that for this storage
25+
// component, no atomicity is required, as we are storing chunk data packs by their collision-resistant hashes, so
26+
// different chunk data packs will be stored under different keys.
27+
// Theoretically, nodes could persist multiple different (disagreeing) chunk data packs for the same
28+
// chunk in this step. However, for efficiency, Execution Nodes only store their own chunk data packs.
29+
// 2. The index mapping from ChunkID to chunkDataPackID is stored in the protocol database for fast retrieval.
30+
// This index is intended to be populated by execution nodes when they commit to a specific result represented by the chunk
31+
// data pack. Here, we require atomicity, as an execution node should not be changing / overwriting which chunk data pack
32+
// it committed to (during normal operations).
33+
//
34+
// Since the executed collections are stored separately (step 0, above), we can just use the collection ID in the context of the
35+
// chunk data pack storage (step 1, above). Therefore, we utilize the reduced representation [storage.StoredChunkDataPack]
36+
// internally. While removing redundant data from storage, it takes 3 look-ups to return a chunk data pack by chunk ID:
37+
//
38+
// i. a lookup for chunkID -> chunkDataPackID
39+
// ii. a lookup for chunkDataPackID -> StoredChunkDataPack (only has CollectionID, no collection data)
40+
// iii. a lookup for CollectionID -> Collection, then reconstruct the chunk data pack from the collection and the StoredChunkDataPack
1741
type ChunkDataPacks struct {
1842
// the protocol DB is used for storing index mappings from chunk ID to chunk data pack ID
1943
protocolDB storage.DB
@@ -27,11 +51,6 @@ type ChunkDataPacks struct {
2751

2852
// cache chunkID -> chunkDataPackID
2953
chunkIDToChunkDataPackIDCache *Cache[flow.Identifier, flow.Identifier]
30-
31-
// it takes 3 look ups to return chunk data pack by chunk ID:
32-
// 1. a cache lookup for chunkID -> chunkDataPackID
33-
// 2. a lookup for chunkDataPackID -> StoredChunkDataPack (only has CollectionID, no collection data)
34-
// 3. a lookup for CollectionID -> Collection, then restore the chunk data pack with the collection and the StoredChunkDataPack
3554
}
3655

3756
var _ storage.ChunkDataPacks = (*ChunkDataPacks)(nil)

0 commit comments

Comments
 (0)