-
Notifications
You must be signed in to change notification settings - Fork 384
fix: add erasure reDecoder for evicted chunks #5097
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
3c322a0
fix: call recovery after cache eviction
nugaon c41ec35
fix: lint
nugaon 4f369c5
refactor: remove factory naming
nugaon 433294d
fix: get redundancy getter from cache
nugaon 92efdbe
fix: return cached value if not null
nugaon 94cddeb
fix: only attempt recovery if in storage not found
nugaon acd23d7
test: redecorder
nugaon 6de3a56
fix: add rsbuf nil check
nugaon 1de8ca1
test: fix testdata generation
nugaon 3ba1eea
chore: change year in file comment header
nugaon File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,209 @@ | ||
| // Copyright 2025 The Swarm Authors. All rights reserved. | ||
| // Use of this source code is governed by a BSD-style | ||
| // license that can be found in the LICENSE file. | ||
|
|
||
| package joiner_test | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "context" | ||
| "strconv" | ||
| "testing" | ||
|
|
||
| "github.com/ethersphere/bee/v2/pkg/cac" | ||
| "github.com/ethersphere/bee/v2/pkg/file/joiner" | ||
| "github.com/ethersphere/bee/v2/pkg/file/redundancy/getter" | ||
| "github.com/ethersphere/bee/v2/pkg/log" | ||
| "github.com/ethersphere/bee/v2/pkg/storage" | ||
| "github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore" | ||
| "github.com/ethersphere/bee/v2/pkg/swarm" | ||
| "github.com/klauspost/reedsolomon" | ||
| ) | ||
|
|
||
// TestReDecoderFlow exercises the complete eviction/recovery round trip:
//  1. build a 4+2 Reed-Solomon coded set of content-addressed chunks,
//  2. serve them through a mock network fetcher that reports one data
//     chunk as missing, forcing the decoder to recover it,
//  3. verify the recovered chunk is persisted to the local store,
//  4. obtain a second decoder for the same addresses (the ReDecoder
//     fallback path used after cache eviction) and verify all chunks
//     remain retrievable.
func TestReDecoderFlow(t *testing.T) {
	ctx := context.Background()
	dataShardCount := 4
	parityShardCount := 2
	totalShardCount := dataShardCount + parityShardCount

	// Build deterministic payloads for the data shards. Each buffer is
	// chunk-with-span sized so it can be fed to the RS encoder directly;
	// only the "test-data-N" prefix is non-zero.
	dataShards := make([][]byte, dataShardCount)
	for i := 0; i < dataShardCount; i++ {
		dataShards[i] = make([]byte, swarm.ChunkWithSpanSize)
		// Unique marker per shard so mismatches are detectable.
		testData := []byte("test-data-" + strconv.Itoa(i))
		// Copy as much as will fit into the fixed-size buffer.
		copy(dataShards[i], testData)
	}

	// Allocate zeroed parity buffers; enc.Encode fills them in below.
	parityShards := make([][]byte, parityShardCount)
	for i := 0; i < parityShardCount; i++ {
		parityShards[i] = make([]byte, swarm.ChunkWithSpanSize)
	}

	// Reed-Solomon encoder matching the shard layout above.
	enc, err := reedsolomon.New(dataShardCount, parityShardCount)
	if err != nil {
		t.Fatalf("Failed to create Reed-Solomon encoder: %v", err)
	}

	// Data shards first, parity shards after — the order reedsolomon expects.
	allShards := make([][]byte, totalShardCount)
	copy(allShards, dataShards)
	copy(allShards[dataShardCount:], parityShards)

	// Generate the parity shard contents.
	if err := enc.Encode(allShards); err != nil {
		t.Fatalf("Failed to encode data: %v", err)
	}

	// Wrap every shard in a content-addressed chunk so the addresses
	// handed to the decoder are valid Swarm addresses.
	addresses := make([]swarm.Address, totalShardCount)
	chunks := make([]swarm.Chunk, totalShardCount)

	for i := 0; i < totalShardCount; i++ {
		chunk, err := cac.NewWithDataSpan(allShards[i])
		if err != nil {
			t.Fatalf("Failed to create content-addressed chunk %d: %v", i, err)
		}
		chunks[i] = chunk
		addresses[i] = chunk.Address()
	}

	// The third data chunk is absent from the "network" and must be
	// reconstructed from the remaining shards.
	missingChunkIndex := 2
	mockStore := inmemchunkstore.New()
	netFetcher := newMockNetworkFetcher(addresses, addresses[missingChunkIndex])
	config := getter.Config{
		Strategy: getter.RACE,
		Logger:   log.Noop,
	}

	j := joiner.NewDecoderCache(netFetcher, mockStore, config)

	// Step 1: create the decoder; fetching the missing chunk triggers recovery.
	decoder := j.GetOrCreate(addresses, dataShardCount)
	if decoder == nil {
		t.Fatal("Failed to create decoder")
	}

	// The previously missing chunk should be obtainable via recovery.
	recoveredChunk, err := decoder.Get(ctx, addresses[missingChunkIndex])
	if err != nil {
		t.Fatalf("Failed to recover missing chunk: %v", err)
	}
	// Address must match the one the chunk was requested under.
	if !recoveredChunk.Address().Equal(addresses[missingChunkIndex]) {
		t.Fatalf("Recovered chunk has incorrect address")
	}

	// Payload must be byte-identical to the original shard's chunk data.
	recoveredData := recoveredChunk.Data()
	expectedData := chunks[missingChunkIndex].Data()
	if len(recoveredData) != len(expectedData) {
		t.Fatalf("Recovered chunk has incorrect data length: got %d, want %d", len(recoveredData), len(expectedData))
	}
	if !bytes.Equal(recoveredData, expectedData) {
		t.Fatalf("Recovered chunk has incorrect data")
	}
	// Recovery is expected to persist the reconstructed chunk locally.
	_, err = mockStore.Get(ctx, addresses[missingChunkIndex])
	if err != nil {
		t.Fatalf("Recovered chunk not saved to store: %v", err)
	}

	// Step 2: after a successful recovery the decoder nulls its internal
	// state. That is not directly observable from here, so confirm the
	// data shards stay reachable through the same decoder instance.
	for i := 0; i < dataShardCount; i++ {
		_, err := decoder.Get(ctx, addresses[i])
		if err != nil {
			t.Fatalf("Failed to get chunk %d after recovery: %v", i, err)
		}
	}

	// Step 3: a second GetOrCreate for the same span exercises the
	// ReDecoder fallback created for evicted cache entries.
	newDecoder := j.GetOrCreate(addresses, dataShardCount)
	if newDecoder == nil {
		t.Fatal("Failed to create ReDecoder")
	}

	// All data chunks must be fetchable through the new decoder.
	for i := 0; i < dataShardCount; i++ {
		_, err := newDecoder.Get(ctx, addresses[i])
		if err != nil {
			t.Fatalf("Failed to get chunk %d through ReDecoder: %v", i, err)
		}
	}

	// The previously missing chunk should now be served from the local
	// store, without touching the network or another recovery round.
	retrievedChunk, err := newDecoder.Get(ctx, addresses[missingChunkIndex])
	if err != nil {
		t.Fatalf("Failed to retrieve previously recovered chunk: %v", err)
	}

	if !retrievedChunk.Address().Equal(addresses[missingChunkIndex]) {
		t.Fatalf("Retrieved chunk has incorrect address")
	}

	// And its content must still match the original shard.
	retrievedData := retrievedChunk.Data()
	expectedData = chunks[missingChunkIndex].Data()
	if len(retrievedData) != len(expectedData) {
		t.Fatalf("Retrieved chunk has incorrect data length: got %d, want %d", len(retrievedData), len(expectedData))
	}
	if !bytes.Equal(retrievedData, expectedData) {
		t.Fatalf("Retrieved chunk has incorrect data")
	}
}
|
|
||
// mockNetworkFetcher is a storage.Getter test double that simulates network
// retrieval: it serves deterministic shard data for known addresses and
// reports one configured address as not found.
type mockNetworkFetcher struct {
	allAddresses []swarm.Address // addresses of all shards, in shard order
	missingAddr  swarm.Address   // the single address that always fails with ErrNotFound
}

// newMockNetworkFetcher creates a mock fetcher that returns
// storage.ErrNotFound for missingAddr and synthesized shard data for every
// address listed in allAddrs.
func newMockNetworkFetcher(allAddrs []swarm.Address, missingAddr swarm.Address) *mockNetworkFetcher {
	return &mockNetworkFetcher{
		allAddresses: allAddrs,
		missingAddr:  missingAddr,
	}
}
|
|
||
| // Get implements storage.Getter interface | ||
| func (m *mockNetworkFetcher) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { | ||
| // Simulate network fetch - fail for the missing chunk | ||
| if addr.Equal(m.missingAddr) { | ||
| return nil, storage.ErrNotFound | ||
| } | ||
|
|
||
| // Find the index of this address in the all addresses list | ||
| var index int | ||
| for i, a := range m.allAddresses { | ||
| if addr.Equal(a) { | ||
| index = i | ||
| break | ||
| } | ||
| } | ||
|
|
||
| // Generate data using the same pattern as in the test | ||
| data := make([]byte, swarm.ChunkWithSpanSize) | ||
| // Create a unique string for this shard | ||
| testData := []byte("test-data-" + strconv.Itoa(index)) | ||
| // Copy as much as will fit | ||
| copy(data, testData) | ||
|
|
||
| return swarm.NewChunk(addr, data), nil | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| // Copyright 2025 The Swarm Authors. All rights reserved. | ||
| // Use of this source code is governed by a BSD-style | ||
| // license that can be found in the LICENSE file. | ||
|
|
||
| package getter | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
|
|
||
| "github.com/ethersphere/bee/v2/pkg/log" | ||
| "github.com/ethersphere/bee/v2/pkg/storage" | ||
| "github.com/ethersphere/bee/v2/pkg/swarm" | ||
| ) | ||
|
|
||
// Recovery is a factory that builds a recovery (erasure-decoding) getter on
// demand, so the comparatively expensive decoder is only constructed when a
// fallback is actually needed.
type Recovery func() storage.Getter
|
|
||
// ReDecoder is a wrapper around a Getter that first attempts to fetch a chunk
// directly from the network, and only falls back to recovery if the network
// fetch fails with not-found. This handles the case where previously
// recovered chunks have been evicted from the cache.
type ReDecoder struct {
	fetcher  storage.Getter // direct fetcher (usually netstore), tried first
	recovery Recovery       // factory creating the recovery decoder lazily, only on a miss
	logger   log.Logger
}
|
|
||
| // NewReDecoder creates a new ReDecoder instance with the provided fetcher and recovery factory. | ||
| // The recovery decoder will only be created if needed (when network fetch fails). | ||
| func NewReDecoder(fetcher storage.Getter, recovery Recovery, logger log.Logger) *ReDecoder { | ||
| return &ReDecoder{ | ||
| fetcher: fetcher, | ||
| recovery: recovery, | ||
| logger: logger, | ||
| } | ||
| } | ||
|
|
||
| // Get implements the storage.Getter interface. | ||
| // It first attempts to fetch the chunk directly from the network. | ||
| // If that fails with ErrNotFound, it then creates the recovery decoder and attempts to recover the chunk. | ||
| func (rd *ReDecoder) Get(ctx context.Context, addr swarm.Address) (swarm.Chunk, error) { | ||
| // First try to get the chunk directly from the network | ||
| chunk, err := rd.fetcher.Get(ctx, addr) | ||
| if err == nil { | ||
| return chunk, nil | ||
| } | ||
|
|
||
| // Only attempt recovery if the chunk was not found | ||
| if !errors.Is(err, storage.ErrNotFound) { | ||
| return nil, err | ||
| } | ||
|
|
||
| // Log that we're falling back to recovery | ||
| rd.logger.Debug("chunk not found in network, creating recovery decoder", "address", addr) | ||
|
|
||
| // Create the recovery decoder on demand | ||
| recovery := rd.recovery() | ||
|
|
||
| // Attempt to recover the chunk | ||
| return recovery.Get(ctx, addr) | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think here it is possible to have a deadlock. At this point, the same goroutine is holding the lock from line 96 and trying to acquire it again here in decoderCallback.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
decoderCallback is only called from the prefetch function, which is executed by a different goroutine.