Skip to content

Commit ca00f89

Browse files
committed
fix: update state to allow iter continuance on NotFound errors
1 parent aadb6fe commit ca00f89

File tree

3 files changed

+90
-7
lines changed

3 files changed

+90
-7
lines changed
158 KB
Binary file not shown.

hamt/shardeddir.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,20 +197,19 @@ type _UnixFSShardedDir__ListItr struct {
197197
}
198198

199199
// Next advances the list iterator by delegating to itr.next and returns the
// entry's index together with its link. It returns (-1, nil, err) when
// itr.next fails and (-1, nil, nil) when itr.next yields a nil link;
// otherwise it returns the value itr.total held on entry to the call.
//
// NOTE (diff view): this commit moves the read-and-increment of itr.total
// from just before the final return (old lines 207-208, removed) to the top
// of the function (new lines 200-201, added), so the counter now advances on
// every call, including calls that return an error or a nil link.
func (itr *_UnixFSShardedDir__ListItr) Next() (int64, dagpb.PBLink, error) {
200+
total := itr.total
201+
itr.total++
200202
next, err := itr.next()
201203
if err != nil {
202204
return -1, nil, err
203205
}
204206
if next == nil {
205207
return -1, nil, nil
206208
}
207-
total := itr.total
208-
itr.total++
209209
return total, next, nil
210210
}
211211

212212
func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) {
213-
214213
if itr.childIter == nil {
215214
if itr._substrate.Done() {
216215
return nil, nil
@@ -232,15 +231,16 @@ func (itr *_UnixFSShardedDir__ListItr) next() (dagpb.PBLink, error) {
232231
nd: child,
233232
maxPadLen: maxPadLength(child.data),
234233
}
235-
236234
}
237235
_, next, err := itr.childIter.Next()
238-
if err != nil {
239-
return nil, err
240-
}
241236
if itr.childIter.Done() {
237+
// do this even on error to make sure we don't overrun a shard where the
238+
// end is missing and the user is ignoring NotFound errors
242239
itr.childIter = nil
243240
}
241+
if err != nil {
242+
return nil, err
243+
}
244244
return next, nil
245245
}
246246

hamt/shardeddir_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"io"
88
"math/rand"
9+
"os"
910
"sort"
1011
"testing"
1112
"time"
@@ -16,10 +17,12 @@ import (
1617
ft "github.com/ipfs/go-unixfs"
1718
legacy "github.com/ipfs/go-unixfs/hamt"
1819
"github.com/ipfs/go-unixfsnode/hamt"
20+
"github.com/ipld/go-car/v2/storage"
1921
dagpb "github.com/ipld/go-codec-dagpb"
2022
"github.com/ipld/go-ipld-prime"
2123
"github.com/ipld/go-ipld-prime/fluent/qp"
2224
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
25+
basicnode "github.com/ipld/go-ipld-prime/node/basic"
2326
"github.com/ipld/go-ipld-prime/schema"
2427
"github.com/stretchr/testify/require"
2528
)
@@ -206,3 +209,83 @@ func TestFindNonExisting(t *testing.T) {
206209
require.EqualError(t, err, schema.ErrNoSuchField{Field: ipld.PathSegmentOfString(key)}.Error())
207210
}
208211
}
212+
213+
func TestIncompleteShardedIteration(t *testing.T) {
214+
ctx := context.Background()
215+
req := require.New(t)
216+
217+
fixture := "./fixtures/wikipedia-cryptographic-hash-function.car"
218+
f, err := os.Open(fixture)
219+
req.NoError(err)
220+
defer f.Close()
221+
carstore, err := storage.OpenReadable(f)
222+
req.NoError(err)
223+
lsys := cidlink.DefaultLinkSystem()
224+
lsys.TrustedStorage = true
225+
lsys.SetReadStorage(carstore)
226+
227+
// classic recursive go-ipld-prime map iteration, being forgiving about
228+
// NotFound block loads to see what we end up with
229+
230+
kvs := make(map[string]string)
231+
var iterNotFound int
232+
blockNotFound := make(map[string]struct{})
233+
234+
var iter func(string, ipld.Link)
235+
iter = func(dir string, lnk ipld.Link) {
236+
nd, err := lsys.Load(ipld.LinkContext{Ctx: ctx}, lnk, basicnode.Prototype.Any)
237+
if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
238+
// got a named link that we can't load
239+
blockNotFound[dir] = struct{}{}
240+
return
241+
}
242+
req.NoError(err)
243+
if nd.Kind() == ipld.Kind_Bytes {
244+
bv, err := nd.AsBytes()
245+
req.NoError(err)
246+
kvs[dir] = string(bv)
247+
return
248+
}
249+
250+
nb := dagpb.Type.PBNode.NewBuilder()
251+
req.NoError(nb.AssignNode(nd))
252+
pbn := nb.Build()
253+
hamtShard, err := hamt.AttemptHAMTShardFromNode(ctx, pbn, &lsys)
254+
req.NoError(err)
255+
256+
mi := hamtShard.MapIterator()
257+
for !mi.Done() {
258+
k, v, err := mi.Next()
259+
if nf, ok := err.(interface{ NotFound() bool }); ok && nf.NotFound() {
260+
// internal shard link that won't load, we don't know what it might
261+
// point to
262+
iterNotFound++
263+
continue
264+
}
265+
req.NoError(err)
266+
ks, err := k.AsString()
267+
req.NoError(err)
268+
req.Equal(ipld.Kind_Link, v.Kind())
269+
lv, err := v.AsLink()
270+
req.NoError(err)
271+
iter(dir+"/"+ks, lv)
272+
}
273+
}
274+
// walk the tree
275+
iter("", cidlink.Link{Cid: carstore.Roots()[0]})
276+
277+
req.Len(kvs, 1)
278+
req.Contains(kvs, "/wiki/Cryptographic_hash_function")
279+
req.Contains(kvs["/wiki/Cryptographic_hash_function"], "<title>Cryptographic hash function</title>\n")
280+
req.Equal(iterNotFound, 570) // tried to load 570 blocks that were not in the CAR
281+
req.Len(blockNotFound, 110) // 110 blocks, for named links, were not found in the CAR
282+
// some of the root block links
283+
req.Contains(blockNotFound, "/favicon.ico")
284+
req.Contains(blockNotFound, "/index.html")
285+
req.Contains(blockNotFound, "/zimdump_version")
286+
// some of the shard links
287+
req.Contains(blockNotFound, "/wiki/UK_railway_Signal")
288+
req.Contains(blockNotFound, "/wiki/Australian_House")
289+
req.Contains(blockNotFound, "/wiki/ICloud_Drive")
290+
req.Contains(blockNotFound, "/wiki/Édouard_Bamberger")
291+
}

0 commit comments

Comments
 (0)