Skip to content

Commit cf3d09f

Browse files
committed
test: UnixFSHAMTDirectorySizeThreshold
1 parent 6f3dfb5 commit cf3d09f

File tree

4 files changed

+254
-31
lines changed

4 files changed

+254
-31
lines changed

test/cli/add_test.go

Lines changed: 235 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
package cli
22

33
import (
4+
"io"
5+
"os"
6+
"path/filepath"
7+
"strings"
48
"testing"
59

10+
"github.com/dustin/go-humanize"
611
"github.com/ipfs/kubo/config"
712
"github.com/ipfs/kubo/test/cli/harness"
13+
"github.com/ipfs/kubo/test/cli/testutils"
814
"github.com/stretchr/testify/assert"
915
"github.com/stretchr/testify/require"
1016
)
@@ -20,6 +26,11 @@ func TestAdd(t *testing.T) {
2026
shortStringCidV1Sha512 = "bafkrgqbqt3gerhas23vuzrapkdeqf4vu2dwxp3srdj6hvg6nhsug2tgyn6mj3u23yx7utftq3i2ckw2fwdh5qmhid5qf3t35yvkc5e5ottlw6"
2127
)
2228

29+
const (
30+
cidV0Length = 34 // cidv0 sha2-256
31+
cidV1Length = 36 // cidv1 sha2-256
32+
)
33+
2334
t.Run("produced cid version: implicit default (CIDv0)", func(t *testing.T) {
2435
t.Parallel()
2536
node := harness.NewT(t).NewNode().Init().StartDaemon()
@@ -109,15 +120,17 @@ func TestAdd(t *testing.T) {
109120

110121
t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSChunker=size-262144 and UnixFSFileMaxLinks", func(t *testing.T) {
111122
t.Parallel()
112-
node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v0")
113-
node.StartDaemon()
114-
defer node.StopDaemon()
115123
seed := "v0-seed"
124+
profile := "--profile=legacy-cid-v0"
116125

117126
t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) {
127+
t.Parallel()
128+
node := harness.NewT(t).NewNode().Init(profile)
129+
node.StartDaemon()
130+
defer node.StopDaemon()
118131
// Add 44544KiB file:
119132
// 174 * 256KiB should fit in single DAG layer
120-
cidStr := node.IPFSAddFromSeed("44544KiB", seed)
133+
cidStr := node.IPFSAddDeterministic("44544KiB", seed)
121134
root, err := node.InspectPBNode(cidStr)
122135
assert.NoError(t, err)
123136
require.Equal(t, 174, len(root.Links))
@@ -126,8 +139,12 @@ func TestAdd(t *testing.T) {
126139
})
127140

128141
t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) {
142+
t.Parallel()
143+
node := harness.NewT(t).NewNode().Init(profile)
144+
node.StartDaemon()
145+
defer node.StopDaemon()
129146
// add 256KiB (one more block), it should force rebalancing DAG and moving most to second layer
130-
cidStr := node.IPFSAddFromSeed("44800KiB", seed)
147+
cidStr := node.IPFSAddDeterministic("44800KiB", seed)
131148
root, err := node.InspectPBNode(cidStr)
132149
assert.NoError(t, err)
133150
require.Equal(t, 2, len(root.Links))
@@ -136,6 +153,52 @@ func TestAdd(t *testing.T) {
136153
})
137154
})
138155

156+
t.Run("ipfs init --profile=legacy-cid-v0 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
157+
t.Parallel()
158+
seed := "hamt-legacy-cid-v0"
159+
profile := "--profile=legacy-cid-v0"
160+
161+
t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
162+
t.Parallel()
163+
node := harness.NewT(t).NewNode().Init(profile)
164+
node.StartDaemon()
165+
defer node.StopDaemon()
166+
167+
randDir, err := os.MkdirTemp(node.Dir, seed)
168+
require.NoError(t, err)
169+
170+
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
171+
err = createDirectoryForHAMT(randDir, cidV0Length, "255KiB", seed)
172+
require.NoError(t, err)
173+
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
174+
175+
// Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating a regular "basic" directory)
176+
root, err := node.InspectPBNode(cidStr)
177+
assert.NoError(t, err)
178+
require.Equal(t, 903, len(root.Links))
179+
})
180+
181+
t.Run("above UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
182+
t.Parallel()
183+
node := harness.NewT(t).NewNode().Init(profile)
184+
node.StartDaemon()
185+
defer node.StopDaemon()
186+
187+
randDir, err := os.MkdirTemp(node.Dir, seed)
188+
require.NoError(t, err)
189+
190+
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
191+
err = createDirectoryForHAMT(randDir, cidV0Length, "257KiB", seed)
192+
require.NoError(t, err)
193+
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
194+
195+
// Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256
196+
root, err := node.InspectPBNode(cidStr)
197+
assert.NoError(t, err)
198+
require.Equal(t, 252, len(root.Links))
199+
})
200+
})
201+
139202
t.Run("ipfs init --profile=legacy-cid-v1 produces CIDv1 with raw leaves", func(t *testing.T) {
140203
t.Parallel()
141204
node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1")
@@ -146,17 +209,19 @@ func TestAdd(t *testing.T) {
146209
require.Equal(t, shortStringCidV1, cidStr) // raw leaf
147210
})
148211

149-
t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks", func(t *testing.T) {
212+
t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSChunker=size-1048576", func(t *testing.T) {
150213
t.Parallel()
151-
node := harness.NewT(t).NewNode().Init("--profile=legacy-cid-v1")
152-
node.StartDaemon()
153-
defer node.StopDaemon()
154214
seed := "v1-seed"
215+
profile := "--profile=legacy-cid-v1"
155216

156217
t.Run("under UnixFSFileMaxLinks=174", func(t *testing.T) {
218+
t.Parallel()
219+
node := harness.NewT(t).NewNode().Init(profile)
220+
node.StartDaemon()
221+
defer node.StopDaemon()
157222
// Add 174MiB file:
158223
// 174 * 1MiB should fit in single layer
159-
cidStr := node.IPFSAddFromSeed("174MiB", seed)
224+
cidStr := node.IPFSAddDeterministic("174MiB", seed)
160225
root, err := node.InspectPBNode(cidStr)
161226
assert.NoError(t, err)
162227
require.Equal(t, 174, len(root.Links))
@@ -165,8 +230,12 @@ func TestAdd(t *testing.T) {
165230
})
166231

167232
t.Run("above UnixFSFileMaxLinks=174", func(t *testing.T) {
233+
t.Parallel()
234+
node := harness.NewT(t).NewNode().Init(profile)
235+
node.StartDaemon()
236+
defer node.StopDaemon()
168237
// add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer
169-
cidStr := node.IPFSAddFromSeed("175MiB", seed)
238+
cidStr := node.IPFSAddDeterministic("175MiB", seed)
170239
root, err := node.InspectPBNode(cidStr)
171240
assert.NoError(t, err)
172241
require.Equal(t, 2, len(root.Links))
@@ -175,17 +244,65 @@ func TestAdd(t *testing.T) {
175244
})
176245
})
177246

178-
t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks", func(t *testing.T) {
247+
t.Run("ipfs init --profile=legacy-cid-v1 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
248+
t.Parallel()
249+
seed := "hamt-legacy-cid-v1"
250+
profile := "--profile=legacy-cid-v1"
251+
252+
t.Run("under UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
253+
t.Parallel()
254+
node := harness.NewT(t).NewNode().Init(profile)
255+
node.StartDaemon()
256+
defer node.StopDaemon()
257+
258+
randDir, err := os.MkdirTemp(node.Dir, seed)
259+
require.NoError(t, err)
260+
261+
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
262+
err = createDirectoryForHAMT(randDir, cidV1Length, "255KiB", seed)
263+
require.NoError(t, err)
264+
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
265+
266+
// Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating a regular "basic" directory)
267+
root, err := node.InspectPBNode(cidStr)
268+
assert.NoError(t, err)
269+
require.Equal(t, 897, len(root.Links))
270+
})
271+
272+
t.Run("above UnixFSHAMTDirectorySizeThreshold=256KiB", func(t *testing.T) {
273+
t.Parallel()
274+
node := harness.NewT(t).NewNode().Init(profile)
275+
node.StartDaemon()
276+
defer node.StopDaemon()
277+
278+
randDir, err := os.MkdirTemp(node.Dir, seed)
279+
require.NoError(t, err)
280+
281+
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
282+
err = createDirectoryForHAMT(randDir, cidV1Length, "257KiB", seed)
283+
require.NoError(t, err)
284+
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
285+
286+
// Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256
287+
root, err := node.InspectPBNode(cidStr)
288+
assert.NoError(t, err)
289+
require.Equal(t, 245, len(root.Links))
290+
})
291+
})
292+
293+
t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSChunker=size-1048576 and UnixFSFileMaxLinks=1024", func(t *testing.T) {
179294
t.Parallel()
180-
node := harness.NewT(t).NewNode().Init("--profile=test-cid-v1-2025-v35")
181-
node.StartDaemon()
182-
defer node.StopDaemon()
183295
seed := "v1-seed-1024"
296+
profile := "--profile=test-cid-v1-2025-v35"
184297

185298
t.Run("under UnixFSFileMaxLinks=1024", func(t *testing.T) {
299+
t.Parallel()
300+
node := harness.NewT(t).NewNode().Init(profile)
301+
node.StartDaemon()
302+
defer node.StopDaemon()
186303
// Add 1024MiB file:
187304
// 1024 * 1MiB should fit in single layer
188-
cidStr := node.IPFSAddFromSeed("1024MiB", seed)
305+
cidStr := node.IPFSAddDeterministic("1024MiB", seed)
189306
root, err := node.InspectPBNode(cidStr)
190307
assert.NoError(t, err)
191308
require.Equal(t, 1024, len(root.Links))
@@ -194,13 +311,114 @@ func TestAdd(t *testing.T) {
194311
})
195312

196313
t.Run("above UnixFSFileMaxLinks=1024", func(t *testing.T) {
314+
t.Parallel()
315+
node := harness.NewT(t).NewNode().Init(profile)
316+
node.StartDaemon()
317+
defer node.StopDaemon()
197318
// add +1MiB (one more block), it should force rebalancing DAG and moving most to second layer
198-
cidStr := node.IPFSAddFromSeed("1025MiB", seed)
319+
cidStr := node.IPFSAddDeterministic("1025MiB", seed)
199320
root, err := node.InspectPBNode(cidStr)
200321
assert.NoError(t, err)
201322
require.Equal(t, 2, len(root.Links))
202323
// expect same CID every time
203324
require.Equal(t, "bafybeieilp2qx24pe76hxrxe6bpef5meuxto3kj5dd6mhb5kplfeglskdm", cidStr)
204325
})
205326
})
327+
328+
t.Run("ipfs init --profile=test-cid-v1-2025-v35 applies UnixFSHAMTDirectoryMaxFanout=256 and UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) {
329+
t.Parallel()
330+
seed := "hamt-legacy-cid-v1"
331+
profile := "--profile=test-cid-v1-2025-v35"
332+
333+
t.Run("under UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) {
334+
t.Parallel()
335+
node := harness.NewT(t).NewNode().Init(profile)
336+
node.StartDaemon()
337+
defer node.StopDaemon()
338+
339+
randDir, err := os.MkdirTemp(node.Dir, seed)
340+
require.NoError(t, err)
341+
342+
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
343+
err = createDirectoryForHAMT(randDir, cidV1Length, "1023KiB", seed)
344+
require.NoError(t, err)
345+
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
346+
347+
// Confirm the number of links is more than UnixFSHAMTDirectoryMaxFanout=256 (indicating a regular "basic" directory)
348+
root, err := node.InspectPBNode(cidStr)
349+
assert.NoError(t, err)
350+
require.Equal(t, 3599, len(root.Links))
351+
})
352+
353+
t.Run("above UnixFSHAMTDirectorySizeThreshold=1MiB", func(t *testing.T) {
354+
t.Parallel()
355+
node := harness.NewT(t).NewNode().Init(profile)
356+
node.StartDaemon()
357+
defer node.StopDaemon()
358+
359+
randDir, err := os.MkdirTemp(node.Dir, seed)
360+
require.NoError(t, err)
361+
362+
// Create directory with a lot of files that have filenames which together take close to UnixFSHAMTDirectorySizeThreshold in total
363+
err = createDirectoryForHAMT(randDir, cidV1Length, "1025KiB", seed)
364+
require.NoError(t, err)
365+
cidStr := node.IPFS("add", "-r", "-Q", randDir).Stdout.Trimmed()
366+
367+
// Confirm this time, the number of links is less than UnixFSHAMTDirectoryMaxFanout=256
368+
root, err := node.InspectPBNode(cidStr)
369+
assert.NoError(t, err)
370+
require.Equal(t, 256, len(root.Links))
371+
})
372+
})
373+
374+
}
375+
376+
// createDirectoryForHAMT aims to create enough files with long names for the directory block to be close to the UnixFSHAMTDirectorySizeThreshold.
377+
// The calculation is based on boxo's HAMTShardingSize and sizeBelowThreshold which calculates ballpark size of the block
378+
// by adding length of link names and the binary cid length.
379+
// See https://github.com/ipfs/boxo/blob/6c5a07602aed248acc86598f30ab61923a54a83e/ipld/unixfs/io/directory.go#L491
380+
func createDirectoryForHAMT(dirPath string, cidLength int, unixfsNodeSizeTarget, seed string) error {
381+
hamtThreshold, err := humanize.ParseBytes(unixfsNodeSizeTarget)
382+
if err != nil {
383+
return err
384+
}
385+
386+
// Calculate how many files with long filenames are needed to hit UnixFSHAMTDirectorySizeThreshold
387+
nameLen := 255 // max that works across windows/macos/linux
388+
alphabetLen := len(testutils.AlphabetEasy)
389+
numFiles := int(hamtThreshold) / (nameLen + cidLength)
390+
391+
// Deterministic pseudo-random bytes for static CID
392+
drand, err := testutils.DeterministicRandomReader(unixfsNodeSizeTarget, seed)
393+
if err != nil {
394+
return err
395+
}
396+
397+
// Create necessary files in a single, flat directory
398+
for i := 0; i < numFiles; i++ {
399+
buf := make([]byte, nameLen)
400+
_, err := io.ReadFull(drand, buf)
401+
if err != nil {
402+
return err
403+
}
404+
405+
// Convert deterministic pseudo-random bytes to ASCII
406+
var sb strings.Builder
407+
408+
for _, b := range buf {
409+
// Map byte to printable ASCII range (33-126)
410+
char := testutils.AlphabetEasy[int(b)%alphabetLen]
411+
sb.WriteRune(char)
412+
}
413+
filename := sb.String()[:nameLen]
414+
filePath := filepath.Join(dirPath, filename)
415+
416+
// Create empty file
417+
f, err := os.Create(filePath)
418+
if err != nil {
419+
return err
420+
}
421+
f.Close()
422+
}
423+
return nil
206424
}

test/cli/harness/ipfs.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,9 @@ func (n *Node) IPFSAddStr(content string, args ...string) string {
7878

7979
// IPFSAddDeterministic produces a CID of a file of a certain size, filled with deterministically generated bytes based on some seed.
8080
// This ensures deterministic CID on the other end, that can be used in tests.
81-
func (n *Node) IPFSAddFromSeed(size string, seed string, args ...string) string {
81+
func (n *Node) IPFSAddDeterministic(size string, seed string, args ...string) string {
8282
log.Debugf("node %d adding %s of deterministic pseudo-random data with seed %q and args: %v", n.ID, size, seed, args)
83-
reader, err := RandomReader(size, seed)
83+
reader, err := DeterministicRandomReader(size, seed)
8484
if err != nil {
8585
panic(err)
8686
}

test/cli/testutils/random_reader.go renamed to test/cli/testutils/random_deterministic.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func (r *randomReader) Read(p []byte) (int, error) {
2929

3030
// DeterministicRandomReader produces the specified number of pseudo-random bytes
3131
// from a seed.
32-
func RandomReader(sizeStr string, seed string) (io.Reader, error) {
32+
func DeterministicRandomReader(sizeStr string, seed string) (io.Reader, error) {
3333
size, err := humanize.ParseBytes(sizeStr)
3434
if err != nil {
3535
return nil, err

0 commit comments

Comments
 (0)