Skip to content

Commit 5157d10

Browse files
committed
feat: new UnixFS{File,Directory} with options pattern
flag future deprecation of Generate*
1 parent c0b150b commit 5157d10

File tree

1 file changed

+244
-58
lines changed

1 file changed

+244
-58
lines changed

testutil/generator.go

Lines changed: 244 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -19,31 +19,225 @@ import (
1919
"github.com/stretchr/testify/require"
2020
)
2121

22-
// GenerateFile generates a random unixfs file of the given size, storing the
23-
// blocks in the provided LinkSystem and returns a DirEntry representation of
22+
type options struct {
23+
randReader io.Reader
24+
shardBitwidth int
25+
chunker string
26+
dirname string
27+
childGenerator ChildGeneratorFn
28+
shardThisDir bool // a private option used internally to randomly switch on sharding at this current level
29+
}
30+
31+
// Option is a functional option for the Generate* functions.
32+
type Option func(*options)
33+
34+
// WithRandReader sets the random reader used by the Generate* functions.
35+
func WithRandReader(randReader io.Reader) Option {
36+
return func(o *options) {
37+
o.randReader = randReader
38+
}
39+
}
40+
41+
// WithShardBitwidth sets the shard fanout for the generated directory. By
42+
// default directories are not sharded. Set to 8 to use the default sharding
43+
// fanout value of 256. Set to a lower value, such as 4, to increase the
44+
// probability of collisions and therefore greater depth for smaller number of
45+
// files.
46+
func WithShardBitwidth(bitwidth int) Option {
47+
return func(o *options) {
48+
o.shardBitwidth = bitwidth
49+
}
50+
}
51+
52+
// WithChunker sets the chunker used by the Generate* functions. By default
53+
// files are chunked using the "size-256144" chunker.
54+
// The "size-256144" chunker will result in splitting bytes at 256144b
55+
// boundaries. See https://pkg.go.dev/github.com/ipfs/go-ipfs-chunker#FromString
56+
// for more information on options available.
57+
func WithChunker(chunker string) Option {
58+
return func(o *options) {
59+
o.chunker = chunker
60+
}
61+
}
62+
63+
// WithDirname sets the directory name used by UnixFSDirectory where a root
64+
// directory name is required.
65+
func WithDirname(dirname string) Option {
66+
return func(o *options) {
67+
o.dirname = dirname
68+
}
69+
}
70+
71+
// ChildGeneratorFn is a function that generates a child DirEntry for a
72+
// directory. It is used by UnixFSDirectory where control over the direct
73+
// children of a directory is required. Return nil to stop generating children.
74+
type ChildGeneratorFn func(name string) (*DirEntry, error)
75+
76+
// WithChildGenerator sets the child generator used by UnixFSDirectory control
77+
// over the direct children of a directory is required.
78+
func WithChildGenerator(childGenerator ChildGeneratorFn) Option {
79+
return func(o *options) {
80+
o.childGenerator = childGenerator
81+
}
82+
}
83+
84+
// shardThisDir is a private internal option
85+
func shardThisDir(b bool) Option {
86+
return func(o *options) {
87+
o.shardThisDir = b
88+
}
89+
}
90+
91+
func applyOptions(opts []Option) *options {
92+
o := &options{
93+
randReader: rand.Reader,
94+
shardBitwidth: 0,
95+
chunker: "size-256144",
96+
shardThisDir: true,
97+
}
98+
for _, opt := range opts {
99+
opt(o)
100+
}
101+
return o
102+
}
103+
104+
// Generate a file of `size` random bytes, packaged into UnixFS structure,
105+
// stored in the provided LinkSystem and returns a DirEntry representation of
24106
// the file.
25-
func GenerateFile(t require.TestingT, linkSys *linking.LinkSystem, randReader io.Reader, size int) DirEntry {
26-
// a file of `size` random bytes, packaged into unixfs DAGs, stored in the remote blockstore
27-
delimited := io.LimitReader(randReader, int64(size))
107+
func UnixFSFile(lsys linking.LinkSystem, size int, opt ...Option) (DirEntry, error) {
108+
o := applyOptions(opt)
109+
delimited := io.LimitReader(o.randReader, int64(size))
28110
var buf bytes.Buffer
29111
buf.Grow(size)
30112
delimited = io.TeeReader(delimited, &buf)
31-
// "size-256144" sets the chunker, splitting bytes at 256144b boundaries
32113
cids := make([]cid.Cid, 0)
33114
var undo func()
34-
linkSys.StorageWriteOpener, undo = cidCollector(linkSys, &cids)
115+
lsys.StorageWriteOpener, undo = cidCollector(lsys, &cids)
35116
defer undo()
36-
root, gotSize, err := builder.BuildUnixFSFile(delimited, "size-256144", linkSys)
37-
require.NoError(t, err)
38-
srcData := buf.Bytes()
39-
rootCid := root.(cidlink.Link).Cid
117+
root, gotSize, err := builder.BuildUnixFSFile(delimited, o.chunker, &lsys)
118+
if err != nil {
119+
return DirEntry{}, err
120+
}
40121
return DirEntry{
41122
Path: "",
42-
Content: srcData,
43-
Root: rootCid,
123+
Content: buf.Bytes(),
124+
Root: root.(cidlink.Link).Cid,
44125
SelfCids: cids,
45126
TSize: uint64(gotSize),
127+
}, nil
128+
}
129+
130+
// UnixFSDirectory generates a random UnixFS directory that aims for the
131+
// requested targetSize (in bytes, although it is likely to fall somewhere
132+
// under this number), storing the blocks in the provided LinkSystem and
133+
// returns a DirEntry representation of the directory.
134+
//
135+
// If the WithDirname option is not set, the directory will be built as a root
136+
// directory. If the dirname option is set, the directory will be built as a
137+
// child directory.
138+
//
139+
// If the WithChildGenerator option is not set, the targetSize will be
140+
// ignored and all sizing control will be delegated to the child generator.
141+
func UnixFSDirectory(lsys linking.LinkSystem, targetSize int, opts ...Option) (DirEntry, error) {
142+
o := applyOptions(opts)
143+
144+
var curSize int
145+
targetFileSize := targetSize / 16
146+
147+
children := make([]DirEntry, 0)
148+
149+
childGenerator := func(name string) (*DirEntry, error) {
150+
for curSize < targetSize {
151+
switch rndInt(o.randReader, 6) {
152+
case 0: // 1 in 6 chance of finishing this directory if not at root
153+
if o.dirname != "" && len(children) > 0 {
154+
curSize = targetSize // not really, but we're done with this directory
155+
} // else at the root we don't get to finish early
156+
case 1: // 1 in 6 chance of making a new directory
157+
if targetSize-curSize <= 1024 { // don't make tiny directories
158+
continue
159+
}
160+
so := append(opts, WithDirname(o.dirname+"/"+name), shardThisDir(rndInt(o.randReader, 6) == 0))
161+
child, err := UnixFSDirectory(lsys, targetSize-curSize, so...)
162+
if err != nil {
163+
return nil, err
164+
}
165+
children = append(children, child)
166+
curSize += int(child.TSize)
167+
default: // 4 in 6 chance of making a new file
168+
var size int
169+
for size == 0 { // don't make empty files
170+
sizeB, err := rand.Int(o.randReader, big.NewInt(int64(targetFileSize)))
171+
if err != nil {
172+
return nil, err
173+
}
174+
size = int(sizeB.Int64())
175+
if size > targetSize-curSize {
176+
size = targetSize - curSize
177+
}
178+
}
179+
entry, err := UnixFSFile(lsys, size, opts...)
180+
if err != nil {
181+
return nil, err
182+
}
183+
var name string
184+
entry.Path = o.dirname + "/" + name
185+
curSize += size
186+
return &entry, nil
187+
}
188+
}
189+
return nil, nil
190+
}
191+
192+
if o.childGenerator != nil {
193+
childGenerator = o.childGenerator
194+
}
195+
196+
for {
197+
var name string
198+
for {
199+
var err error
200+
name, err = namegen.RandomDirectoryName(o.randReader)
201+
if err != nil {
202+
return DirEntry{}, err
203+
}
204+
if !isDupe(children, name) {
205+
break
206+
}
207+
}
208+
209+
child, err := childGenerator(o.dirname + "/" + name)
210+
if err != nil {
211+
return DirEntry{}, err
212+
}
213+
if child == nil {
214+
break
215+
}
216+
children = append(children, *child)
217+
}
218+
219+
var shardBitwidth int
220+
if o.shardThisDir {
221+
shardBitwidth = o.shardBitwidth
222+
}
223+
dirEntry, err := packDirectory(lsys, children, shardBitwidth)
224+
if err != nil {
225+
return DirEntry{}, err
46226
}
227+
dirEntry.Path = o.dirname
228+
return dirEntry, nil
229+
}
230+
231+
// GenerateFile generates a random unixfs file of the given size, storing the
232+
// blocks in the provided LinkSystem and returns a DirEntry representation of
233+
// the file.
234+
//
235+
// This function will be deprecated in a future release, use UnixFSFile()
236+
// instead.
237+
func GenerateFile(t require.TestingT, linkSys *linking.LinkSystem, randReader io.Reader, size int) DirEntry {
238+
dirEntry, err := UnixFSFile(*linkSys, size, WithRandReader(randReader))
239+
require.NoError(t, err)
240+
return dirEntry
47241
}
48242

49243
// GenerateDirectory generates a random UnixFS directory that aims for the
@@ -53,13 +247,19 @@ func GenerateFile(t require.TestingT, linkSys *linking.LinkSystem, randReader io
53247
// the root directory will be built as HAMT sharded (with a low "width" to
54248
// maximise the chance of collisions and therefore greater depth for smaller
55249
// number of files).
250+
//
251+
// This function will be deprecated in a future release, use UnixFSDirectory()
252+
// instead.
56253
func GenerateDirectory(t require.TestingT, linkSys *linking.LinkSystem, randReader io.Reader, targetSize int, rootSharded bool) DirEntry {
	// Start from the empty directory path.
	const startDir = ""
	return GenerateDirectoryFrom(t, linkSys, randReader, targetSize, startDir, rootSharded)
}
59256

60257
// GenerateDirectoryFrom is the same as GenerateDirectory but allows the caller
61258
// to specify a directory path to start from. This is useful for generating
62259
// nested directories.
260+
//
261+
// This function will be deprecated in a future release, use UnixFSDirectory()
262+
// instead.
63263
func GenerateDirectoryFrom(
64264
t require.TestingT,
65265
linkSys *linking.LinkSystem,
@@ -124,46 +324,26 @@ func GenerateDirectoryFrom(
124324
return dirEntry
125325
}
126326

127-
// GenerateDirectoryFor will build a directory with name `dir` whose children
128-
// are generated by the caller using the provided mkchild function. This is
129-
// useful for generating directories with a specific structure where you need
130-
// to have greater control over the children.
131-
func GenerateDirectoryFor(
132-
t require.TestingT,
133-
linkSys *linking.LinkSystem,
134-
randReader io.Reader,
135-
dir string,
136-
sharded bool,
137-
mkchild func(name string) *DirEntry,
138-
) DirEntry {
139-
children := make([]DirEntry, 0)
140-
for {
141-
var name string
142-
for {
143-
var err error
144-
name, err = namegen.RandomDirectoryName(randReader)
145-
require.NoError(t, err)
146-
if !isDupe(children, name) {
147-
break
148-
}
149-
}
150-
child := mkchild(dir + "/" + name)
151-
if child == nil {
152-
break
153-
}
154-
children = append(children, *child)
155-
}
156-
dirEntry := BuildDirectory(t, linkSys, children, sharded)
157-
dirEntry.Path = dir
158-
return dirEntry
159-
}
160-
161327
// BuildDirectory builds a directory from the given children, storing the
162328
// blocks in the provided LinkSystem and returns a DirEntry representation of
163329
// the directory. If sharded is true, the root directory will be built as HAMT
164330
// sharded (with a low "width" to maximise the chance of collisions and
165331
// therefore greater depth for smaller number of files).
332+
//
333+
// This function will be deprecated in a future release. Currently there is no
334+
// direct replacement.
166335
func BuildDirectory(t require.TestingT, linkSys *linking.LinkSystem, children []DirEntry, sharded bool) DirEntry {
336+
var bitWidth int
337+
if sharded {
338+
// fanout of 16, quite small to increase collision probability so we actually get sharding
339+
bitWidth = 4
340+
}
341+
dirEnt, err := packDirectory(*linkSys, children, bitWidth)
342+
require.NoError(t, err)
343+
return dirEnt
344+
}
345+
346+
func packDirectory(lsys linking.LinkSystem, children []DirEntry, bitWidth int) (DirEntry, error) {
167347
// create stable sorted children, which should match the encoded form
168348
// in dag-pb
169349
sort.Slice(children, func(i, j int) bool {
@@ -175,25 +355,31 @@ func BuildDirectory(t require.TestingT, linkSys *linking.LinkSystem, children []
175355
paths := strings.Split(child.Path, "/")
176356
name := paths[len(paths)-1]
177357
lnk, err := builder.BuildUnixFSDirectoryEntry(name, int64(child.TSize), cidlink.Link{Cid: child.Root})
178-
require.NoError(t, err)
358+
if err != nil {
359+
return DirEntry{}, err
360+
}
179361
dirLinks = append(dirLinks, lnk)
180362
}
181363
cids := make([]cid.Cid, 0)
182364
var undo func()
183-
linkSys.StorageWriteOpener, undo = cidCollector(linkSys, &cids)
365+
lsys.StorageWriteOpener, undo = cidCollector(lsys, &cids)
184366
defer undo()
185367
var root ipld.Link
186368
var size uint64
187369
var err error
188-
if sharded {
189-
// node arity of 16, quite small to increase collision probability so we actually get sharding
190-
const width = 16
370+
if bitWidth > 0 {
371+
// NOTE(review): 2 << bitWidth evaluates to 2^(bitWidth+1); 1 << bitWidth would give a width of 2^bitWidth
372+
width := 2 << bitWidth
191373
const hasher = multihash.MURMUR3X64_64
192-
root, size, err = builder.BuildUnixFSShardedDirectory(width, hasher, dirLinks, linkSys)
193-
require.NoError(t, err)
374+
root, size, err = builder.BuildUnixFSShardedDirectory(width, hasher, dirLinks, &lsys)
375+
if err != nil {
376+
return DirEntry{}, err
377+
}
194378
} else {
195-
root, size, err = builder.BuildUnixFSDirectory(dirLinks, linkSys)
196-
require.NoError(t, err)
379+
root, size, err = builder.BuildUnixFSDirectory(dirLinks, &lsys)
380+
if err != nil {
381+
return DirEntry{}, err
382+
}
197383
}
198384

199385
return DirEntry{
@@ -202,7 +388,7 @@ func BuildDirectory(t require.TestingT, linkSys *linking.LinkSystem, children []
202388
SelfCids: cids,
203389
TSize: size,
204390
Children: children,
205-
}
391+
}, nil
206392
}
207393

208394
func rndInt(randReader io.Reader, max int) int {
@@ -213,7 +399,7 @@ func rndInt(randReader io.Reader, max int) int {
213399
return int(coin.Int64())
214400
}
215401

216-
func cidCollector(ls *ipld.LinkSystem, cids *[]cid.Cid) (ipld.BlockWriteOpener, func()) {
402+
func cidCollector(ls ipld.LinkSystem, cids *[]cid.Cid) (ipld.BlockWriteOpener, func()) {
217403
swo := ls.StorageWriteOpener
218404
return func(linkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) {
219405
w, c, err := swo(linkCtx)

0 commit comments

Comments
 (0)