Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 67 additions & 15 deletions config/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ const (
// write-batch. The total size of the batch is limited by
// BatchMaxnodes and BatchMaxSize.
DefaultBatchMaxSize = 100 << 20 // 20MiB

// HAMTSizeEstimation values for Import.UnixFSHAMTDirectorySizeEstimation
HAMTSizeEstimationLinks = "links" // legacy: estimate using link names + CID byte lengths (default)
HAMTSizeEstimationBlock = "block" // full serialized dag-pb block size
HAMTSizeEstimationDisabled = "disabled" // disable HAMT sharding entirely

// DAGLayout values for Import.UnixFSDAGLayout
DAGLayoutBalanced = "balanced" // balanced DAG layout (default)
DAGLayoutTrickle = "trickle" // trickle DAG layout

DefaultUnixFSHAMTDirectorySizeEstimation = HAMTSizeEstimationLinks // legacy behavior
DefaultUnixFSDAGLayout = DAGLayoutBalanced // balanced DAG layout
DefaultUnixFSIncludeEmptyDirs = true // include empty directories
)

var (
Expand All @@ -40,18 +53,20 @@ var (
// Import configures the default options for ingesting data. This affects commands
// that ingest data, such as 'ipfs add', 'ipfs dag put, 'ipfs block put', 'ipfs files write'.
type Import struct {
CidVersion OptionalInteger
UnixFSRawLeaves Flag
UnixFSChunker OptionalString
HashFunction OptionalString
UnixFSFileMaxLinks OptionalInteger
UnixFSDirectoryMaxLinks OptionalInteger
UnixFSHAMTDirectoryMaxFanout OptionalInteger
UnixFSHAMTDirectorySizeThreshold OptionalBytes
BatchMaxNodes OptionalInteger
BatchMaxSize OptionalInteger
FastProvideRoot Flag
FastProvideWait Flag
CidVersion OptionalInteger
UnixFSRawLeaves Flag
UnixFSChunker OptionalString
HashFunction OptionalString
UnixFSFileMaxLinks OptionalInteger
UnixFSDirectoryMaxLinks OptionalInteger
UnixFSHAMTDirectoryMaxFanout OptionalInteger
UnixFSHAMTDirectorySizeThreshold OptionalBytes
UnixFSHAMTDirectorySizeEstimation OptionalString // "links", "block", or "disabled"
UnixFSDAGLayout OptionalString // "balanced" or "trickle"
BatchMaxNodes OptionalInteger
BatchMaxSize OptionalInteger
FastProvideRoot Flag
FastProvideWait Flag
}

// ValidateImportConfig validates the Import configuration according to UnixFS spec requirements.
Expand Down Expand Up @@ -129,6 +144,30 @@ func ValidateImportConfig(cfg *Import) error {
}
}

// Validate UnixFSHAMTDirectorySizeEstimation
if !cfg.UnixFSHAMTDirectorySizeEstimation.IsDefault() {
est := cfg.UnixFSHAMTDirectorySizeEstimation.WithDefault(DefaultUnixFSHAMTDirectorySizeEstimation)
switch est {
case HAMTSizeEstimationLinks, HAMTSizeEstimationBlock, HAMTSizeEstimationDisabled:
// valid
default:
return fmt.Errorf("Import.UnixFSHAMTDirectorySizeEstimation must be %q, %q, or %q, got %q",
HAMTSizeEstimationLinks, HAMTSizeEstimationBlock, HAMTSizeEstimationDisabled, est)
}
}

// Validate UnixFSDAGLayout
if !cfg.UnixFSDAGLayout.IsDefault() {
layout := cfg.UnixFSDAGLayout.WithDefault(DefaultUnixFSDAGLayout)
switch layout {
case DAGLayoutBalanced, DAGLayoutTrickle:
// valid
default:
return fmt.Errorf("Import.UnixFSDAGLayout must be %q or %q, got %q",
DAGLayoutBalanced, DAGLayoutTrickle, layout)
}
}

return nil
}

Expand All @@ -144,8 +183,7 @@ func isValidChunker(chunker string) bool {
}

// Check for size-<bytes> format
if strings.HasPrefix(chunker, "size-") {
sizeStr := strings.TrimPrefix(chunker, "size-")
if sizeStr, ok := strings.CutPrefix(chunker, "size-"); ok {
if sizeStr == "" {
return false
}
Expand All @@ -167,7 +205,7 @@ func isValidChunker(chunker string) bool {

// Parse and validate min, avg, max values
values := make([]int, 3)
for i := 0; i < 3; i++ {
for i := range 3 {
val, err := strconv.Atoi(parts[i+1])
if err != nil {
return false
Expand All @@ -182,3 +220,17 @@ func isValidChunker(chunker string) bool {

return false
}

// HAMTSizeEstimationMode returns the boxo SizeEstimationMode based on the config value.
func (i *Import) HAMTSizeEstimationMode() io.SizeEstimationMode {
switch i.UnixFSHAMTDirectorySizeEstimation.WithDefault(DefaultUnixFSHAMTDirectorySizeEstimation) {
case HAMTSizeEstimationLinks:
return io.SizeEstimationLinks
case HAMTSizeEstimationBlock:
return io.SizeEstimationBlock
case HAMTSizeEstimationDisabled:
return io.SizeEstimationDisabled
default:
return io.SizeEstimationLinks
}
}
102 changes: 102 additions & 0 deletions config/import_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"strings"
"testing"

"github.com/ipfs/boxo/ipld/unixfs/io"
mh "github.com/multiformats/go-multihash"
)

Expand Down Expand Up @@ -406,3 +407,104 @@ func TestIsPowerOfTwo(t *testing.T) {
})
}
}

func TestValidateImportConfig_HAMTSizeEstimation(t *testing.T) {
tests := []struct {
name string
value string
wantErr bool
errMsg string
}{
{name: "valid links", value: HAMTSizeEstimationLinks, wantErr: false},
{name: "valid block", value: HAMTSizeEstimationBlock, wantErr: false},
{name: "valid disabled", value: HAMTSizeEstimationDisabled, wantErr: false},
{name: "invalid unknown", value: "unknown", wantErr: true, errMsg: "must be"},
{name: "invalid empty", value: "", wantErr: true, errMsg: "must be"},
{name: "invalid typo", value: "link", wantErr: true, errMsg: "must be"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := &Import{
UnixFSHAMTDirectorySizeEstimation: *NewOptionalString(tt.value),
}

err := ValidateImportConfig(cfg)

if tt.wantErr {
if err == nil {
t.Errorf("expected error for value=%q, got nil", tt.value)
} else if tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) {
t.Errorf("error = %v, want error containing %q", err, tt.errMsg)
}
} else {
if err != nil {
t.Errorf("unexpected error for value=%q: %v", tt.value, err)
}
}
})
}
}

func TestValidateImportConfig_DAGLayout(t *testing.T) {
tests := []struct {
name string
value string
wantErr bool
errMsg string
}{
{name: "valid balanced", value: DAGLayoutBalanced, wantErr: false},
{name: "valid trickle", value: DAGLayoutTrickle, wantErr: false},
{name: "invalid unknown", value: "unknown", wantErr: true, errMsg: "must be"},
{name: "invalid empty", value: "", wantErr: true, errMsg: "must be"},
{name: "invalid flat", value: "flat", wantErr: true, errMsg: "must be"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := &Import{
UnixFSDAGLayout: *NewOptionalString(tt.value),
}

err := ValidateImportConfig(cfg)

if tt.wantErr {
if err == nil {
t.Errorf("expected error for value=%q, got nil", tt.value)
} else if tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) {
t.Errorf("error = %v, want error containing %q", err, tt.errMsg)
}
} else {
if err != nil {
t.Errorf("unexpected error for value=%q: %v", tt.value, err)
}
}
})
}
}

func TestImport_HAMTSizeEstimationMode(t *testing.T) {
tests := []struct {
cfg string
want io.SizeEstimationMode
}{
{HAMTSizeEstimationLinks, io.SizeEstimationLinks},
{HAMTSizeEstimationBlock, io.SizeEstimationBlock},
{HAMTSizeEstimationDisabled, io.SizeEstimationDisabled},
{"", io.SizeEstimationLinks}, // default (unset returns default)
{"unknown", io.SizeEstimationLinks}, // fallback to default
}

for _, tt := range tests {
t.Run(tt.cfg, func(t *testing.T) {
var imp Import
if tt.cfg != "" {
imp.UnixFSHAMTDirectorySizeEstimation = *NewOptionalString(tt.cfg)
}
got := imp.HAMTSizeEstimationMode()
if got != tt.want {
t.Errorf("Import.HAMTSizeEstimationMode() with %q = %v, want %v", tt.cfg, got, tt.want)
}
})
}
}
63 changes: 33 additions & 30 deletions config/profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,45 +312,33 @@ fetching may be degraded.
return nil
},
},
"unixfs-v0-2015": {
Description: `Legacy UnixFS import profile for backward-compatible CID generation.
Produces CIDv0 with no raw leaves, sha2-256, 256 KiB chunks, and
link-based HAMT size estimation. Use only when legacy CIDs are required.
See https://github.com/ipfs/specs/pull/499. Alias: legacy-cid-v0`,
Transform: applyUnixFSv02015,
},
"legacy-cid-v0": {
Description: `Makes UnixFS import produce legacy CIDv0 with no raw leaves, sha2-256 and 256 KiB chunks. This is likely the least optimal preset, use only if legacy behavior is required.`,
Transform: func(c *Config) error {
c.Import.CidVersion = *NewOptionalInteger(0)
c.Import.UnixFSRawLeaves = False
c.Import.UnixFSChunker = *NewOptionalString("size-262144")
c.Import.HashFunction = *NewOptionalString("sha2-256")
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174)
c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0)
c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256)
c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalBytes("256KiB")
return nil
},
Description: `Alias for unixfs-v0-2015 profile.`,
Transform: applyUnixFSv02015,
},
"test-cid-v1": {
Description: `Makes UnixFS import produce CIDv1 with raw leaves, sha2-256 and 1 MiB chunks (max 174 links per file, 256 per HAMT node, switch dir to HAMT above 256KiB).`,
"unixfs-v1-2025": {
Description: `Recommended UnixFS import profile for cross-implementation CID determinism.
Uses CIDv1, raw leaves, sha2-256, 1 MiB chunks, 1024 links per file node,
256 HAMT fanout, and block-based size estimation for HAMT threshold.
See https://github.com/ipfs/specs/pull/499`,
Transform: func(c *Config) error {
c.Import.CidVersion = *NewOptionalInteger(1)
c.Import.UnixFSRawLeaves = True
c.Import.UnixFSChunker = *NewOptionalString("size-1048576")
c.Import.UnixFSChunker = *NewOptionalString("size-1048576") // 1 MiB
c.Import.HashFunction = *NewOptionalString("sha2-256")
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174)
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(1024)
c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0)
c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256)
c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalBytes("256KiB")
return nil
},
},
"test-cid-v1-wide": {
Description: `Makes UnixFS import produce CIDv1 with raw leaves, sha2-256 and 1MiB chunks and wider file DAGs (max 1024 links per every node type, switch dir to HAMT above 1MiB).`,
Transform: func(c *Config) error {
c.Import.CidVersion = *NewOptionalInteger(1)
c.Import.UnixFSRawLeaves = True
c.Import.UnixFSChunker = *NewOptionalString("size-1048576") // 1MiB
c.Import.HashFunction = *NewOptionalString("sha2-256")
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(1024)
c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0) // no limit here, use size-based Import.UnixFSHAMTDirectorySizeThreshold instead
c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(1024)
c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalBytes("1MiB") // 1MiB
c.Import.UnixFSHAMTDirectorySizeEstimation = *NewOptionalString(HAMTSizeEstimationBlock)
c.Import.UnixFSDAGLayout = *NewOptionalString(DAGLayoutBalanced)
return nil
},
},
Expand Down Expand Up @@ -435,3 +423,18 @@ func mapKeys(m map[string]struct{}) []string {
}
return out
}

// applyUnixFSv02015 applies the legacy UnixFS v0 (2015) import settings.
func applyUnixFSv02015(c *Config) error {
c.Import.CidVersion = *NewOptionalInteger(0)
c.Import.UnixFSRawLeaves = False
c.Import.UnixFSChunker = *NewOptionalString("size-262144") // 256 KiB
c.Import.HashFunction = *NewOptionalString("sha2-256")
c.Import.UnixFSFileMaxLinks = *NewOptionalInteger(174)
c.Import.UnixFSDirectoryMaxLinks = *NewOptionalInteger(0)
c.Import.UnixFSHAMTDirectoryMaxFanout = *NewOptionalInteger(256)
c.Import.UnixFSHAMTDirectorySizeThreshold = *NewOptionalBytes("256KiB")
c.Import.UnixFSHAMTDirectorySizeEstimation = *NewOptionalString(HAMTSizeEstimationLinks)
c.Import.UnixFSDAGLayout = *NewOptionalString(DAGLayoutBalanced)
return nil
}
Loading
Loading