Skip to content

Commit 024b41d

Browse files
parkan and Arkadiy Kukarkin authored
Feat/allow small pieces (#479)
feat: allow configurable min-piece-size to reduce excessive padding for small segments

This change introduces support for specifying a `--min-piece-size` when preparing data, improving behavior for small DAGs, remainder CARs, and small preparations that would otherwise be padded to the full target piece size (e.g. 32GiB). Such excessive padding leads to inefficiencies and causes sectors to be rejected by Storage Providers or to fail verified deal requirements.

### Key changes:

- Add support for `--min-piece-size` (default: 256B, subject to adjustment)
- Pass both `min` and `target` piece sizes to `GetCommp`, enabling finer control over padding
- Retain power-of-2 padding via `target size`, but allow flexibility by setting it to `0`

This helps avoid generating 90%+ padding pieces and reduces transfer times in many cases.

### Notes:

- Default behavior remains unchanged if `--min-piece-size` is not set
- Full support for non-padded pieces now depends on both chunker accuracy and downstream deal acceptance
- `pieceType` is now tracked in metadata (e.g., data vs. DAG)

### Out of scope:

- No cross-preparation aggregation; that responsibility remains with SPs
- Edge cases like aggregating under-1MiB pieces are not yet solved

Closes #473

Co-authored-by: Arkadiy Kukarkin <[email protected]>
1 parent a678f14 commit 024b41d

File tree

22 files changed

+848
-15
lines changed

22 files changed

+848
-15
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.23.6-bullseye as builder
1+
FROM golang:1.23.6-bullseye AS builder
22
WORKDIR /app
33
COPY go.* ./
44
RUN go mod download

client/swagger/models/dataprep_create_request.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client/swagger/models/model_car.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client/swagger/models/model_preparation.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/api_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ func TestBasicDataPrep(t *testing.T) {
263263
require.True(t, listPiecesResp.IsSuccess())
264264
require.Len(t, listPiecesResp.Payload, 1)
265265
require.Len(t, listPiecesResp.Payload[0].Pieces, 1)
266-
require.Equal(t, "baga6ea4seaqoahdvfwkrp64ecsxbjvyuqcwpz3o7ctxrjanlv2x4u2cq2qjf2ji", listPiecesResp.Payload[0].Pieces[0].PieceCid)
266+
require.Equal(t, "baga6ea4seaqhmks2wnochilik4updmit54agfi5mjf6r7ehotu36ksdp46uxahi", listPiecesResp.Payload[0].Pieces[0].PieceCid)
267267
// Start daggen
268268
startDagGenResp, err := client.Job.StartDagGen(&job.StartDagGenParams{
269269
ID: "prep",
@@ -285,7 +285,9 @@ func TestBasicDataPrep(t *testing.T) {
285285
require.True(t, listPiecesResp.IsSuccess())
286286
require.Len(t, listPiecesResp.Payload, 1)
287287
require.Len(t, listPiecesResp.Payload[0].Pieces, 2)
288-
require.Equal(t, "baga6ea4seaqoahdvfwkrp64ecsxbjvyuqcwpz3o7ctxrjanlv2x4u2cq2qjf2ji", listPiecesResp.Payload[0].Pieces[0].PieceCid)
289-
require.Equal(t, "baga6ea4seaqbkouoyih2elxfrztq3gr23rpvgpx5e3fnud2rhvvzf4b7tneeyki", listPiecesResp.Payload[0].Pieces[1].PieceCid)
288+
// data piece, full size
289+
require.Equal(t, "baga6ea4seaqhmks2wnochilik4updmit54agfi5mjf6r7ehotu36ksdp46uxahi", listPiecesResp.Payload[0].Pieces[0].PieceCid)
290+
// dag piece, min piece size
291+
require.Equal(t, "baga6ea4seaqfoo2k3wmwp7gvxnc7hbjpb7ovtvt52tehwfvzxbreljcebbnwgiq", listPiecesResp.Payload[0].Pieces[1].PieceCid)
290292
})
291293
}

cmd/dataprep/create.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ var CreateCmd = &cli.Command{
5555
Value: "",
5656
DefaultText: "Determined by --max-size",
5757
},
58+
&cli.StringFlag{
59+
Name: "min-piece-size",
60+
Usage: "The minimum size of a piece. Pieces smaller than this will be padded up to this size. It's recommended to leave this as the default",
61+
Value: "1MiB",
62+
DefaultText: "1MiB",
63+
},
5864
&cli.BoolFlag{
5965
Name: "delete-after-export",
6066
Usage: "Whether to delete the source files after export to CAR files",
@@ -83,6 +89,7 @@ var CreateCmd = &cli.Command{
8389
outputStorages := c.StringSlice("output")
8490
maxSizeStr := c.String("max-size")
8591
pieceSizeStr := c.String("piece-size")
92+
minPieceSizeStr := c.String("min-piece-size")
8693
for _, sourcePath := range c.StringSlice("local-source") {
8794
source, err := createStorageIfNotExist(c.Context, db, sourcePath)
8895
if err != nil {
@@ -103,8 +110,9 @@ var CreateCmd = &cli.Command{
103110
OutputStorages: outputStorages,
104111
MaxSizeStr: maxSizeStr,
105112
PieceSizeStr: pieceSizeStr,
106-
DeleteAfterExport: c.Bool("delete-after-export"),
113+
MinPieceSizeStr: minPieceSizeStr,
107114
Name: name,
115+
DeleteAfterExport: c.Bool("delete-after-export"),
108116
NoInline: c.Bool("no-inline"),
109117
NoDag: c.Bool("no-dag"),
110118
})

cmd/functional_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,10 @@ func TestDataPrep(t *testing.T) {
451451
require.Equal(t, pieceCID, calculatedPieceCID)
452452
err = os.WriteFile(filepath.Join(downloadDir, pieceCID+".car"), downloaded, 0777)
453453
require.NoError(t, err)
454+
455+
// Verify piece size is a power of two
456+
pieceSize := uint64(len(downloaded))
457+
require.True(t, util.IsPowerOfTwo(pieceSize), "piece size %d is not a power of two", pieceSize)
454458
}
455459

456460
// Download all pieces using local download server
@@ -499,7 +503,7 @@ func TestNoDuplicatedOutput(t *testing.T) {
499503
_, _, err = runner.Run(ctx, fmt.Sprintf("singularity storage create local --name source --path %s", testutil.EscapePath(source)))
500504
require.NoError(t, err)
501505

502-
_, _, err = runner.Run(ctx, fmt.Sprintf("singularity prep create --name test-prep --delete-after-export --source source --local-output %s --max-size=500KiB", testutil.EscapePath(output)))
506+
_, _, err = runner.Run(ctx, fmt.Sprintf("singularity prep create --name test-prep --delete-after-export --source source --local-output %s --max-size=500KiB --min-piece-size=256KiB", testutil.EscapePath(output)))
503507
require.NoError(t, err)
504508

505509
// Start scanning

docs/en/cli-reference/prep/create.md

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/swagger/docs.go

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/swagger/swagger.json

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)