Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
94d5fe1
feat(builder): add support for directory tar archives in artifact cre…
ilopezluna Oct 16, 2025
5041450
feat(packaging): add tests for directory tar archive creation and unp…
ilopezluna Oct 16, 2025
7df30ba
feat(package): add support for packaging directories as tar archives
ilopezluna Oct 16, 2025
91ca308
feat(packaging): implement CreateDirectoryTarArchive function for cre…
ilopezluna Oct 16, 2025
2867491
refactor(unpack): streamline tar extraction by removing temporary fil…
ilopezluna Oct 16, 2025
f39c0e6
feat(packaging): skip symlinks during tar archive creation for securi…
ilopezluna Oct 16, 2025
0d8e66a
make docs
ilopezluna Oct 16, 2025
38faf8c
refactor(package): consolidate cleanup of temporary tar files
ilopezluna Oct 16, 2025
eea47c1
Update pkg/distribution/packaging/dirtar.go
ilopezluna Oct 16, 2025
f9cdf1c
Update pkg/distribution/packaging/dirtar.go
ilopezluna Oct 16, 2025
f5eb167
Update pkg/distribution/internal/bundle/unpack.go
ilopezluna Oct 16, 2025
03a367f
refactor(package): remove temporary file cleanup from tar archive cre…
ilopezluna Oct 16, 2025
8117b06
Merge remote-tracking branch 'origin/package-subfolders' into package…
ilopezluna Oct 16, 2025
66cbf1c
refactor(unpack): change error logging to warning for media type retr…
ilopezluna Oct 16, 2025
bfa9860
remove unnecessary blank line before file copy operation
ilopezluna Oct 16, 2025
5923794
add validation for relative dir-tar paths to prevent directory traversal
ilopezluna Oct 17, 2025
59bb314
Update pkg/distribution/packaging/dirtar.go
ilopezluna Oct 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions cmd/cli/commands/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,18 @@ func newPackagedCmd() *cobra.Command {
}
opts.licensePaths[i] = filepath.Clean(l)
}

// Validate dir-tar paths are relative (not absolute)
for _, dirPath := range opts.dirTarPaths {
if filepath.IsAbs(dirPath) {
return fmt.Errorf(
"dir-tar path must be relative, got absolute path: %s\n\n"+
"See 'docker model package --help' for more information",
dirPath,
)
}
}
Comment on lines +110 to +119
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Absolute-path validation for --dir-tar is duplicated here and again during processing. Consider centralizing this check (e.g., in PreRunE only or a small helper) to avoid drift and keep validation in one place.

Copilot uses AI. Check for mistakes.

return nil
},
RunE: func(cmd *cobra.Command, args []string) error {
Expand All @@ -123,6 +135,7 @@ func newPackagedCmd() *cobra.Command {
c.Flags().StringVar(&opts.safetensorsDir, "safetensors-dir", "", "absolute path to directory containing safetensors files and config")
c.Flags().StringVar(&opts.chatTemplatePath, "chat-template", "", "absolute path to chat template file (must be Jinja format)")
c.Flags().StringArrayVarP(&opts.licensePaths, "license", "l", nil, "absolute path to a license file")
c.Flags().StringArrayVar(&opts.dirTarPaths, "dir-tar", nil, "relative path to directory to package as tar (can be specified multiple times)")
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarify what 'relative' is relative to. Suggest: 'relative path (from --safetensors-dir, or from the GGUF file's directory) to a directory to package as a tar archive; can be specified multiple times'.

Suggested change
c.Flags().StringArrayVar(&opts.dirTarPaths, "dir-tar", nil, "relative path to directory to package as tar (can be specified multiple times)")
c.Flags().StringArrayVar(&opts.dirTarPaths, "dir-tar", nil, "relative path (from --safetensors-dir, or from the GGUF file's directory) to a directory to package as a tar archive; can be specified multiple times")

Copilot uses AI. Check for mistakes.
c.Flags().BoolVar(&opts.push, "push", false, "push to registry (if not set, the model is loaded into the Model Runner content store)")
c.Flags().Uint64Var(&opts.contextSize, "context-size", 0, "context size in tokens")
return c
Expand All @@ -134,6 +147,7 @@ type packageOptions struct {
ggufPath string
safetensorsDir string
licensePaths []string
dirTarPaths []string
push bool
tag string
}
Expand Down Expand Up @@ -213,6 +227,69 @@ func packageModel(cmd *cobra.Command, opts packageOptions) error {
}
}

// Process directory tar archives
var tempDirTarFiles []string
if len(opts.dirTarPaths) > 0 {
// Schedule cleanup of temp tar files
defer func() {
for _, tempFile := range tempDirTarFiles {
os.Remove(tempFile)
}
}()

// Determine base directory for resolving relative paths
var baseDir string
if opts.safetensorsDir != "" {
baseDir = opts.safetensorsDir
} else {
// For GGUF, use the directory containing the GGUF file
baseDir = filepath.Dir(opts.ggufPath)
}

for _, relDirPath := range opts.dirTarPaths {
// Reject absolute paths
if filepath.IsAbs(relDirPath) {
return fmt.Errorf("dir-tar path must be relative: %s", relDirPath)
}
Comment on lines +249 to +253
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] This re-enforces the absolute-path check already performed in PreRunE. To reduce duplication, either rely on the earlier validation or extract a shared helper used by both call sites.

Copilot uses AI. Check for mistakes.

// Resolve the full directory path
fullDirPath := filepath.Join(baseDir, relDirPath)
fullDirPath = filepath.Clean(fullDirPath)

// Verify the resolved path is within baseDir to prevent directory traversal
relPath, err := filepath.Rel(baseDir, fullDirPath)
if err != nil {
return fmt.Errorf("dir-tar path %q could not be validated: %w", relDirPath, err)
}
// Check if the relative path tries to escape the base directory
if relPath == ".." || len(relPath) >= 3 && relPath[:3] == ".."+string(filepath.Separator) {
return fmt.Errorf("dir-tar path %q escapes base directory", relDirPath)
Comment on lines +260 to +266
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The manual prefix check for '..' is a bit brittle to read. Consider using strings.HasPrefix(relPath, ".."+string(filepath.Separator)) or extracting a small helper for clarity and consistency with similar validations.

Copilot uses AI. Check for mistakes.
}

// Verify the directory exists
info, err := os.Stat(fullDirPath)
if err != nil {
return fmt.Errorf("cannot access directory %q (resolved from %q): %w", fullDirPath, relDirPath, err)
}
if !info.IsDir() {
return fmt.Errorf("path %q is not a directory", fullDirPath)
}

cmd.PrintErrf("Creating tar archive for directory %q\n", relDirPath)
tempTarPath, err := packaging.CreateDirectoryTarArchive(fullDirPath)
if err != nil {
return fmt.Errorf("create tar archive for directory %q: %w", relDirPath, err)
}
tempDirTarFiles = append(tempDirTarFiles, tempTarPath)

cmd.PrintErrf("Adding directory tar archive from %q\n", relDirPath)
pkg, err = pkg.WithDirTar(tempTarPath)
if err != nil {
return fmt.Errorf("add directory tar: %w", err)
}
}
}

if opts.push {
cmd.PrintErrln("Pushing model to registry...")
} else {
Expand Down
11 changes: 11 additions & 0 deletions cmd/cli/docs/reference/docker_model_package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ options:
experimentalcli: false
kubernetes: false
swarm: false
- option: dir-tar
value_type: stringArray
default_value: '[]'
description: |
relative path to directory to package as tar (can be specified multiple times)
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarify the base for the relative path. Suggest:\n'relative path (from --safetensors-dir, or from the GGUF file's directory) to a directory to package as a tar archive; can be specified multiple times'.

Suggested change
relative path to directory to package as tar (can be specified multiple times)
relative path (from --safetensors-dir, or from the GGUF file's directory) to a directory to package as a tar archive; can be specified multiple times

Copilot uses AI. Check for mistakes.
deprecated: false
hidden: false
experimental: false
experimentalcli: false
kubernetes: false
swarm: false
- option: gguf
value_type: string
description: absolute path to gguf file
Expand Down
1 change: 1 addition & 0 deletions cmd/cli/docs/reference/model_package.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ When packaging a Safetensors model, --safetensors-dir should point to a director
|:--------------------|:--------------|:--------|:---------------------------------------------------------------------------------------|
| `--chat-template` | `string` | | absolute path to chat template file (must be Jinja format) |
| `--context-size` | `uint64` | `0` | context size in tokens |
| `--dir-tar` | `stringArray` | | relative path to directory to package as tar (can be specified multiple times) |
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The help text doesn't specify what the path is relative to. Suggest: 'relative (to --safetensors-dir, or to the directory containing --gguf) path to a directory to package as a tar archive; can be specified multiple times'.

Suggested change
| `--dir-tar` | `stringArray` | | relative path to directory to package as tar (can be specified multiple times) |
| `--dir-tar` | `stringArray` | | relative (to --safetensors-dir, or to the directory containing --gguf) path to a directory to package as a tar archive; can be specified multiple times |

Copilot uses AI. Check for mistakes.
| `--gguf` | `string` | | absolute path to gguf file |
| `-l`, `--license` | `stringArray` | | absolute path to a license file |
| `--push` | `bool` | | push to registry (if not set, the model is loaded into the Model Runner content store) |
Expand Down
12 changes: 12 additions & 0 deletions pkg/distribution/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,18 @@ func (b *Builder) WithConfigArchive(path string) (*Builder, error) {
}, nil
}

// WithDirTar adds a directory tar archive to the artifact.
// Multiple directory tar archives can be added by calling this method multiple times.
func (b *Builder) WithDirTar(path string) (*Builder, error) {
dirTarLayer, err := partial.NewLayer(path, types.MediaTypeDirTar)
if err != nil {
return nil, fmt.Errorf("dir tar layer from %q: %w", path, err)
}
return &Builder{
model: mutate.AppendLayers(b.model, dirTarLayer),
}, nil
}

Comment on lines +105 to +116
Copy link

Copilot AI Oct 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The method name WithDirTar suggests it accepts a directory path, but it actually expects a tar file path. To avoid confusion for API consumers, either rename to WithDirTarArchive(path string) or accept a directory path and perform the archiving internally.

Copilot uses AI. Check for mistakes.
// Target represents a build target
type Target interface {
Write(context.Context, types.ModelArtifact, io.Writer) error
Expand Down
74 changes: 65 additions & 9 deletions pkg/distribution/internal/bundle/unpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ func Unpack(dir string, model types.Model) (*Bundle, error) {
}
}

// Unpack directory tar archives (can be multiple)
if err := unpackDirTarArchives(bundle, model); err != nil {
return nil, fmt.Errorf("unpack directory tar archives: %w", err)
}

// Always create the runtime config
if err := unpackRuntimeConfig(bundle, model); err != nil {
return nil, fmt.Errorf("add config.json to runtime bundle: %w", err)
Expand Down Expand Up @@ -230,6 +235,52 @@ func unpackConfigArchive(bundle *Bundle, mdl types.Model) error {
return nil
}

func unpackDirTarArchives(bundle *Bundle, mdl types.Model) error {
// Cast to ModelArtifact to access Layers() method
artifact, ok := mdl.(types.ModelArtifact)
if !ok {
// If it's not a ModelArtifact, there are no layers to extract
return nil
}

// Get all layers from the model
layers, err := artifact.Layers()
if err != nil {
return fmt.Errorf("get model layers: %w", err)
}

modelDir := filepath.Join(bundle.dir, ModelSubdir)

// Iterate through layers and extract directory tar archives
for _, layer := range layers {
mediaType, err := layer.MediaType()
if err != nil {
fmt.Printf("Warning: failed to get media type for layer: %v", err)
continue
}

// Check if this is a directory tar layer
if mediaType != types.MediaTypeDirTar {
continue
}

// Get the layer as an uncompressed stream (decompression handled automatically)
uncompressed, err := layer.Uncompressed()
if err != nil {
return fmt.Errorf("get uncompressed layer: %w", err)
}

// Stream directly to tar extraction - no temp file needed
if err := extractTarArchiveFromReader(uncompressed, modelDir); err != nil {
uncompressed.Close()
return fmt.Errorf("extract directory tar archive: %w", err)
}
uncompressed.Close()
}

return nil
}

// validatePathWithinDirectory checks if targetPath is within baseDir to prevent directory traversal attacks.
// It uses filepath.IsLocal() to provide robust security against
// various directory traversal attempts including edge cases like empty paths, ".", "..", symbolic links, etc.
Expand Down Expand Up @@ -264,22 +315,15 @@ func validatePathWithinDirectory(baseDir, targetPath string) error {
return nil
}

func extractTarArchive(archivePath, destDir string) error {
// Open the tar file
file, err := os.Open(archivePath)
if err != nil {
return fmt.Errorf("open tar archive: %w", err)
}
defer file.Close()

func extractTarArchiveFromReader(r io.Reader, destDir string) error {
// Get absolute path of destination directory for security checks
absDestDir, err := filepath.Abs(destDir)
if err != nil {
return fmt.Errorf("get absolute destination path: %w", err)
}

// Create tar reader
tr := tar.NewReader(file)
tr := tar.NewReader(r)

// Extract files
for {
Expand Down Expand Up @@ -328,6 +372,18 @@ func extractTarArchive(archivePath, destDir string) error {
return nil
}

func extractTarArchive(archivePath, destDir string) error {
// Open the tar file
file, err := os.Open(archivePath)
if err != nil {
return fmt.Errorf("open tar archive: %w", err)
}
defer file.Close()

// Delegate to the streaming version
return extractTarArchiveFromReader(file, destDir)
}

// extractFile extracts a single file from the tar reader
func extractFile(tr io.Reader, target string, mode os.FileMode) error {
// Ensure parent directory exists
Expand Down
116 changes: 116 additions & 0 deletions pkg/distribution/packaging/dirtar.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package packaging

import (
"archive/tar"
"fmt"
"io"
"os"
"path/filepath"
)

// CreateDirectoryTarArchive creates a temporary tar archive containing the specified directory
// with its structure preserved. Symlinks encountered in the directory are skipped and will not be included
// in the archive. It returns the path to the temporary tar file and any error encountered.
// The caller is responsible for removing the temporary file when done.
func CreateDirectoryTarArchive(dirPath string) (string, error) {
// Verify directory exists
info, err := os.Stat(dirPath)
if err != nil {
return "", fmt.Errorf("stat directory: %w", err)
}
if !info.IsDir() {
return "", fmt.Errorf("path is not a directory: %s", dirPath)
}

// Create temp file
tmpFile, err := os.CreateTemp("", "dir-tar-*.tar")
if err != nil {
return "", fmt.Errorf("create temp file: %w", err)
}
tmpPath := tmpFile.Name()

// Track success to determine if we should clean up the temp file
shouldKeepTempFile := false
defer func() {
if !shouldKeepTempFile {
os.Remove(tmpPath)
}
}()

// Create tar writer
tw := tar.NewWriter(tmpFile)

// Walk the directory tree
err = filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info == nil {
return fmt.Errorf("nil FileInfo for path: %s", path)
}
// Skip symlinks - they're not needed for model distribution and are
// skipped during extraction for security reasons
if info.Mode()&os.ModeSymlink != 0 {
return nil
}

// Create tar header
header, err := tar.FileInfoHeader(info, "")
if err != nil {
return fmt.Errorf("create tar header for %s: %w", path, err)
}

// Compute relative path from the parent of dirPath
relPath, err := filepath.Rel(filepath.Dir(dirPath), path)
if err != nil {
return fmt.Errorf("compute relative path: %w", err)
}

// Use forward slashes for tar archive paths
header.Name = filepath.ToSlash(relPath)

// Write header
if err := tw.WriteHeader(header); err != nil {
return fmt.Errorf("write tar header: %w", err)
}

// If it's a file, write its contents
if !info.IsDir() {
file, err := os.Open(path)
if err != nil {
return fmt.Errorf("open file %s: %w", path, err)
}

// Copy file contents
if _, err := io.Copy(tw, file); err != nil {
file.Close()
return fmt.Errorf("write tar content for %s: %w", path, err)
}
if err := file.Close(); err != nil {
return fmt.Errorf("close file %s: %w", path, err)
}
}

return nil
})

if err != nil {
tw.Close()
tmpFile.Close()
return "", fmt.Errorf("walk directory: %w", err)
}

// Close tar writer
if err := tw.Close(); err != nil {
tmpFile.Close()
return "", fmt.Errorf("close tar writer: %w", err)
}

// Close temp file
if err := tmpFile.Close(); err != nil {
return "", fmt.Errorf("close temp file: %w", err)
}

shouldKeepTempFile = true
return tmpPath, nil
}
Loading
Loading