Skip to content

Commit 6d49d3d

Browse files
authored
feat: introduce the DOC instruction (#92)
Signed-off-by: chlins <[email protected]>
1 parent db16e4c commit 6d49d3d

File tree

13 files changed

+171
-170
lines changed

13 files changed

+171
-170
lines changed

docs/getting-started.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ MODEL *.safetensors
5858
# Specify code, support glob path pattern.
5959
CODE *.py
6060

61+
# Specify documentation, support glob path pattern.
62+
DOC *.md
63+
6164
```
6265

6366
Then run the following command to build the model artifact:
@@ -110,8 +113,8 @@ Delete the model artifact in the local storage:
110113
$ modctl rm registry.com/models/llama3:v1.0.0
111114
```
112115

113-
Finally, you can use `purge` command to to remove all unnecessary blobs to free up the storage space:
116+
Finally, you can use the `prune` command to remove all unnecessary blobs to free up the storage space:
114117

115118
```shell
116-
$ modctl purge
119+
$ modctl prune
117120
```

pkg/backend/build.go

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,16 +73,8 @@ func (b *backend) Build(ctx context.Context, modelfilePath, workDir, target stri
7373
return nil
7474
}
7575

76-
func (b *backend) defaultProcessors() []processor.Processor {
77-
return []processor.Processor{
78-
// by default process the readme and license file.
79-
processor.NewReadmeProcessor(b.store, modelspec.MediaTypeModelDoc, []string{"README.md", "README"}),
80-
processor.NewLicenseProcessor(b.store, modelspec.MediaTypeModelDoc, []string{"LICENSE.txt", "LICENSE"}),
81-
}
82-
}
83-
8476
func (b *backend) getProcessors(modelfile modelfile.Modelfile) []processor.Processor {
85-
processors := b.defaultProcessors()
77+
processors := []processor.Processor{}
8678

8779
if configs := modelfile.GetConfigs(); len(configs) > 0 {
8880
processors = append(processors, processor.NewModelConfigProcessor(b.store, modelspec.MediaTypeModelWeightConfig, configs))
@@ -96,6 +88,10 @@ func (b *backend) getProcessors(modelfile modelfile.Modelfile) []processor.Proce
9688
processors = append(processors, processor.NewCodeProcessor(b.store, modelspec.MediaTypeModelCode, codes))
9789
}
9890

91+
if docs := modelfile.GetDocs(); len(docs) > 0 {
92+
processors = append(processors, processor.NewDocProcessor(b.store, modelspec.MediaTypeModelDoc, docs))
93+
}
94+
9995
return processors
10096
}
10197

pkg/backend/build_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ func TestGetProcessors(t *testing.T) {
2929
modelfile.On("GetConfigs").Return([]string{"config1", "config2"})
3030
modelfile.On("GetModels").Return([]string{"model1", "model2"})
3131
modelfile.On("GetCodes").Return([]string{"1.py", "2.py"})
32+
modelfile.On("GetDocs").Return([]string{"doc1", "doc2"})
3233

3334
b := &backend{}
3435
processors := b.getProcessors(modelfile)
3536

36-
assert.Len(t, processors, 5)
37-
assert.Equal(t, "readme", processors[0].Name())
38-
assert.Equal(t, "license", processors[1].Name())
39-
assert.Equal(t, "model_config", processors[2].Name())
40-
assert.Equal(t, "model", processors[3].Name())
41-
assert.Equal(t, "code", processors[4].Name())
37+
assert.Len(t, processors, 4)
38+
assert.Equal(t, "model_config", processors[0].Name())
39+
assert.Equal(t, "model", processors[1].Name())
40+
assert.Equal(t, "code", processors[2].Name())
41+
assert.Equal(t, "doc", processors[3].Name())
4242
}

pkg/backend/processor/base.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"path/filepath"
23+
"sort"
2324

2425
"github.com/CloudNativeAI/modctl/pkg/backend/build"
2526
"github.com/CloudNativeAI/modctl/pkg/storage"
@@ -54,6 +55,8 @@ func (b *base) Process(ctx context.Context, workDir, repo string) ([]ocispec.Des
5455
matchedPaths = append(matchedPaths, matches...)
5556
}
5657

58+
sort.Strings(matchedPaths)
59+
5760
var descriptors []ocispec.Descriptor
5861
for _, path := range matchedPaths {
5962
desc, err := build.BuildLayer(ctx, b.store, b.mediaType, workDir, repo, path)
Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,12 @@ import (
2020
"context"
2121

2222
"github.com/CloudNativeAI/modctl/pkg/storage"
23-
2423
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
2524
)
2625

27-
// NewReadmeProcessor creates a new README processor.
28-
func NewReadmeProcessor(store storage.Storage, mediaType string, patterns []string) Processor {
29-
return &readmeProcessor{
26+
// NewDocProcessor creates a new doc processor.
27+
func NewDocProcessor(store storage.Storage, mediaType string, patterns []string) Processor {
28+
return &docProcessor{
3029
base: &base{
3130
store: store,
3231
mediaType: mediaType,
@@ -35,15 +34,15 @@ func NewReadmeProcessor(store storage.Storage, mediaType string, patterns []stri
3534
}
3635
}
3736

38-
// readmeProcessor is the processor to process the README file.
39-
type readmeProcessor struct {
37+
// docProcessor is the processor to process the doc file.
38+
type docProcessor struct {
4039
base *base
4140
}
4241

43-
func (p *readmeProcessor) Name() string {
44-
return "readme"
42+
func (p *docProcessor) Name() string {
43+
return "doc"
4544
}
4645

47-
func (p *readmeProcessor) Process(ctx context.Context, workDir, repo string) ([]ocispec.Descriptor, error) {
46+
func (p *docProcessor) Process(ctx context.Context, workDir, repo string) ([]ocispec.Descriptor, error) {
4847
return p.base.Process(ctx, workDir, repo)
4948
}
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,28 @@ import (
3030
"github.com/stretchr/testify/suite"
3131
)
3232

33-
type licenseProcessorSuite struct {
33+
type docProcessorSuite struct {
3434
suite.Suite
3535
mockStore *storage.Storage
3636
processor Processor
3737
workDir string
3838
}
3939

40-
func (s *licenseProcessorSuite) SetupTest() {
40+
func (s *docProcessorSuite) SetupTest() {
4141
s.mockStore = &storage.Storage{}
42-
s.processor = NewLicenseProcessor(s.mockStore, modelspec.MediaTypeModelDoc, []string{"LICENSE"})
42+
s.processor = NewDocProcessor(s.mockStore, modelspec.MediaTypeModelDoc, []string{"LICENSE"})
4343
// generate test files for process.
4444
s.workDir = s.Suite.T().TempDir()
4545
if err := os.WriteFile(filepath.Join(s.workDir, "LICENSE"), []byte(""), 0644); err != nil {
4646
s.Suite.T().Fatal(err)
4747
}
4848
}
4949

50-
func (s *licenseProcessorSuite) TestName() {
51-
assert.Equal(s.Suite.T(), "license", s.processor.Name())
50+
func (s *docProcessorSuite) TestName() {
51+
assert.Equal(s.Suite.T(), "doc", s.processor.Name())
5252
}
5353

54-
func (s *licenseProcessorSuite) TestProcess() {
54+
func (s *docProcessorSuite) TestProcess() {
5555
ctx := context.Background()
5656
repo := "test-repo"
5757
s.mockStore.On("PushBlob", ctx, repo, mock.Anything).Return("sha256:1234567890abcdef", int64(1024), nil)
@@ -64,6 +64,6 @@ func (s *licenseProcessorSuite) TestProcess() {
6464
assert.Equal(s.Suite.T(), "LICENSE", desc[0].Annotations[modelspec.AnnotationFilepath])
6565
}
6666

67-
func TestLicenseProcessorSuite(t *testing.T) {
68-
suite.Run(t, new(licenseProcessorSuite))
67+
func TestDocProcessorSuite(t *testing.T) {
68+
suite.Run(t, new(docProcessorSuite))
6969
}

pkg/backend/processor/license.go

Lines changed: 0 additions & 48 deletions
This file was deleted.

pkg/backend/processor/readme_test.go

Lines changed: 0 additions & 69 deletions
This file was deleted.

pkg/modelfile/command/command.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ const (
4545
// package, and each dataset file will be a layer.
4646
DATASET = "dataset"
4747

48+
// DOC is the command to set the documentation file path. The value of this command
49+
// is the glob of the documentation file path to match the documentation file name.
50+
// The DOC command can be used multiple times in a modelfile, it will scan
51+
// the documentation file path by the glob and copy each documentation file to the artifact
52+
// package, and each documentation file will be a layer.
53+
DOC = "doc"
54+
4855
// NAME is the command to set the model name, such as llama3-8b-instruct, gpt2-xl,
4956
// qwen2-vl-72b-instruct, etc.
5057
NAME = "name"
@@ -75,6 +82,7 @@ var Commands = []string{
7582
MODEL,
7683
CODE,
7784
DATASET,
85+
DOC,
7886
NAME,
7987
ARCH,
8088
FAMILY,

0 commit comments

Comments
 (0)