Skip to content

Commit dd95023

Browse files
authored
refactor: change regex match to the glob path pattern (#73)
Signed-off-by: chlins <[email protected]>
1 parent f8dabcd commit dd95023

File tree

14 files changed

+255
-270
lines changed

14 files changed

+255
-270
lines changed

docs/getting-started.md

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,14 @@ PRECISION bf16
4646
# Model quantization (string), such as awq, gptq, etc.
4747
QUANTIZATION awq
4848

49-
# Specify model configuration file.
49+
# Specify model configuration file; glob path patterns are supported.
5050
CONFIG config.json
5151

52-
# Specify model configuration file.
52+
# Specify model configuration file; glob path patterns are supported.
5353
CONFIG generation_config.json
5454

55-
# Model weight.
56-
MODEL \.safetensors$
57-
58-
# Model code.
59-
CODE \.py$
60-
61-
# Model dataset.
62-
DATASET \.csv$
55+
# Model weight; glob path patterns are supported.
56+
MODEL *.safetensors
6357
```
6458

6559
Then run the following command to build the model artifact:

pkg/backend/build.go

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,12 @@ package backend
1919
import (
2020
"context"
2121
"fmt"
22-
"os"
23-
"path/filepath"
2422

2523
"github.com/CloudNativeAI/modctl/pkg/backend/build"
2624
"github.com/CloudNativeAI/modctl/pkg/backend/processor"
2725
"github.com/CloudNativeAI/modctl/pkg/modelfile"
2826

27+
modelspec "github.com/CloudNativeAI/model-spec/specs-go/v1"
2928
humanize "github.com/dustin/go-humanize"
3029
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
3130
)
@@ -45,7 +44,7 @@ func (b *backend) Build(ctx context.Context, modelfilePath, workDir, target stri
4544

4645
repo, tag := ref.Repository(), ref.Tag()
4746
layers := []ocispec.Descriptor{}
48-
layerDescs, err := b.process(ctx, workDir, repo, getProcessors(modelfile)...)
47+
layerDescs, err := b.process(ctx, workDir, repo, b.getProcessors(modelfile)...)
4948
if err != nil {
5049
return fmt.Errorf("failed to process files: %w", err)
5150
}
@@ -70,64 +69,41 @@ func (b *backend) Build(ctx context.Context, modelfilePath, workDir, target stri
7069
return nil
7170
}
7271

73-
func defaultProcessors() []processor.Processor {
72+
func (b *backend) defaultProcessors() []processor.Processor {
7473
return []processor.Processor{
75-
processor.NewLicenseProcessor(),
76-
processor.NewReadmeProcessor(),
74+
// by default, process the readme and license files.
75+
processor.NewReadmeProcessor(b.store, modelspec.MediaTypeModelDoc, []string{"README.md", "README"}),
76+
processor.NewLicenseProcessor(b.store, modelspec.MediaTypeModelDoc, []string{"LICENSE.txt", "LICENSE"}),
7777
}
7878
}
7979

80-
func getProcessors(modelfile modelfile.Modelfile) []processor.Processor {
81-
processors := defaultProcessors()
80+
func (b *backend) getProcessors(modelfile modelfile.Modelfile) []processor.Processor {
81+
processors := b.defaultProcessors()
8282

8383
if configs := modelfile.GetConfigs(); len(configs) > 0 {
84-
processors = append(processors, processor.NewModelConfigProcessor(configs))
84+
processors = append(processors, processor.NewModelConfigProcessor(b.store, modelspec.MediaTypeModelWeightConfig, configs))
8585
}
8686

8787
if models := modelfile.GetModels(); len(models) > 0 {
88-
processors = append(processors, processor.NewModelProcessor(models))
88+
processors = append(processors, processor.NewModelProcessor(b.store, modelspec.MediaTypeModelWeight, models))
8989
}
9090

9191
return processors
9292
}
9393

9494
// process walks the user work directory and process the identified files.
9595
func (b *backend) process(ctx context.Context, workDir string, repo string, processors ...processor.Processor) ([]ocispec.Descriptor, error) {
96-
layers := []ocispec.Descriptor{}
97-
// walk the user work directory and handle the default identified files.
98-
if err := filepath.Walk(workDir, func(path string, info os.FileInfo, err error) error {
96+
descriptors := []ocispec.Descriptor{}
97+
for _, p := range processors {
98+
descs, err := p.Process(ctx, workDir, repo)
9999
if err != nil {
100-
return fmt.Errorf("failed to walk directory: %w", err)
101-
}
102-
// skip directories.
103-
if info.IsDir() {
104-
return nil
105-
}
106-
// get absolute path.
107-
path, err = filepath.Abs(path)
108-
if err != nil {
109-
return fmt.Errorf("failed to get absolute path: %w", err)
110-
}
111-
// fan-in file processors.
112-
for _, p := range processors {
113-
// process the file if it can be recognized.
114-
if p.Identify(ctx, path, info) {
115-
desc, err := p.Process(ctx, b.store, repo, path, workDir)
116-
if err != nil {
117-
return fmt.Errorf("failed to process file: %w", err)
118-
}
119-
120-
fmt.Printf("%-15s => %s (%s)\n", "Built blob", desc.Digest, humanize.IBytes(uint64(desc.Size)))
121-
layers = append(layers, desc)
122-
}
100+
return nil, err
123101
}
124102

125-
return nil
126-
}); err != nil {
127-
return nil, err
103+
descriptors = append(descriptors, descs...)
128104
}
129105

130-
return layers, nil
106+
return descriptors, nil
131107
}
132108

133109
// manifestAnnotation returns the annotations for the manifest.

pkg/backend/build/build.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ import (
3636
)
3737

3838
// BuildLayer converts the file to the image blob and push it to the storage.
39-
func BuildLayer(ctx context.Context, store storage.Storage, mediaType, repo, path, workDir string) (ocispec.Descriptor, error) {
39+
func BuildLayer(ctx context.Context, store storage.Storage, mediaType, workDir, repo, path string) (ocispec.Descriptor, error) {
4040
reader, err := archiver.Tar(path)
4141
if err != nil {
4242
return ocispec.Descriptor{}, fmt.Errorf("failed to tar file: %w", err)

pkg/backend/build_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@ func TestGetProcessors(t *testing.T) {
2929
modelfile.On("GetConfigs").Return([]string{"config1", "config2"})
3030
modelfile.On("GetModels").Return([]string{"model1", "model2"})
3131

32-
processors := getProcessors(modelfile)
32+
b := &backend{}
33+
processors := b.getProcessors(modelfile)
3334

3435
assert.Len(t, processors, 4)
35-
assert.Equal(t, "license", processors[0].Name())
36-
assert.Equal(t, "readme", processors[1].Name())
36+
assert.Equal(t, "readme", processors[0].Name())
37+
assert.Equal(t, "license", processors[1].Name())
3738
assert.Equal(t, "model_config", processors[2].Name())
3839
assert.Equal(t, "model", processors[3].Name())
3940
}

pkg/backend/processor/base.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright 2025 The CNAI Authors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package processor
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"path/filepath"
23+
24+
"github.com/CloudNativeAI/modctl/pkg/backend/build"
25+
"github.com/CloudNativeAI/modctl/pkg/storage"
26+
27+
humanize "github.com/dustin/go-humanize"
28+
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
29+
)
30+
31+
type base struct {
32+
// store is the underlying storage backend.
33+
store storage.Storage
34+
// mediaType is the media type of the processed content.
35+
mediaType string
36+
// patterns is the list of patterns to match.
37+
patterns []string
38+
}
39+
40+
// Process implements the Processor interface, which can be reused by other processors.
41+
func (b *base) Process(ctx context.Context, workDir, repo string) ([]ocispec.Descriptor, error) {
42+
absWorkDir, err := filepath.Abs(workDir)
43+
if err != nil {
44+
return nil, err
45+
}
46+
47+
var matchedPaths []string
48+
for _, pattern := range b.patterns {
49+
matches, err := filepath.Glob(filepath.Join(absWorkDir, pattern))
50+
if err != nil {
51+
return nil, err
52+
}
53+
54+
matchedPaths = append(matchedPaths, matches...)
55+
}
56+
57+
var descriptors []ocispec.Descriptor
58+
for _, path := range matchedPaths {
59+
desc, err := build.BuildLayer(ctx, b.store, b.mediaType, workDir, repo, path)
60+
if err != nil {
61+
return nil, err
62+
}
63+
64+
fmt.Printf("%-15s => %s (%s)\n", "Built blob", desc.Digest, humanize.IBytes(uint64(desc.Size)))
65+
descriptors = append(descriptors, desc)
66+
}
67+
68+
return descriptors, nil
69+
}

pkg/backend/processor/license.go

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,36 +18,31 @@ package processor
1818

1919
import (
2020
"context"
21-
"os"
2221

23-
"github.com/CloudNativeAI/modctl/pkg/backend/build"
2422
"github.com/CloudNativeAI/modctl/pkg/storage"
25-
modelspec "github.com/CloudNativeAI/model-spec/specs-go/v1"
26-
2723
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
2824
)
2925

3026
// NewLicenseProcessor creates a new LICENSE processor.
31-
func NewLicenseProcessor() Processor {
32-
return &licenseProcessor{}
27+
func NewLicenseProcessor(store storage.Storage, mediaType string, patterns []string) Processor {
28+
return &licenseProcessor{
29+
base: &base{
30+
store: store,
31+
mediaType: mediaType,
32+
patterns: patterns,
33+
},
34+
}
3335
}
3436

3537
// licenseProcessor is the processor to process the LICENSE file.
36-
type licenseProcessor struct{}
38+
type licenseProcessor struct {
39+
base *base
40+
}
3741

3842
func (p *licenseProcessor) Name() string {
3943
return "license"
4044
}
4145

42-
func (p *licenseProcessor) Identify(_ context.Context, path string, info os.FileInfo) bool {
43-
return info.Name() == "LICENSE" || info.Name() == "LICENSE.txt"
44-
}
45-
46-
func (p *licenseProcessor) Process(ctx context.Context, store storage.Storage, repo, path, workDir string) (ocispec.Descriptor, error) {
47-
desc, err := build.BuildLayer(ctx, store, modelspec.MediaTypeModelDoc, repo, path, workDir)
48-
if err != nil {
49-
return ocispec.Descriptor{}, err
50-
}
51-
52-
return desc, nil
46+
func (p *licenseProcessor) Process(ctx context.Context, workDir, repo string) ([]ocispec.Descriptor, error) {
47+
return p.base.Process(ctx, workDir, repo)
5348
}

pkg/backend/processor/license_test.go

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -18,54 +18,52 @@ package processor
1818

1919
import (
2020
"context"
21+
"os"
22+
"path/filepath"
2123
"testing"
22-
"testing/fstest"
2324

2425
"github.com/CloudNativeAI/modctl/test/mocks/storage"
2526
modelspec "github.com/CloudNativeAI/model-spec/specs-go/v1"
2627

2728
"github.com/stretchr/testify/assert"
2829
"github.com/stretchr/testify/mock"
30+
"github.com/stretchr/testify/suite"
2931
)
3032

31-
func TestLicenseProcessor_Name(t *testing.T) {
32-
p := NewLicenseProcessor()
33-
assert.Equal(t, "license", p.Name())
33+
type licenseProcessorSuite struct {
34+
suite.Suite
35+
mockStore *storage.Storage
36+
processor Processor
37+
workDir string
3438
}
3539

36-
func TestLicenseProcessor_Identify(t *testing.T) {
37-
p := NewLicenseProcessor()
38-
mockFS := fstest.MapFS{
39-
"LICENSE": &fstest.MapFile{},
40-
"LICENSE.txt": &fstest.MapFile{},
41-
"README.md": &fstest.MapFile{},
40+
func (s *licenseProcessorSuite) SetupTest() {
41+
s.mockStore = &storage.Storage{}
42+
s.processor = NewLicenseProcessor(s.mockStore, modelspec.MediaTypeModelDoc, []string{"LICENSE"})
43+
// generate test files for the processor to process.
44+
s.workDir = s.Suite.T().TempDir()
45+
if err := os.WriteFile(filepath.Join(s.workDir, "LICENSE"), []byte(""), 0644); err != nil {
46+
s.Suite.T().Fatal(err)
4247
}
43-
info, err := mockFS.Stat("LICENSE")
44-
assert.NoError(t, err)
45-
assert.True(t, p.Identify(context.Background(), "LICENSE", info))
46-
47-
info, err = mockFS.Stat("LICENSE.txt")
48-
assert.NoError(t, err)
49-
assert.True(t, p.Identify(context.Background(), "LICENSE.txt", info))
48+
}
5049

51-
info, err = mockFS.Stat("README.md")
52-
assert.NoError(t, err)
53-
assert.False(t, p.Identify(context.Background(), "README.md", info))
50+
func (s *licenseProcessorSuite) TestName() {
51+
assert.Equal(s.Suite.T(), "license", s.processor.Name())
5452
}
5553

56-
func TestLicenseProcessor_Process(t *testing.T) {
57-
p := NewLicenseProcessor()
54+
func (s *licenseProcessorSuite) TestProcess() {
5855
ctx := context.Background()
59-
mockStore := &storage.Storage{}
6056
repo := "test-repo"
61-
path := "/tmp/LICENSE"
57+
s.mockStore.On("PushBlob", ctx, repo, mock.Anything).Return("sha256:1234567890abcdef", int64(1024), nil)
6258

63-
mockStore.On("PushBlob", ctx, repo, mock.Anything).Return("sha256:1234567890abcdef", int64(1024), nil)
59+
desc, err := s.processor.Process(ctx, s.workDir, repo)
60+
assert.NoError(s.Suite.T(), err)
61+
assert.NotNil(s.Suite.T(), desc)
62+
assert.Equal(s.Suite.T(), "sha256:1234567890abcdef", desc[0].Digest.String())
63+
assert.Equal(s.Suite.T(), int64(1024), desc[0].Size)
64+
assert.Equal(s.Suite.T(), "LICENSE", desc[0].Annotations[modelspec.AnnotationFilepath])
65+
}
6466

65-
desc, err := p.Process(ctx, mockStore, repo, path, "/tmp")
66-
assert.NoError(t, err)
67-
assert.NotNil(t, desc)
68-
assert.Equal(t, "sha256:1234567890abcdef", desc.Digest.String())
69-
assert.Equal(t, int64(1024), desc.Size)
70-
assert.Equal(t, "LICENSE", desc.Annotations[modelspec.AnnotationFilepath])
67+
func TestLicenseProcessorSuite(t *testing.T) {
68+
suite.Run(t, new(licenseProcessorSuite))
7169
}

0 commit comments

Comments
 (0)