Skip to content

Commit 0f01f66

Browse files
authored
Merge pull request #137 from docker/fix-blob-url
Fix remote memory estimation
2 parents d8ed374 + f09c4b4 commit 0f01f66

File tree

3 files changed

+27
-16
lines changed

3 files changed

+27
-16
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ go 1.23.7
55
require (
66
github.com/containerd/containerd/v2 v2.0.4
77
github.com/containerd/platforms v1.0.0-rc.1
8-
github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8
8+
github.com/docker/model-distribution v0.0.0-20250822164750-dcd03ba922e7
99
github.com/elastic/go-sysinfo v1.15.3
1010
github.com/google/go-containerregistry v0.20.3
1111
github.com/gpustack/gguf-parser-go v0.14.1

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBi
3838
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
3939
github.com/docker/docker-credential-helpers v0.8.2 h1:bX3YxiGzFP5sOXWc3bTPEXdEaZSeVMrFgOr3T+zrFAo=
4040
github.com/docker/docker-credential-helpers v0.8.2/go.mod h1:P3ci7E3lwkZg6XiHdRKft1KckHiO9a2rNtyFbZ/ry9M=
41-
github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8 h1:agH5zeO6tf8lHgMcBZxqCFKPuXHM/cA53gdsn895eMI=
42-
github.com/docker/model-distribution v0.0.0-20250813080006-2a983516ebb8/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
41+
github.com/docker/model-distribution v0.0.0-20250822164750-dcd03ba922e7 h1:dOk1UTVMyDHNG4WFS8jnAtfKdPUE3QaMWNvrzRoK/dI=
42+
github.com/docker/model-distribution v0.0.0-20250822164750-dcd03ba922e7/go.mod h1:dThpO9JoG5Px3i+rTluAeZcqLGw8C0qepuEL4gL2o/c=
4343
github.com/elastic/go-sysinfo v1.15.3 h1:W+RnmhKFkqPTCRoFq2VCTmsT4p/fwpo+3gKNQsn1XU0=
4444
github.com/elastic/go-sysinfo v1.15.3/go.mod h1:K/cNrqYTDrSoMh2oDkYEMS2+a72GRxMvNP+GC+vRIlo=
4545
github.com/elastic/go-windows v1.0.2 h1:yoLLsAsV5cfg9FLhZ9EXZ2n2sQFKeDYrHenkcivY4vI=

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
parser "github.com/gpustack/gguf-parser-go"
2020

2121
"github.com/docker/model-distribution/types"
22+
2223
"github.com/docker/model-runner/pkg/diskusage"
2324
"github.com/docker/model-runner/pkg/inference"
2425
"github.com/docker/model-runner/pkg/inference/config"
@@ -308,19 +309,15 @@ func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser.
308309
if err != nil {
309310
return nil, types.Config{}, fmt.Errorf("getting layers of model(%s): %w", model, err)
310311
}
311-
var ggufDigest v1.Hash
312-
for _, layer := range layers {
313-
mt, err := layer.MediaType()
314-
if err != nil {
315-
return nil, types.Config{}, fmt.Errorf("getting media type of model(%s) layer: %w", model, err)
316-
}
317-
if mt == types.MediaTypeGGUF {
318-
ggufDigest, err = layer.Digest()
319-
if err != nil {
320-
return nil, types.Config{}, fmt.Errorf("getting digest of GGUF layer for model(%s): %w", model, err)
321-
}
322-
break
323-
}
312+
ggufLayers := getGGUFLayers(layers)
313+
if len(ggufLayers) != 1 {
314+
return nil, types.Config{}, fmt.Errorf(
315+
"remote memory estimation only supported for models with single GGUF layer, found %d layers", len(ggufLayers),
316+
)
317+
}
318+
ggufDigest, err := ggufLayers[0].Digest()
319+
if err != nil {
320+
return nil, types.Config{}, fmt.Errorf("getting digest of GGUF layer for model(%s): %w", model, err)
324321
}
325322
if ggufDigest.String() == "" {
326323
return nil, types.Config{}, fmt.Errorf("model(%s) has no GGUF layer", model)
@@ -344,6 +341,20 @@ func (l *llamaCpp) parseRemoteModel(ctx context.Context, model string) (*parser.
344341
return mdlGguf, config, nil
345342
}
346343

344+
func getGGUFLayers(layers []v1.Layer) []v1.Layer {
345+
var filtered []v1.Layer
346+
for _, layer := range layers {
347+
mt, err := layer.MediaType()
348+
if err != nil {
349+
continue
350+
}
351+
if mt == types.MediaTypeGGUF {
352+
filtered = append(filtered, layer)
353+
}
354+
}
355+
return filtered
356+
}
357+
347358
func (l *llamaCpp) checkGPUSupport(ctx context.Context) bool {
348359
binPath := l.vendoredServerStoragePath
349360
if l.updatedLlamaCpp {

0 commit comments

Comments
 (0)