Skip to content

Commit 7ddea9d

Browse files
authored
Merge pull request #45 from doringeman/df
Add /engines/df
2 parents 105562e + b881521 commit 7ddea9d

File tree

9 files changed

+94
-1
lines changed

9 files changed

+94
-1
lines changed

main.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,12 @@ func main() {
5555
modelManager,
5656
log.WithFields(logrus.Fields{"component": "llama.cpp"}),
5757
llamaServerPath,
58-
func() string { wd, _ := os.Getwd(); return wd }(),
58+
func() string {
59+
wd, _ := os.Getwd()
60+
d := filepath.Join(wd, "updated-inference")
61+
_ = os.MkdirAll(d, 0o755)
62+
return d
63+
}(),
5964
)
6065
if err != nil {
6166
log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)

pkg/diskusage/diskusage.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package diskusage
2+
3+
import (
4+
"io/fs"
5+
"path/filepath"
6+
)
7+
8+
// Size returns the combined size, in bytes, of every regular file found while
// walking the tree rooted at path.
//
// Entries that are not regular files (directories, symlinks, devices, ...)
// contribute nothing to the total. The walk stops at the first error, which is
// returned unchanged; in that case the reported size is 0 so callers never
// observe a partially accumulated total.
func Size(path string) (float64, error) {
	var total int64
	err := filepath.WalkDir(path, func(_ string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !d.Type().IsRegular() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		total += info.Size()
		return nil
	})
	if err != nil {
		// Do not leak a partial sum next to a non-nil error.
		return 0, err
	}
	return float64(total), nil
}

pkg/inference/backend.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,6 @@ type Backend interface {
6969
Run(ctx context.Context, socket, model string, mode BackendMode) error
7070
// Status returns a description of the backend's state.
7171
Status() string
72+
// GetDiskUsage returns the disk usage of the backend.
73+
GetDiskUsage() (float64, error)
7274
}

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"runtime"
1313
"strconv"
1414

15+
"github.com/docker/model-runner/pkg/diskusage"
1516
"github.com/docker/model-runner/pkg/inference"
1617
"github.com/docker/model-runner/pkg/inference/models"
1718
"github.com/docker/model-runner/pkg/logging"
@@ -199,3 +200,11 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
199200
// Status returns the backend's current status string as stored in l.status.
func (l *llamaCpp) Status() string {
200201
return l.status
201202
}
203+
204+
func (l *llamaCpp) GetDiskUsage() (float64, error) {
205+
size, err := diskusage.Size(l.updatedServerStoragePath)
206+
if err != nil {
207+
return 0, fmt.Errorf("error while getting store size: %v", err)
208+
}
209+
return size, nil
210+
}

pkg/inference/backends/mlx/mlx.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,7 @@ func (m *mlx) Run(ctx context.Context, socket, model string, mode inference.Back
5858
// Status always reports "not running" for this backend.
func (m *mlx) Status() string {
5959
return "not running"
6060
}
61+
62+
// GetDiskUsage always reports a disk usage of 0 with a nil error; nothing is
// measured on disk by this implementation.
func (m *mlx) GetDiskUsage() (float64, error) {
63+
return 0, nil
64+
}

pkg/inference/backends/vllm/vllm.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,7 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, mode inference.Bac
5858
// Status always reports "not running" for this backend.
func (v *vLLM) Status() string {
5959
return "not running"
6060
}
61+
62+
// GetDiskUsage always reports a disk usage of 0 with a nil error; nothing is
// measured on disk by this implementation.
func (v *vLLM) GetDiskUsage() (float64, error) {
63+
return 0, nil
64+
}

pkg/inference/models/manager.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/docker/model-distribution/distribution"
1515
"github.com/docker/model-distribution/registry"
1616
"github.com/docker/model-distribution/types"
17+
"github.com/docker/model-runner/pkg/diskusage"
1718
"github.com/docker/model-runner/pkg/inference"
1819
"github.com/docker/model-runner/pkg/logging"
1920
"github.com/sirupsen/logrus"
@@ -399,6 +400,21 @@ func (m *Manager) handlePushModel(w http.ResponseWriter, r *http.Request, model
399400
}
400401
}
401402

403+
// GetDiskUsage returns the disk usage of the model store.
404+
func (m *Manager) GetDiskUsage() (float64, error, int) {
405+
if m.distributionClient == nil {
406+
return 0, errors.New("model distribution service unavailable"), http.StatusServiceUnavailable
407+
}
408+
409+
storePath := m.distributionClient.GetStorePath()
410+
size, err := diskusage.Size(storePath)
411+
if err != nil {
412+
return 0, fmt.Errorf("error while getting store size: %v", err), http.StatusInternalServerError
413+
}
414+
415+
return size, nil, http.StatusOK
416+
}
417+
402418
// ServeHTTP implement net/http.Handler.ServeHTTP.
403419
func (m *Manager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
404420
m.router.ServeHTTP(w, r)

pkg/inference/scheduling/api.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,9 @@ type BackendStatus struct {
5555
// LastUsed represents when this (backend, model, mode) tuple was last used
5656
LastUsed time.Time `json:"last_used,omitempty"`
5757
}
58+
59+
// DiskUsage represents the disk usage of the models and default backend.
// It is the value JSON-encoded into the response by Scheduler.GetDiskUsage.
60+
type DiskUsage struct {
61+
// ModelsDiskUsage is the size reported by the model manager's GetDiskUsage.
ModelsDiskUsage float64 `json:"models_disk_usage"`
62+
// DefaultBackendDiskUsage is the size reported by the default backend's
// GetDiskUsage.
DefaultBackendDiskUsage float64 `json:"default_backend_disk_usage"`
63+
}

pkg/inference/scheduling/scheduler.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
8383
}
8484
m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus
8585
m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends
86+
m["GET "+inference.InferencePrefix+"/df"] = s.GetDiskUsage
8687
return m
8788
}
8889

@@ -266,6 +267,28 @@ func (s *Scheduler) getLoaderStatus() []BackendStatus {
266267
return result
267268
}
268269

270+
func (s *Scheduler) GetDiskUsage(w http.ResponseWriter, _ *http.Request) {
271+
modelsDiskUsage, err, httpCode := s.modelManager.GetDiskUsage()
272+
if err != nil {
273+
http.Error(w, fmt.Sprintf("Failed to get models disk usage: %v", err), httpCode)
274+
return
275+
}
276+
277+
// TODO: Get disk usage for each backend once the backends are implemented.
278+
defaultBackendDiskUsage, err := s.defaultBackend.GetDiskUsage()
279+
if err != nil {
280+
http.Error(w, fmt.Sprintf("Failed to get disk usage for %s: %v", s.defaultBackend.Name(), err), http.StatusInternalServerError)
281+
return
282+
}
283+
284+
diskUsage := DiskUsage{modelsDiskUsage, defaultBackendDiskUsage}
285+
w.Header().Set("Content-Type", "application/json")
286+
if err := json.NewEncoder(w).Encode(diskUsage); err != nil {
287+
http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
288+
return
289+
}
290+
}
291+
269292
// ServeHTTP implements net/http.Handler.ServeHTTP.
270293
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
271294
s.router.ServeHTTP(w, r)

0 commit comments

Comments
 (0)