Skip to content

Commit 105562e

Browse files
authored
Merge pull request #42 from doringeman/ps
Add /engines/ps
2 parents e6fd394 + 13c093c commit 105562e

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

pkg/inference/scheduling/api.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package scheduling
22

33
import (
	"encoding/json"
	"strings"
	"time"

	"github.com/docker/model-runner/pkg/inference"
)
@@ -42,3 +43,15 @@ type OpenAIInferenceRequest struct {
4243
// Model is the requested model name.
4344
Model string `json:"model"`
4445
}
46+
47+
// BackendStatus represents information about a running backend.
type BackendStatus struct {
	// BackendName is the name of the backend.
	BackendName string `json:"backend_name"`
	// ModelName is the name of the model loaded in the backend.
	ModelName string `json:"model_name"`
	// Mode is the mode the backend is operating in.
	Mode string `json:"mode"`
	// LastUsed represents when this (backend, model, mode) tuple was last
	// used. The zero value means "no timestamp" and is left out of the JSON
	// encoding by MarshalJSON.
	LastUsed time.Time `json:"last_used,omitempty"`
}

// MarshalJSON encodes the status with the same keys and key order as the
// struct tags describe, but omits "last_used" when LastUsed is the zero time.
//
// The custom encoder is needed because encoding/json's omitempty option never
// omits struct values such as time.Time; without it a zero LastUsed would be
// emitted as "0001-01-01T00:00:00Z".
func (b BackendStatus) MarshalJSON() ([]byte, error) {
	// plain has BackendStatus's fields but not its methods, so marshaling it
	// does not recurse back into this function.
	type plain BackendStatus

	out := struct {
		plain
		// LastUsed shadows the embedded field of the same JSON name; as a
		// pointer it can be nil, which omitempty does honor.
		LastUsed *time.Time `json:"last_used,omitempty"`
	}{plain: plain(b)}

	if !b.LastUsed.IsZero() {
		out.LastUsed = &b.LastUsed
	}
	return json.Marshal(out)
}

pkg/inference/scheduling/scheduler.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"fmt"
99
"io"
1010
"net/http"
11+
"time"
1112

1213
"github.com/docker/model-distribution/distribution"
1314
"github.com/docker/model-runner/pkg/inference"
@@ -81,6 +82,7 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc {
8182
m[route] = s.handleOpenAIInference
8283
}
8384
m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus
85+
m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends
8486
return m
8587
}
8688

@@ -224,6 +226,46 @@ func (s *Scheduler) ResetInstaller(httpClient *http.Client) {
224226
s.installer = newInstaller(s.log, s.backends, httpClient)
225227
}
226228

229+
// GetRunningBackends returns information about all running backends
230+
func (s *Scheduler) GetRunningBackends(w http.ResponseWriter, r *http.Request) {
231+
runningBackends := s.getLoaderStatus()
232+
233+
w.Header().Set("Content-Type", "application/json")
234+
if err := json.NewEncoder(w).Encode(runningBackends); err != nil {
235+
http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
236+
return
237+
}
238+
}
239+
240+
// getLoaderStatus returns information about all running backends managed by the loader
241+
func (s *Scheduler) getLoaderStatus() []BackendStatus {
242+
if !s.loader.lock(context.Background()) {
243+
return []BackendStatus{}
244+
}
245+
defer s.loader.unlock()
246+
247+
result := make([]BackendStatus, 0, len(s.loader.runners))
248+
249+
for key, slot := range s.loader.runners {
250+
if s.loader.slots[slot] != nil {
251+
status := BackendStatus{
252+
BackendName: key.backend,
253+
ModelName: key.model,
254+
Mode: key.mode.String(),
255+
LastUsed: time.Time{},
256+
}
257+
258+
if s.loader.references[slot] == 0 {
259+
status.LastUsed = s.loader.timestamps[slot]
260+
}
261+
262+
result = append(result, status)
263+
}
264+
}
265+
266+
return result
267+
}
268+
227269
// ServeHTTP implements net/http.Handler.ServeHTTP.
228270
func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
229271
s.router.ServeHTTP(w, r)

0 commit comments

Comments
 (0)