|
8 | 8 | "fmt" |
9 | 9 | "io" |
10 | 10 | "net/http" |
| 11 | + "time" |
11 | 12 |
|
12 | 13 | "github.com/docker/model-distribution/distribution" |
13 | 14 | "github.com/docker/model-runner/pkg/inference" |
@@ -81,6 +82,7 @@ func (s *Scheduler) routeHandlers() map[string]http.HandlerFunc { |
81 | 82 | m[route] = s.handleOpenAIInference |
82 | 83 | } |
83 | 84 | m["GET "+inference.InferencePrefix+"/status"] = s.GetBackendStatus |
| 85 | + m["GET "+inference.InferencePrefix+"/ps"] = s.GetRunningBackends |
84 | 86 | return m |
85 | 87 | } |
86 | 88 |
|
@@ -224,6 +226,46 @@ func (s *Scheduler) ResetInstaller(httpClient *http.Client) { |
224 | 226 | s.installer = newInstaller(s.log, s.backends, httpClient) |
225 | 227 | } |
226 | 228 |
|
| 229 | +// GetRunningBackends returns information about all running backends |
| 230 | +func (s *Scheduler) GetRunningBackends(w http.ResponseWriter, r *http.Request) { |
| 231 | + runningBackends := s.getLoaderStatus() |
| 232 | + |
| 233 | + w.Header().Set("Content-Type", "application/json") |
| 234 | + if err := json.NewEncoder(w).Encode(runningBackends); err != nil { |
| 235 | + http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError) |
| 236 | + return |
| 237 | + } |
| 238 | +} |
| 239 | + |
| 240 | +// getLoaderStatus returns information about all running backends managed by the loader |
| 241 | +func (s *Scheduler) getLoaderStatus() []BackendStatus { |
| 242 | + if !s.loader.lock(context.Background()) { |
| 243 | + return []BackendStatus{} |
| 244 | + } |
| 245 | + defer s.loader.unlock() |
| 246 | + |
| 247 | + result := make([]BackendStatus, 0, len(s.loader.runners)) |
| 248 | + |
| 249 | + for key, slot := range s.loader.runners { |
| 250 | + if s.loader.slots[slot] != nil { |
| 251 | + status := BackendStatus{ |
| 252 | + BackendName: key.backend, |
| 253 | + ModelName: key.model, |
| 254 | + Mode: key.mode.String(), |
| 255 | + LastUsed: time.Time{}, |
| 256 | + } |
| 257 | + |
| 258 | + if s.loader.references[slot] == 0 { |
| 259 | + status.LastUsed = s.loader.timestamps[slot] |
| 260 | + } |
| 261 | + |
| 262 | + result = append(result, status) |
| 263 | + } |
| 264 | + } |
| 265 | + |
| 266 | + return result |
| 267 | +} |
| 268 | + |
227 | 269 | // ServeHTTP implements net/http.Handler.ServeHTTP. |
228 | 270 | func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { |
229 | 271 | s.router.ServeHTTP(w, r) |
|
0 commit comments