Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/docker/model-runner/pkg/metrics"
"github.com/docker/model-runner/pkg/middleware"
"github.com/docker/model-runner/pkg/ollama"
"github.com/docker/model-runner/pkg/responses"
"github.com/docker/model-runner/pkg/routing"
"github.com/sirupsen/logrus"
)
Expand Down Expand Up @@ -165,6 +166,16 @@ func main() {
router.Handle(inference.ModelsPrefix, modelHandler)
router.Handle(inference.ModelsPrefix+"/", modelHandler)
router.Handle(inference.InferencePrefix+"/", schedulerHTTP)
// Add OpenAI Responses API compatibility layer
responsesHandler := responses.NewHTTPHandler(log, schedulerHTTP, nil)
router.Handle(responses.APIPrefix+"/", responsesHandler)
router.Handle(responses.APIPrefix, responsesHandler) // Also register for exact match without trailing slash
router.Handle("/v1"+responses.APIPrefix+"/", responsesHandler)
router.Handle("/v1"+responses.APIPrefix, responsesHandler)
// Also register Responses API under inference prefix to support all inference engines
router.Handle(inference.InferencePrefix+responses.APIPrefix+"/", responsesHandler)
router.Handle(inference.InferencePrefix+responses.APIPrefix, responsesHandler)

// Add path aliases: /v1 -> /engines/v1, /rerank -> /engines/rerank, /score -> /engines/score.
aliasHandler := &middleware.AliasHandler{Handler: schedulerHTTP}
router.Handle("/v1/", aliasHandler)
Expand Down
31 changes: 27 additions & 4 deletions pkg/inference/models/adapter.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package models

import (
"encoding/json"
"fmt"

"github.com/docker/model-runner/pkg/distribution/types"
Expand All @@ -27,12 +28,23 @@ func ToModel(m types.Model) (*Model, error) {
created = desc.Created.Unix()
}

return &Model{
model := &Model{
ID: id,
Tags: m.Tags(),
Created: created,
Config: cfg,
}, nil
}

// Marshal the config to populate RawConfig
if cfg != nil {
configData, err := json.Marshal(cfg)
if err != nil {
return nil, fmt.Errorf("marshal config: %w", err)
}
model.RawConfig = configData
}

return model, nil
}

// ToModelFromArtifact converts a types.ModelArtifact (typically from remote registry)
Expand All @@ -58,10 +70,21 @@ func ToModelFromArtifact(artifact types.ModelArtifact) (*Model, error) {
created = desc.Created.Unix()
}

return &Model{
model := &Model{
ID: id,
Tags: nil, // Remote models don't have local tags
Created: created,
Config: cfg,
}, nil
}

// Marshal the config to populate RawConfig
if cfg != nil {
configData, err := json.Marshal(cfg)
if err != nil {
return nil, fmt.Errorf("marshal config: %w", err)
}
model.RawConfig = configData
}

return model, nil
}
30 changes: 29 additions & 1 deletion pkg/inference/models/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,35 @@ type Model struct {
Created int64 `json:"created"`
// Config describes the model. Can be either Docker format (*types.Config)
// or ModelPack format (*modelpack.Model).
Config types.ModelConfig `json:"config"`
Config types.ModelConfig `json:"-"`
// RawConfig is used for JSON marshaling/unmarshaling
RawConfig json.RawMessage `json:"config"`
}

// MarshalJSON implements json.Marshaler for Model.
//
// The "config" key of the output is produced from Config when it is set
// (Config is re-encoded on every call and takes precedence over RawConfig),
// and from RawConfig otherwise. When neither carries any bytes the key is
// emitted as JSON null.
//
// It returns the error from encoding Config, or from encoding the model
// itself, unchanged.
func (m Model) MarshalJSON() ([]byte, error) {
	// alias has Model's fields but none of its methods, so encoding it
	// below does not call back into this method.
	type alias Model

	// m is already a copy (value receiver); overwriting RawConfig on it
	// never touches the caller's Model.
	out := alias(m)

	if m.Config != nil {
		configData, err := json.Marshal(m.Config)
		if err != nil {
			return nil, err
		}
		out.RawConfig = configData
	}

	// json.RawMessage only encodes as null when it is nil; an empty but
	// non-nil value is written out verbatim and makes json.Marshal fail
	// with "unexpected end of JSON input". Normalize it so a model without
	// config bytes still serializes.
	if len(out.RawConfig) == 0 {
		out.RawConfig = nil
	}

	return json.Marshal(out)
}

// UnmarshalJSON implements custom JSON unmarshaling for Model.
Expand Down
Loading