Skip to content

Commit 5a27d33

Browse files
committed
feature: add sglang backend
1 parent a791616 commit 5a27d33

File tree

12 files changed

+677
-5
lines changed

12 files changed

+677
-5
lines changed

main.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/docker/model-runner/pkg/inference"
1616
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
1717
"github.com/docker/model-runner/pkg/inference/backends/mlx"
18+
"github.com/docker/model-runner/pkg/inference/backends/sglang"
1819
"github.com/docker/model-runner/pkg/inference/backends/vllm"
1920
"github.com/docker/model-runner/pkg/inference/config"
2021
"github.com/docker/model-runner/pkg/inference/memory"
@@ -144,12 +145,23 @@ func main() {
144145
log.Fatalf("unable to initialize %s backend: %v", mlx.Name, err)
145146
}
146147

148+
sglangBackend, err := sglang.New(
149+
log,
150+
modelManager,
151+
log.WithFields(logrus.Fields{"component": sglang.Name}),
152+
nil,
153+
)
154+
if err != nil {
155+
log.Fatalf("unable to initialize %s backend: %v", sglang.Name, err)
156+
}
157+
147158
scheduler := scheduling.NewScheduler(
148159
log,
149160
map[string]inference.Backend{
150161
llamacpp.Name: llamaCppBackend,
151162
vllm.Name: vllmBackend,
152163
mlx.Name: mlxBackend,
164+
sglang.Name: sglangBackend,
153165
},
154166
llamaCppBackend,
155167
modelManager,

pkg/inference/backend.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ type Backend interface {
7272
// external model management system and false if the backend uses the shared
7373
// model manager.
7474
UsesExternalModelManagement() bool
75+
// UsesTCP returns true if the backend uses TCP for communication instead
76+
// of Unix sockets. When true, the scheduler will create a TCP transport
77+
// and pass a "host:port" address to Run instead of a Unix socket path.
78+
UsesTCP() bool
7579
// Install ensures that the backend is installed. It should return a nil
7680
// error if installation succeeds or if the backend is already installed.
7781
// The provided HTTP client should be used for any HTTP operations.

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ func (l *llamaCpp) UsesExternalModelManagement() bool {
8989
return false
9090
}
9191

92+
// UsesTCP implements inference.Backend.UsesTCP.
93+
func (l *llamaCpp) UsesTCP() bool {
94+
return false
95+
}
96+
9297
// Install implements inference.Backend.Install.
9398
func (l *llamaCpp) Install(ctx context.Context, httpClient *http.Client) error {
9499
l.updatedLlamaCpp = false

pkg/inference/backends/mlx/mlx.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ func (m *mlx) UsesExternalModelManagement() bool {
6565
return false
6666
}
6767

68+
// UsesTCP implements inference.Backend.UsesTCP.
69+
func (m *mlx) UsesTCP() bool {
70+
return false
71+
}
72+
6873
// Install implements inference.Backend.Install.
6974
func (m *mlx) Install(ctx context.Context, httpClient *http.Client) error {
7075
if !platform.SupportsMLX() {
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
package sglang
2+
3+
import (
4+
"context"
5+
"errors"
6+
"fmt"
7+
"io/fs"
8+
"net/http"
9+
"os"
10+
"os/exec"
11+
"path/filepath"
12+
"strings"
13+
14+
"github.com/docker/model-runner/pkg/diskusage"
15+
"github.com/docker/model-runner/pkg/inference"
16+
"github.com/docker/model-runner/pkg/inference/backends"
17+
"github.com/docker/model-runner/pkg/inference/models"
18+
"github.com/docker/model-runner/pkg/inference/platform"
19+
"github.com/docker/model-runner/pkg/logging"
20+
)
21+
22+
const (
	// Name is the backend name, as reported by the backend's Name method.
	Name = "sglang"
	// sglangDir is the directory expected to contain the bundled SGLang
	// executable; it is also the directory measured for disk usage.
	sglangDir = "/opt/sglang-env/bin"
	// sglangVersionFile is the file read to report the bundled SGLang
	// version in the backend status.
	sglangVersionFile = "/opt/sglang-env/version"
)
28+
29+
// Sentinel errors returned by the SGLang backend; compare with errors.Is.
var (
	// ErrNotImplemented is returned when platform.SupportsSGLang reports
	// that SGLang is unavailable on the current platform.
	ErrNotImplemented = errors.New("not implemented")
	// ErrSGLangNotFound is returned when the host python3 interpreter
	// cannot import the sglang package.
	ErrSGLangNotFound = errors.New("sglang package not installed")
	// ErrPythonNotFound is returned when no python3 executable can be
	// located in PATH.
	ErrPythonNotFound = errors.New("python3 not found in PATH")
)
34+
35+
// sglang is the SGLang-based backend implementation.
36+
type sglang struct {
37+
// log is the associated logger.
38+
log logging.Logger
39+
// modelManager is the shared model manager.
40+
modelManager *models.Manager
41+
// serverLog is the logger to use for the SGLang server process.
42+
serverLog logging.Logger
43+
// config is the configuration for the SGLang backend.
44+
config *Config
45+
// status is the state in which the SGLang backend is in.
46+
status string
47+
// pythonPath is the path to the python3 binary.
48+
pythonPath string
49+
}
50+
51+
// New creates a new SGLang-based backend.
52+
func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, conf *Config) (inference.Backend, error) {
53+
// If no config is provided, use the default configuration
54+
if conf == nil {
55+
conf = NewDefaultSGLangConfig()
56+
}
57+
58+
return &sglang{
59+
log: log,
60+
modelManager: modelManager,
61+
serverLog: serverLog,
62+
config: conf,
63+
status: "not installed",
64+
}, nil
65+
}
66+
67+
// Name implements inference.Backend.Name.
68+
func (s *sglang) Name() string {
69+
return Name
70+
}
71+
72+
func (s *sglang) UsesExternalModelManagement() bool {
73+
return false
74+
}
75+
76+
// UsesTCP implements inference.Backend.UsesTCP.
77+
// SGLang only supports TCP, not Unix sockets.
78+
func (s *sglang) UsesTCP() bool {
79+
return true
80+
}
81+
82+
func (s *sglang) Install(_ context.Context, _ *http.Client) error {
83+
if !platform.SupportsSGLang() {
84+
return ErrNotImplemented
85+
}
86+
87+
if err := s.initFromDocker(); err == nil {
88+
return nil
89+
} else if !errors.Is(err, fs.ErrNotExist) {
90+
return fmt.Errorf("failed to check SGLang binary: %w", err)
91+
}
92+
93+
return s.initFromHost()
94+
}
95+
96+
97+
func (s *sglang) initFromDocker() error {
98+
sglangBinaryPath := s.binaryPath()
99+
100+
if _, err := os.Stat(sglangBinaryPath); err != nil {
101+
return err
102+
}
103+
104+
versionBytes, err := os.ReadFile(sglangVersionFile)
105+
if err != nil {
106+
s.log.Warnf("could not get sglang version: %v", err)
107+
s.status = "running sglang version: unknown"
108+
return nil
109+
}
110+
111+
s.status = fmt.Sprintf(
112+
"running sglang version: %s",
113+
strings.TrimSpace(string(versionBytes)),
114+
)
115+
116+
return nil
117+
}
118+
119+
120+
func (s *sglang) initFromHost() error {
121+
pythonPath, err := exec.LookPath("python3")
122+
if err != nil {
123+
s.status = ErrPythonNotFound.Error()
124+
return ErrPythonNotFound
125+
}
126+
127+
s.pythonPath = pythonPath
128+
129+
if err := exec.Command(pythonPath, "-c", "import sglang").Run(); err != nil {
130+
s.status = "sglang package not installed"
131+
s.log.Warnf("sglang package not found. Install with: uv pip install sglang[all]")
132+
return ErrSGLangNotFound
133+
}
134+
135+
output, err := exec.Command(pythonPath, "-c", "import sglang; print(sglang.__version__)").Output()
136+
if err != nil {
137+
s.log.Warnf("could not get sglang version: %v", err)
138+
s.status = "running sglang version: unknown"
139+
return nil
140+
}
141+
142+
s.status = fmt.Sprintf("running sglang version: %s", strings.TrimSpace(string(output)))
143+
144+
return nil
145+
}
146+
147+
func (s *sglang) Run(ctx context.Context, socket, model string, modelRef string, mode inference.BackendMode, backendConfig *inference.BackendConfiguration) error {
148+
if !platform.SupportsSGLang() {
149+
s.log.Warn("sglang backend is not yet supported")
150+
return ErrNotImplemented
151+
}
152+
153+
bundle, err := s.modelManager.GetBundle(model)
154+
if err != nil {
155+
return fmt.Errorf("failed to get model: %w", err)
156+
}
157+
158+
args, err := s.config.GetArgs(bundle, socket, mode, backendConfig)
159+
if err != nil {
160+
return fmt.Errorf("failed to get SGLang arguments: %w", err)
161+
}
162+
163+
// Add served model name and weight version
164+
if model != "" {
165+
args = append(args, "--served-model-name", model)
166+
}
167+
if modelRef != "" {
168+
args = append(args, "--weight-version", modelRef)
169+
}
170+
171+
// Determine binary path - use Docker installation if available, otherwise use Python
172+
binaryPath := s.binaryPath()
173+
sandboxPath := sglangDir
174+
if _, err := os.Stat(binaryPath); errors.Is(err, fs.ErrNotExist) {
175+
// Use Python installation
176+
if s.pythonPath == "" {
177+
return fmt.Errorf("sglang: no docker binary at %s and no python runtime configured; did you forget to call Install?", binaryPath)
178+
}
179+
binaryPath = s.pythonPath
180+
sandboxPath = ""
181+
}
182+
183+
return backends.RunBackend(ctx, backends.RunnerConfig{
184+
BackendName: "SGLang",
185+
Socket: socket,
186+
BinaryPath: binaryPath,
187+
SandboxPath: sandboxPath,
188+
SandboxConfig: "",
189+
Args: args,
190+
Logger: s.log,
191+
ServerLogWriter: s.serverLog.Writer(),
192+
})
193+
}
194+
195+
func (s *sglang) Status() string {
196+
return s.status
197+
}
198+
199+
func (s *sglang) GetDiskUsage() (int64, error) {
200+
// Check if Docker installation exists
201+
if _, err := os.Stat(sglangDir); err == nil {
202+
size, err := diskusage.Size(sglangDir)
203+
if err != nil {
204+
return 0, fmt.Errorf("error while getting sglang dir size: %w", err)
205+
}
206+
return size, nil
207+
}
208+
// Python installation doesn't have a dedicated installation directory
209+
// It's installed via pip in the system Python environment
210+
return 0, nil
211+
}
212+
213+
func (s *sglang) GetRequiredMemoryForModel(_ context.Context, _ string, _ *inference.BackendConfiguration) (inference.RequiredMemory, error) {
214+
if !platform.SupportsSGLang() {
215+
return inference.RequiredMemory{}, ErrNotImplemented
216+
}
217+
218+
return inference.RequiredMemory{
219+
RAM: 1,
220+
VRAM: 1,
221+
}, nil
222+
}
223+
224+
func (s *sglang) binaryPath() string {
225+
return filepath.Join(sglangDir, "sglang")
226+
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package sglang
2+
3+
import (
4+
"fmt"
5+
"net"
6+
"path/filepath"
7+
"strconv"
8+
9+
"github.com/docker/model-runner/pkg/distribution/types"
10+
"github.com/docker/model-runner/pkg/inference"
11+
)
12+
13+
// Config holds the settings of the SGLang backend.
type Config struct {
	// Args lists the base arguments placed first on every command line
	// built by GetArgs.
	Args []string
}
18+
19+
// NewDefaultSGLangConfig creates a new SGLangConfig with default values.
20+
func NewDefaultSGLangConfig() *Config {
21+
return &Config{}
22+
}
23+
24+
// GetArgs implements BackendConfig.GetArgs.
25+
func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) {
26+
// Start with the arguments from SGLangConfig
27+
args := append([]string{}, c.Args...)
28+
29+
// SGLang uses Python module: python -m sglang.launch_server
30+
args = append(args, "-m", "sglang.launch_server")
31+
32+
// Add model path
33+
safetensorsPath := bundle.SafetensorsPath()
34+
if safetensorsPath == "" {
35+
return nil, fmt.Errorf("safetensors path required by SGLang backend")
36+
}
37+
modelPath := filepath.Dir(safetensorsPath)
38+
args = append(args, "--model-path", modelPath)
39+
40+
host, port, err := net.SplitHostPort(socket)
41+
if err != nil {
42+
return nil, fmt.Errorf("failed to parse host:port from %q: %w", socket, err)
43+
}
44+
args = append(args, "--host", host, "--port", port)
45+
46+
// Add mode-specific arguments
47+
switch mode {
48+
case inference.BackendModeCompletion:
49+
// Default mode for SGLang
50+
case inference.BackendModeEmbedding:
51+
args = append(args, "--is-embedding")
52+
case inference.BackendModeReranking:
53+
default:
54+
return nil, fmt.Errorf("unsupported backend mode %q", mode)
55+
}
56+
57+
// Add context-length if specified in model config or backend config
58+
if contextLen := GetContextLength(bundle.RuntimeConfig(), config); contextLen != nil {
59+
args = append(args, "--context-length", strconv.FormatUint(*contextLen, 10))
60+
}
61+
62+
// Add arguments from backend config
63+
if config != nil {
64+
args = append(args, config.RuntimeFlags...)
65+
}
66+
67+
return args, nil
68+
}
69+
70+
// GetContextLength returns the context length (context size) from model config or backend config.
71+
// Model config takes precedence over backend config.
72+
// Returns nil if neither is specified (SGLang will auto-derive from model).
73+
func GetContextLength(modelCfg types.Config, backendCfg *inference.BackendConfiguration) *uint64 {
74+
// Model config takes precedence
75+
if modelCfg.ContextSize != nil {
76+
return modelCfg.ContextSize
77+
}
78+
// else use backend config
79+
if backendCfg != nil && backendCfg.ContextSize > 0 {
80+
val := uint64(backendCfg.ContextSize)
81+
return &val
82+
}
83+
// Return nil to let SGLang auto-derive from model config
84+
return nil
85+
}

0 commit comments

Comments
 (0)