fix: rebase main and fix args and int64 lint errors

denyszhak · denyszhak · commit c77c706dd40c · 2025-12-15T01:02:46.000+01:00
diff --git a/pkg/inference/backends/sglang/sglang_config.go b/pkg/inference/backends/sglang/sglang_config.go
@@ -59,11 +59,6 @@ func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference
 		args = append(args, "--context-length", strconv.FormatUint(*contextLen, 10))
 	}
 
-	// Add arguments from backend config
-	if config != nil {
-		args = append(args, config.RuntimeFlags...)
-	}
-
 	return args, nil
 }
 
@@ -73,11 +68,12 @@ func (c *Config) GetArgs(bundle types.ModelBundle, socket string, mode inference
 func GetContextLength(modelCfg types.Config, backendCfg *inference.BackendConfiguration) *uint64 {
 	// Model config takes precedence
 	if modelCfg.ContextSize != nil {
-		return modelCfg.ContextSize
+		val := uint64(*modelCfg.ContextSize)
+		return &val
 	}
-	// else use backend config
-	if backendCfg != nil && backendCfg.ContextSize > 0 {
-		val := uint64(backendCfg.ContextSize)
+	// Fall back to backend config
+	if backendCfg != nil && backendCfg.ContextSize != nil && *backendCfg.ContextSize > 0 {
+		val := uint64(*backendCfg.ContextSize)
 		return &val
 	}
 	// Return nil to let SGLang auto-derive from model config
diff --git a/pkg/inference/backends/sglang/sglang_config_test.go b/pkg/inference/backends/sglang/sglang_config_test.go
@@ -80,7 +80,7 @@ func TestGetArgs(t *testing.T) {
 			},
 			mode: inference.BackendModeCompletion,
 			config: &inference.BackendConfiguration{
-				ContextSize: 8192,
+				ContextSize: int32ptr(8192),
 			},
 			expected: []string{
 				"-m",
@@ -95,39 +95,17 @@ func TestGetArgs(t *testing.T) {
 				"8192",
 			},
 		},
-		{
-			name: "with runtime flags",
-			bundle: &mockModelBundle{
-				safetensorsPath: "/path/to/model/model.safetensors",
-			},
-			mode: inference.BackendModeCompletion,
-			config: &inference.BackendConfiguration{
-				RuntimeFlags: []string{"--mem-fraction-static", "0.9"},
-			},
-			expected: []string{
-				"-m",
-				"sglang.launch_server",
-				"--model-path",
-				"/path/to/model",
-				"--host",
-				"127.0.0.1",
-				"--port",
-				"30000",
-				"--mem-fraction-static",
-				"0.9",
-			},
-		},
 		{
 			name: "with model context size (takes precedence)",
 			bundle: &mockModelBundle{
 				safetensorsPath: "/path/to/model/model.safetensors",
 				runtimeConfig: types.Config{
-					ContextSize: ptrUint64(16384),
+					ContextSize: int32ptr(16384),
 				},
 			},
 			mode: inference.BackendModeCompletion,
 			config: &inference.BackendConfiguration{
-				ContextSize: 8192,
+				ContextSize: int32ptr(8192),
 			},
 			expected: []string{
 				"-m",
@@ -179,32 +157,6 @@ func TestGetArgs(t *testing.T) {
 				"30000",
 			},
 		},
-		{
-			name: "combined config with context size and runtime flags",
-			bundle: &mockModelBundle{
-				safetensorsPath: "/path/to/model/model.safetensors",
-			},
-			mode: inference.BackendModeCompletion,
-			config: &inference.BackendConfiguration{
-				ContextSize:  4096,
-				RuntimeFlags: []string{"--tp-size", "2", "--enable-flashinfer"},
-			},
-			expected: []string{
-				"-m",
-				"sglang.launch_server",
-				"--model-path",
-				"/path/to/model",
-				"--host",
-				"127.0.0.1",
-				"--port",
-				"30000",
-				"--context-length",
-				"4096",
-				"--tp-size",
-				"2",
-				"--enable-flashinfer",
-			},
-		},
 	}
 
 	for _, tt := range tests {
@@ -253,33 +205,33 @@ func TestGetContextLength(t *testing.T) {
 			name:     "backend config only",
 			modelCfg: types.Config{},
 			backendCfg: &inference.BackendConfiguration{
-				ContextSize: 4096,
+				ContextSize: int32ptr(4096),
 			},
 			expectedValue: ptrUint64(4096),
 		},
 		{
 			name: "model config only",
 			modelCfg: types.Config{
-				ContextSize: ptrUint64(8192),
+				ContextSize: int32ptr(8192),
 			},
 			backendCfg:    nil,
 			expectedValue: ptrUint64(8192),
 		},
 		{
 			name: "model config takes precedence",
 			modelCfg: types.Config{
-				ContextSize: ptrUint64(16384),
+				ContextSize: int32ptr(16384),
 			},
 			backendCfg: &inference.BackendConfiguration{
-				ContextSize: 4096,
+				ContextSize: int32ptr(4096),
 			},
 			expectedValue: ptrUint64(16384),
 		},
 		{
 			name:     "zero context size in backend config returns nil",
 			modelCfg: types.Config{},
 			backendCfg: &inference.BackendConfiguration{
-				ContextSize: 0,
+				ContextSize: int32ptr(0),
 			},
 			expectedValue: nil,
 		},
@@ -300,3 +252,7 @@ func TestGetContextLength(t *testing.T) {
 func ptrUint64(v uint64) *uint64 {
 	return &v
 }
+
+func int32ptr(v int32) *int32 {
+	return &v
+}