Skip to content

Commit f12b17b

Browse files
authored
Merge pull request #503 from doringeman/fix-configure-gpu-memory-utilization
fix(scheduler): handle GPUMemoryUtilization
2 parents 0f534e9 + 0aeb07c commit f12b17b

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

pkg/inference/scheduling/scheduler.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ func (s *Scheduler) ConfigureRunner(ctx context.Context, backend inference.Backe
241 241   			}
242 242   		}
243 243   		runnerConfig.VLLM = &inference.VLLMConfig{
244     - 			HFOverrides: req.VLLM.HFOverrides,
    244 + 			HFOverrides:          req.VLLM.HFOverrides,
    245 + 			GPUMemoryUtilization: req.VLLM.GPUMemoryUtilization,
245 246   		}
246 247   	}
247 248
0 commit comments

Comments
 (0)