Skip to content

Commit e5e7863

Browse files
authored
Merge pull request #684 from nerdalert/vllm-image
Replace the image vllm/vllm-openai with instructlab-nvidia
2 parents ffa473a + bea60da commit e5e7863

File tree

1 file changed

+13
-15
lines changed

1 file changed

+13
-15
lines changed

api-server/handlers.go

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -660,22 +660,20 @@ func (srv *ILabServer) runVllmContainerHandler(
660660
srv.log.Infof("No existing job found for model '%s'. Starting a new job.", servedModelName)
661661

662662
cmdArgs := []string{
663-
"run", "--rm",
664-
fmt.Sprintf("--device=nvidia.com/gpu=%d", gpuIndex),
665-
fmt.Sprintf("-e=NVIDIA_VISIBLE_DEVICES=%d", gpuIndex),
666-
"-v", "/usr/local/cuda-12.4/lib64:/usr/local/cuda-12.4/lib64",
663+
"run", "--rm", "-it",
664+
"--device", fmt.Sprintf("nvidia.com/gpu=%d", gpuIndex),
665+
"--security-opt", "label=disable",
666+
"--net", "host",
667+
"--shm-size", "10G",
668+
"--pids-limit", "-1",
667669
"-v", fmt.Sprintf("%s:%s", hostVolume, containerVolume),
668-
"-p", fmt.Sprintf("%s:%s", port, port),
669-
"--ipc=host",
670-
"vllm/vllm-openai:latest",
671-
"--host", "0.0.0.0",
672-
"--port", port,
673-
"--model", modelPath,
674-
"--load-format", "safetensors",
675-
"--config-format", "hf",
676-
"--trust-remote-code",
677-
"--device", "cuda",
670+
"--entrypoint", "/opt/app-root/bin/vllm",
671+
"registry.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.4-1738905416",
672+
"serve", modelPath,
678673
"--served-model-name", servedModelName,
674+
"--load-format", "safetensors",
675+
"--host", "127.0.0.1",
676+
"--port", port,
679677
}
680678

681679
// Log the command for debugging
@@ -685,7 +683,7 @@ func (srv *ILabServer) runVllmContainerHandler(
685683
// Create a unique job ID and a log file
686684
jobID := fmt.Sprintf("v-%d", time.Now().UnixNano())
687685
logFilePath := filepath.Join("logs", fmt.Sprintf("%s.log", jobID))
688-
srv.log.Infof("Starting vllm-openai container with job_id: %s, logs: %s", jobID, logFilePath)
686+
srv.log.Infof("Starting vllm container with job_id: %s, logs: %s", jobID, logFilePath)
689687

690688
cmd := exec.Command("podman", cmdArgs...)
691689

0 commit comments

Comments
 (0)