@@ -660,22 +660,20 @@ func (srv *ILabServer) runVllmContainerHandler(
 	srv.log.Infof("No existing job found for model '%s'. Starting a new job.", servedModelName)
 
 	cmdArgs := []string{
-		"run", "--rm",
-		fmt.Sprintf("--device=nvidia.com/gpu=%d", gpuIndex),
-		fmt.Sprintf("-e=NVIDIA_VISIBLE_DEVICES=%d", gpuIndex),
-		"-v", "/usr/local/cuda-12.4/lib64:/usr/local/cuda-12.4/lib64",
+		"run", "--rm", "-it",
+		"--device", fmt.Sprintf("nvidia.com/gpu=%d", gpuIndex),
+		"--security-opt", "label=disable",
+		"--net", "host",
+		"--shm-size", "10G",
+		"--pids-limit", "-1",
 		"-v", fmt.Sprintf("%s:%s", hostVolume, containerVolume),
-		"-p", fmt.Sprintf("%s:%s", port, port),
-		"--ipc=host",
-		"vllm/vllm-openai:latest",
-		"--host", "0.0.0.0",
-		"--port", port,
-		"--model", modelPath,
-		"--load-format", "safetensors",
-		"--config-format", "hf",
-		"--trust-remote-code",
-		"--device", "cuda",
+		"--entrypoint", "/opt/app-root/bin/vllm",
+		"registry.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.4-1738905416",
+		"serve", modelPath,
 		"--served-model-name", servedModelName,
+		"--load-format", "safetensors",
+		"--host", "127.0.0.1",
+		"--port", port,
 	}
 
 	// Log the command for debugging
@@ -685,7 +683,7 @@ func (srv *ILabServer) runVllmContainerHandler(
 	// Create a unique job ID and a log file
 	jobID := fmt.Sprintf("v-%d", time.Now().UnixNano())
 	logFilePath := filepath.Join("logs", fmt.Sprintf("%s.log", jobID))
-	srv.log.Infof("Starting vllm-openai container with job_id: %s, logs: %s", jobID, logFilePath)
+	srv.log.Infof("Starting vllm container with job_id: %s, logs: %s", jobID, logFilePath)
 
 	cmd := exec.Command("podman", cmdArgs...)
 
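For reference, with hypothetical placeholder values (GPU index 0, sample volume and model paths, port 8000, none of which are fixed by this change), the new cmdArgs assemble a podman invocation roughly like the sketch below. Note that the removed -p port mapping is replaced by --net host, so vllm serve now binds 127.0.0.1:<port> directly on the host.

    # Sketch only: the GPU index, volume paths, model path, and port are
    # hypothetical placeholders; the handler fills them in per request.
    podman run --rm -it \
      --device nvidia.com/gpu=0 \
      --security-opt label=disable \
      --net host \
      --shm-size 10G \
      --pids-limit -1 \
      -v /home/user/models:/opt/app-root/src/models \
      --entrypoint /opt/app-root/bin/vllm \
      registry.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.4-1738905416 \
      serve /opt/app-root/src/models/granite-7b-lab \
      --served-model-name granite-7b-lab \
      --load-format safetensors \
      --host 127.0.0.1 \
      --port 8000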