Commit fa3b178

Remove runtime=nvidia and enable interactive mode (docker run -it ...)

1 parent 83e2c95 commit fa3b178

File tree (4 files changed, +33 −31 lines):

- tests/pytorch/inference/test_huggingface_inference_toolkit.py
- tests/pytorch/training/test_trl.py
- tests/tei/test_tei.py
- tests/tgi/test_tgi.py
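All four tests apply the same two changes: the legacy `runtime="nvidia"` option is dropped in favour of `device_requests` built from `docker.types.DeviceRequest`, and `tty=True` plus `stdin_open=True` make the container behave like `docker run -it ...`. The following is a minimal, self-contained sketch of that pattern with the Docker SDK for Python; the image name and command are illustrative placeholders, not values taken from these tests.

```python
import docker
from docker.types import DeviceRequest

client = docker.from_env()

container = client.containers.run(
    "nvidia/cuda:12.3.2-base-ubuntu22.04",  # placeholder image, not from the repo
    command="nvidia-smi",
    detach=True,
    # Equivalent of `docker run -it ...`
    tty=True,
    stdin_open=True,
    # Request all visible GPUs; replaces the legacy runtime="nvidia" flag
    device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
)

container.wait()                  # block until the container exits
print(container.logs().decode())  # e.g. the nvidia-smi output
container.remove()
```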

tests/pytorch/inference/test_huggingface_inference_toolkit.py

Lines changed: 7 additions & 9 deletions
@@ -58,13 +58,6 @@ def test_transformers(
 
     client = docker.from_env()
 
-    cuda_kwargs = {}
-    if CUDA_AVAILABLE:
-        cuda_kwargs = {
-            "runtime": "nvidia",
-            "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])],
-        }
-
     logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
         os.getenv(
@@ -91,8 +84,13 @@ def test_transformers(
         },
         platform="linux/amd64",
         detach=True,
-        # Extra kwargs related to the CUDA devices
-        **cuda_kwargs,
+        # Enable interactive mode
+        tty=True,
+        stdin_open=True,
+        # Extra `device_requests` related to the CUDA devices if any
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])]
+        if CUDA_AVAILABLE
+        else None,
     )
 
     # Start log streaming in a separate thread
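The last context line above refers to log streaming, which is outside this diff. A typical docker-py approach (an assumed sketch, not necessarily this test file's exact implementation) streams the container logs from a background thread:

```python
import logging
import threading


def stream_logs(container) -> None:
    # Follow the container's stdout/stderr until the container stops
    for line in container.logs(stream=True, follow=True):
        logging.info(line.decode("utf-8", errors="replace").rstrip())


# Start log streaming in a separate thread so the test can keep interacting
# with the container (e.g. sending requests) while its logs are captured.
log_thread = threading.Thread(target=stream_logs, args=(container,), daemon=True)
log_thread.start()
```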

tests/pytorch/training/test_trl.py

Lines changed: 8 additions & 4 deletions
@@ -57,15 +57,17 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
         },
         platform="linux/amd64",
         detach=True,
+        # Enable interactive mode
+        tty=True,
+        stdin_open=True,
         # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
         volumes={
             f"{tmp_path}/": {
                 "bind": "/opt/huggingface/trained_model",
                 "mode": "rw",
             }
         },
-        # Extra kwargs related to the CUDA devices
-        runtime="nvidia",
+        # Extra `device_requests` related to the CUDA devices
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
 
@@ -131,15 +133,17 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
         },
         platform="linux/amd64",
         detach=True,
+        # Enable interactive mode
+        tty=True,
+        stdin_open=True,
         # Mount the volume from the `tmp_path` to the `/opt/huggingface/trained_model`
         volumes={
             f"{tmp_path}/": {
                 "bind": "/opt/huggingface/trained_model",
                 "mode": "rw",
             }
         },
-        # Extra kwargs related to the CUDA devices
-        runtime="nvidia",
+        # Extra `device_requests` related to the CUDA devices
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
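Both TRL tests bind-mount the pytest `tmp_path` directory to `/opt/huggingface/trained_model` inside the container, so whatever the training job writes there survives on the host. A hedged sketch of how such a test can wait for completion and inspect the mount afterwards (the exact assertions are assumptions, not necessarily what these tests do):

```python
from pathlib import Path

# Block until the training container exits and fail fast on a non-zero status code
result = container.wait()
assert result["StatusCode"] == 0, f"training container failed: {result}"

# Files written to /opt/huggingface/trained_model inside the container
# appear under the bind-mounted tmp_path on the host
trained_files = list(Path(tmp_path).iterdir())
assert trained_files, "expected the trained model to be written to the mounted volume"
```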

tests/tei/test_tei.py

Lines changed: 7 additions & 9 deletions
@@ -35,13 +35,6 @@ def test_text_embeddings_inference(
 
     client = docker.from_env()
 
-    cuda_kwargs = {}
-    if CUDA_AVAILABLE:
-        cuda_kwargs = {
-            "runtime": "nvidia",
-            "device_requests": [DeviceRequest(count=-1, capabilities=[["gpu"]])],
-        }
-
     logging.info(
         f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..."
     )
@@ -66,8 +59,13 @@ def test_text_embeddings_inference(
         },
         platform="linux/amd64",
         detach=True,
-        # Extra kwargs related to the CUDA devices
-        **cuda_kwargs,
+        # Enable interactive mode
+        tty=True,
+        stdin_open=True,
+        # Extra `device_requests` related to the CUDA devices if any
+        device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])]
+        if CUDA_AVAILABLE
+        else None,
     )
     logging.info(f"Container {container.id} started...")  # type: ignore
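The conditional `device_requests=... if CUDA_AVAILABLE else None` relies on a `CUDA_AVAILABLE` flag defined elsewhere in the test suite (passing `None` simply means no GPU request is made). How that flag is computed is not shown in this diff; one common way to derive it without importing a deep-learning framework, offered purely as an assumption, is:

```python
import shutil
import subprocess


def _cuda_available() -> bool:
    # Treat CUDA as available when nvidia-smi exists and reports at least one GPU
    if shutil.which("nvidia-smi") is None:
        return False
    try:
        result = subprocess.run(
            ["nvidia-smi", "--list-gpus"], capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError):
        return False
    return result.returncode == 0 and bool(result.stdout.strip())


CUDA_AVAILABLE = _cuda_available()
```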

tests/tgi/test_tgi.py

Lines changed: 11 additions & 9 deletions
@@ -53,17 +53,19 @@ def test_text_generation_inference(
         ),
         ports={8080: 8080},
         environment=text_generation_launcher_kwargs,
-        # healthcheck={
-        #     "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
-        #     "interval": int(30 * 1e9),
-        #     "timeout": int(30 * 1e9),
-        #     "retries": 3,
-        #     "start_period": int(30 * 1e9),
-        # },
-        # platform="linux/amd64",
+        healthcheck={
+            "test": ["CMD", "curl", "-s", "http://localhost:8080/health"],
+            "interval": int(30 * 1e9),
+            "timeout": int(30 * 1e9),
+            "retries": 3,
+            "start_period": int(30 * 1e9),
+        },
+        platform="linux/amd64",
         detach=True,
+        # Enable interactive mode
+        tty=True,
+        stdin_open=True,
         # Extra kwargs related to the CUDA devices
-        runtime="nvidia",
         device_requests=[DeviceRequest(count=-1, capabilities=[["gpu"]])],
     )
     logging.info(f"Container {container.id} started...")  # type: ignore
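The re-enabled `healthcheck` uses Docker's nanosecond-based durations, hence `int(30 * 1e9)` for a 30-second interval, timeout, and start period. With it in place, a test can poll the reported health state instead of sleeping for a fixed time; a minimal polling helper under that assumption (not necessarily this test's actual logic):

```python
import time


def wait_until_healthy(container, timeout: float = 300.0, interval: float = 5.0) -> None:
    """Poll the Docker healthcheck status until the container reports 'healthy'."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        container.reload()  # refresh container.attrs from the Docker daemon
        status = container.attrs.get("State", {}).get("Health", {}).get("Status")
        if status == "healthy":
            return
        time.sleep(interval)
    raise TimeoutError(f"Container {container.id} did not become healthy within {timeout}s")
```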
