
Commit 7ce5aeb

Apply suggestions from code review
- Capture `container_uri` from the environment variable before running the test, and remove the default value to prevent issues when testing
- Remove `num_train_epochs=-1` as it is not required, since `max_steps` is already specified
- Rename `test_transformers` to `test_huggingface_inference_toolkit`
- Remove the `transformers` and `jinja2` dependencies as they are not required, as well as the `AutoTokenizer` usage for prompt formatting

Co-authored-by: Philipp Schmid <[email protected]>
1 parent 7c4bf87 commit 7ce5aeb
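
All five files apply the same fail-fast pattern. A minimal sketch of the guard as it now opens each test (`INFERENCE_DLC` shown here; the training, TEI, and TGI tests read `TRAINING_DLC`, `TEI_DLC`, and `TGI_DLC` respectively):

    import os

    # The hard-coded default image URIs are gone, so an unset or empty
    # variable fails the test immediately instead of falling back to a
    # possibly outdated image.
    container_uri = os.getenv("INFERENCE_DLC", None)
    if container_uri is None or container_uri == "":
        assert False, "INFERENCE_DLC environment variable is not set"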

File tree: 5 files changed, +27 -39 lines changed

tests/pytorch/inference/test_huggingface_inference_toolkit.py

Lines changed: 6 additions & 7 deletions
@@ -48,24 +48,23 @@
         ),
     ],
 )
-def test_transformers(
+def test_huggingface_inference_toolkit(
     caplog: pytest.LogCaptureFixture,
     hf_model_id: str,
     hf_task: str,
     prediction_payload: dict,
 ) -> None:
     caplog.set_level(logging.INFO)
 
+    container_uri = os.getenv("INFERENCE_DLC", None)
+    if container_uri is None or container_uri == "":
+        assert False, "INFERENCE_DLC environment variable is not set"
+
     client = docker.from_env()
 
     logging.info(f"Starting container for {hf_model_id}...")
     container = client.containers.run(
-        os.getenv(
-            "INFERENCE_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cpu.2-2.transformers.4-44.ubuntu2204.py311"
-            if not CUDA_AVAILABLE
-            else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-inference-cu121.2-2.transformers.4-44.ubuntu2204.py311",
-        ),
+        container_uri,
         ports={"8080": 8080},
         environment={
             "HF_MODEL_ID": hf_model_id,

tests/pytorch/training/test_trl.py

Lines changed: 10 additions & 10 deletions
@@ -19,14 +19,15 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
     caplog.set_level(logging.INFO)
 
+    container_uri = os.getenv("TRAINING_DLC", None)
+    if container_uri is None or container_uri == "":
+        assert False, "TRAINING_DLC environment variable is not set"
+
     client = docker.from_env()
 
     logging.info("Running the container for TRL...")
     container = client.containers.run(
-        os.getenv(
-            "TRAINING_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
-        ),
+        container_uri,
         command=[
             "trl",
             "sft",
@@ -38,7 +39,6 @@ def test_trl(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None:
             "--gradient_accumulation_steps=1",
             "--output_dir=/opt/huggingface/trained_model",
             "--logging_steps=1",
-            "--num_train_epochs=-1",
             "--max_steps=10",
             "--gradient_checkpointing",
         ],
@@ -81,14 +81,15 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
     """Adapted from https://github.com/huggingface/trl/blob/main/examples/scripts/sft.py"""
     caplog.set_level(logging.INFO)
 
+    container_uri = os.getenv("TRAINING_DLC", None)
+    if container_uri is None or container_uri == "":
+        assert False, "TRAINING_DLC environment variable is not set"
+
     client = docker.from_env()
 
     logging.info("Running the container for TRL...")
     container = client.containers.run(
-        os.getenv(
-            "TRAINING_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-pytorch-training-cu121.2-3.transformers.4-42.ubuntu2204.py310",
-        ),
+        container_uri,
         command=[
             "trl",
             "sft",
@@ -100,7 +101,6 @@ def test_trl_peft(caplog: pytest.LogCaptureFixture, tmp_path: PosixPath) -> None
             "--gradient_accumulation_steps=1",
             "--output_dir=/opt/huggingface/trained_model",
             "--logging_steps=1",
-            "--num_train_epochs=-1",
             "--max_steps=10",
             "--gradient_checkpointing",
             "--use_peft",

tests/requirements.txt

Lines changed: 0 additions & 2 deletions
@@ -1,6 +1,4 @@
 docker==7.1.0
 GPUtil==1.4.0
-jinja2==3.1.4
 pytest==8.3.2
 nvidia-ml-py==12.560.30
-transformers==4.44.2

tests/tei/test_tei.py

Lines changed: 5 additions & 6 deletions
@@ -33,18 +33,17 @@ def test_text_embeddings_inference(
 ) -> None:
     caplog.set_level(logging.INFO)
 
+    container_uri = os.getenv("TEI_DLC", None)
+    if container_uri is None or container_uri == "":
+        assert False, "TEI_DLC environment variable is not set"
+
     client = docker.from_env()
 
     logging.info(
         f"Starting container for {text_embeddings_router_kwargs.get('MODEL_ID', None)}..."
     )
     container = client.containers.run(
-        os.getenv(
-            "TEI_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cpu.1-2"
-            if not CUDA_AVAILABLE
-            else "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-embeddings-inference-cu122.1-4.ubuntu2204",
-        ),
+        container_uri,
         # TODO: udpate once the TEI DLCs is updated, as the current is still on revision:
         # https://github.com/huggingface/Google-Cloud-Containers/blob/517b8728725f6249774dcd46ee8d7ede8d95bb70/containers/tei/cpu/1.2.2/Dockerfile
         # and it exposes the 80 port and uses the /data directory instead of /tmp

tests/tgi/test_tgi.py

Lines changed: 6 additions & 14 deletions
@@ -9,7 +9,6 @@
 
 import pynvml
 from docker.types.containers import DeviceRequest
-from transformers import AutoTokenizer
 
 from ..constants import CUDA_AVAILABLE
 from ..utils import stream_logs
@@ -42,6 +41,10 @@ def test_text_generation_inference(
 ) -> None:
     caplog.set_level(logging.INFO)
 
+    container_uri = os.getenv("TGI_DLC", None)
+    if container_uri is None or container_uri == "":
+        assert False, "TGI_DLC environment variable is not set"
+
     client = docker.from_env()
 
     # If the GPU compute capability is lower than 8.0 (Ampere), then set `USE_FLASH_ATTENTION=false`
@@ -56,10 +59,7 @@ def test_text_generation_inference(
         f"Starting container for {text_generation_launcher_kwargs.get('MODEL_ID', None)}..."
     )
     container = client.containers.run(
-        os.getenv(
-            "TGI_DLC",
-            "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu121.2-2.ubuntu2204.py310",
-        ),
+        container_uri,
         ports={8080: 8080},
         environment=text_generation_launcher_kwargs,
         healthcheck={
@@ -113,22 +113,14 @@ def test_text_generation_inference(
 
     assert container_healthy
 
-    tokenizer = AutoTokenizer.from_pretrained(
-        text_generation_launcher_kwargs["MODEL_ID"]
-    )
-
     container_failed = False
     try:
         for prompt in ["What's Deep Learning?", "What's the capital of France?"]:
            logging.info(
                 f"Sending prediction request for {prompt=} to http://localhost:8080{predict_route}..."
             )
             payload = {
-                "inputs": tokenizer.apply_chat_template(
-                    [{"role": "user", "content": prompt}],
-                    tokenize=False,
-                    add_generation_prompt=True,
-                ),
+                "inputs": prompt,
                 "parameters": {
                     "max_new_tokens": 256,
                     "do_sample": True,
