Skip to content

Commit 17cfb9e

Browse files
authored
Minor fixes for lmi release (#2992)
1 parent c300611 commit 17cfb9e

File tree

3 files changed: +12 −25 lines changed

tests/integration/llm/client.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,14 +1581,6 @@ def response_checker(res, message):
15811581
if len(item) > 0:
15821582
if item.startswith('data: '):
15831583
item = item[6:] # Remove "data: " prefix
1584-
1585-
# Skip [DONE] markers
1586-
if item == '[DONE]':
1587-
continue
1588-
1589-
# Skip empty items after stripping
1590-
if not item:
1591-
continue
15921584
try:
15931585
json_lines.append(json.loads(item))
15941586
except json.JSONDecodeError as e:
@@ -1731,14 +1723,6 @@ def check_output_formatter_applied(response_text, expected_identifier):
17311723
if line.startswith('data: '):
17321724
line = line[6:] # Remove "data: " prefix
17331725

1734-
# Skip [DONE] markers
1735-
if line == '[DONE]':
1736-
continue
1737-
1738-
# Skip empty lines after stripping
1739-
if not line:
1740-
continue
1741-
17421726
try:
17431727
parsed_json = json.loads(line)
17441728
# Check for text completion format

tests/integration/llm/prepare.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,10 +529,11 @@
529529
},
530530
"qwen3-8b-no-cache": {
531531
"option.model_id": "Qwen/Qwen3-8B",
532-
"option.tensor_parallel_degree": 2,
532+
"option.tensor_parallel_degree": 1,
533533
"option.load_format": "dummy",
534534
"option.max_new_tokens": 100,
535535
"option.enable_prefix_caching": False,
536+
"option.max_model_len": 9000,
536537
"load_on_devices": 0,
537538
},
538539
"qwen3-8b-vllm-prefix-cache": {
@@ -637,7 +638,8 @@
637638
"option.model_id":
638639
"s3://djl-llm/llama-3-8b-instruct-hf/",
639640
"option.tensor_parallel_degree":
640-
4,
641+
1,
642+
"option.max_model_len": 8192,
641643
"option.lmcache_config_file":
642644
"lmcache_local_storage.yaml",
643645
"option.kv_transfer_config":

tests/integration/tests.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -714,13 +714,14 @@ def test_lmcache_auto_config(self):
714714
])
715715
client.run("vllm_lmcache qwen3-8b-lmcache-auto".split())
716716

717-
def test_lmcache_auto_config_larger_model(self):
718-
with Runner("lmi", "qwen2.5-32b") as r:
719-
prepare.build_vllm_async_model("qwen2.5-32b")
720-
r.launch(env_vars=[
721-
"PYTHONHASHSEED=0", "OPTION_LMCACHE_AUTO_CONFIG=True"
722-
])
723-
client.run("vllm_lmcache qwen2.5-32b-lmcache-auto".split())
717+
# Commented out until LMCache disk backend fix is upstreamed
718+
# def test_lmcache_auto_config_larger_model(self):
719+
# with Runner("lmi", "qwen2.5-32b") as r:
720+
# prepare.build_vllm_async_model("qwen2.5-32b")
721+
# r.launch(env_vars=[
722+
# "PYTHONHASHSEED=0", "OPTION_LMCACHE_AUTO_CONFIG=True"
723+
# ])
724+
# client.run("vllm_lmcache qwen2.5-32b-lmcache-auto".split())
724725

725726

726727
@pytest.mark.vllm

0 commit comments

Comments (0)