Skip to content

Commit c240cc1

Browse files
committed
[llm-d] testing: test_llmd: ensure that there's no llmisvc in the namespace before testing
1 parent d22d16d commit c240cc1

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

projects/llm-d/testing/test_llmd.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ def test():
3636
test_directory = env.ARTIFACT_DIR
3737
prom_start_ts = None
3838
try:
39+
# Clean up any existing LLM inference services and pods before testing
40+
cleanup_llm_inference_resources()
41+
3942
# Reset Prometheus before testing
4043
logging.info("Resetting Prometheus database before testing")
4144
prom_start_ts = prom.reset_prometheus()
@@ -283,6 +286,45 @@ def capture_llm_inference_service_state():
283286
logging.error(f"Failed to capture LLM inference service state: {e}")
284287

285288

289+
def cleanup_llm_inference_resources():
    """
    Delete every llminferenceservice resource in the configured test
    namespace and verify that none remain.

    Run before testing so stale services/pods from a previous run cannot
    interfere with the new one.

    Raises:
        RuntimeError: if llminferenceservice resources still exist after the
            delete command and the polling window have both elapsed.
    """
    namespace = config.project.get_config("tests.llmd.namespace")
    logging.info(f"Cleaning up all llminferenceservice resources in namespace {namespace}")

    # Ask the cluster to delete everything; --wait blocks until the deletions
    # are processed or the 180s timeout expires.
    logging.info("Deleting all llminferenceservice resources")
    run.run(f"oc delete llminferenceservice --all -n {namespace} --wait=true --timeout=180s")

    # Finalizers can keep resources around even after `oc delete` returns,
    # so poll the listing until it comes back empty.
    logging.info("Verifying no llminferenceservice resources remain")
    max_attempts = 6       # total polls
    poll_interval_s = 10   # sleep between polls (no sleep after the last one)
    for attempt in range(max_attempts):
        result = run.run(f"oc get llminferenceservice -n {namespace} --no-headers",
                         capture_stdout=True)

        # Empty stdout -> no resources; otherwise one resource per line.
        remaining_services = [line for line in result.stdout.strip().split('\n') if line.strip()]
        if not remaining_services:
            logging.info("No llminferenceservice resources found - cleanup successful")
            break

        remaining_count = len(remaining_services)
        logging.info(f"Still found {remaining_count} llminferenceservice resources, waiting...")

        if attempt == max_attempts - 1:
            # Out of retries: report what is left and abort the test.
            # We slept between attempts 0..max_attempts-2 only.
            waited_s = (max_attempts - 1) * poll_interval_s
            logging.error(f"Failed to clean up llminferenceservice resources after {waited_s} seconds. {remaining_count} resources still exist:")
            for service in remaining_services:
                logging.error(f"  - {service}")
            raise RuntimeError(f"Cannot proceed with test - {remaining_count} llminferenceservice resources still exist in namespace {namespace}")

        time.sleep(poll_interval_s)

    logging.info("LLM inference service cleanup completed successfully")
286328
def ensure_gpu_nodes_available():
287329
"""
288330
Ensures that there are GPU nodes available in the cluster.

0 commit comments

Comments
 (0)