From 4058b4f71515f991be46e46457c99b623ba82d4e Mon Sep 17 00:00:00 2001
From: Evan Smothers <ebs@fb.com>
Date: Tue, 14 Oct 2025 15:03:02 -0700
Subject: [PATCH 1/2] mark vLLM policy integration tests as async

---
 tests/integration_tests/test_vllm_policy_correctness.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/integration_tests/test_vllm_policy_correctness.py b/tests/integration_tests/test_vllm_policy_correctness.py
index b512591ba..c6b505eb8 100644
--- a/tests/integration_tests/test_vllm_policy_correctness.py
+++ b/tests/integration_tests/test_vllm_policy_correctness.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import asyncio
+import pytest
 
 from forge.actors.policy import Policy
 from vllm import SamplingParams
@@ -28,6 +28,7 @@
 N_SAMPLES = 1
 
 
+@pytest.mark.asyncio
 async def test_same_output():
     """Compare outputs between vLLM and Policy service"""
     test_prompts = [
@@ -105,6 +106,7 @@ async def test_same_output():
             await policy.shutdown()
 
 
+@pytest.mark.asyncio
 async def test_cache_usage():
     """Test that KV cache usage is consistent between vLLM and Policy service.
 
@@ -240,8 +242,3 @@ async def test_cache_usage():
     finally:
         if policy is not None:
             await policy.shutdown()
-
-
-if __name__ == "__main__":
-    asyncio.run(test_same_output())
-    asyncio.run(test_cache_usage())

From a504c46cb0bbc02021b8748212c1309f4468d083 Mon Sep 17 00:00:00 2001
From: Evan Smothers <ebs@fb.com>
Date: Tue, 14 Oct 2025 20:25:19 -0700
Subject: [PATCH 2/2] comments: assert outputs match instead of printing

---
 tests/integration_tests/test_vllm_policy_correctness.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tests/integration_tests/test_vllm_policy_correctness.py b/tests/integration_tests/test_vllm_policy_correctness.py
index c6b505eb8..e2da9b068 100644
--- a/tests/integration_tests/test_vllm_policy_correctness.py
+++ b/tests/integration_tests/test_vllm_policy_correctness.py
@@ -97,9 +97,7 @@ async def test_same_output():
         for vllm_output, policy_output in zip(vllm_outputs, policy_outputs):
             assert vllm_output != ""
             assert policy_output != ""
-            if vllm_output != policy_output:
-                print(f"❌ Got different results: {vllm_output} vs. {policy_output}")
-        print("✅ Outputs are the same!")
+            assert vllm_output == policy_output
 
     finally:
         if policy is not None:
@@ -234,10 +232,7 @@ async def test_cache_usage():
         for vllm_output, policy_output in zip(vllm_outputs, policy_outputs):
             assert vllm_output != ""
             assert policy_output != ""
-            if vllm_output != policy_output:
-                print(f"❌ Got different results: {vllm_output} vs. {policy_output}")
-
-        print("\n✅ Prefix cache usage is the same!")
+            assert vllm_output == policy_output
 
     finally:
         if policy is not None: