Skip to content

Fix the mismatch between vLLM OpenAI API and vLLM generate #1156

Fix the mismatch between vLLM OpenAI API and vLLM generate

Fix the mismatch between vLLM OpenAI API and vLLM generate #1156

Triggered via issue December 8, 2025 09:20
@pan-x-c
commented on #431 613194d
Status Failure
Total duration 26m 31s
Artifacts 1

unittest.yaml

on: issue_comment
Fit to window
Zoom out
Zoom in

Annotations

9 errors
unittest
Process completed with exit code 1.
Failed Test: tests/explorer/workflow_test.py::TestWorkflowRunner::test_workflow_with_openai
tests/explorer/workflow_test.py::TestWorkflowRunner::test_workflow_with_openai: The test failed in the call phase - self = <tests.explorer.workflow_test.TestWorkflowRunner testMethod=test_workflow_with_openai> async def test_workflow_with_openai(self): config = get_template_config() config.mode = "explore" config.model.model_path = get_model_path() config.explorer.rollout_model.engine_num = 1 config.explorer.rollout_model.enable_openai_api = True config.explorer.rollout_model.enable_history = True config.check_and_update() engines, auxiliary_engines = create_inference_models(config) runner = WorkflowRunner( config, model=engines[0], auxiliary_models=[], runner_id=0, ) > await runner.prepare() tests/explorer/workflow_test.py:737: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ trinity/explorer/workflow_runner.py:87: in prepare await asyncio.gather( _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <trinity.common.models.model.ModelWrapper object at 0x7ef8f41af670> async def prepare(self) -> None: """Prepare the model wrapper.""" > self.api_address = await self.model.get_api_server_url.remote() E ray.exceptions.RayTaskError(RuntimeError): ray::vLLMRolloutModel.get_api_server_url() (pid=672, ip=172.20.0.3, actor_id=38d9a26618d938af101e88dc34000000, repr=<trinity.common.models.vllm_model.vLLMRolloutModel object at 0x7f960239fc70>) E File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result E return self.__get_result() E File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result E raise self._exception E File "/workspace/trinity/common/models/vllm_model.py", line 590, in get_api_server_url E raise RuntimeError("Model is not prepared. Please call `prepare()` first.") E RuntimeError: Model is not prepared. Please call `prepare()` first. trinity/common/models/model.py:116: RayTaskError(RuntimeError)
Failed Test: tests/explorer/explorer_test.py::TestExplorerGSM8k::test_explorer
tests/explorer/explorer_test.py::TestExplorerGSM8k::test_explorer: The test failed in the call phase due to an exception - self = <tests.explorer.explorer_test.TestExplorerGSM8k testMethod=test_explorer> def test_explorer(self): self.config.algorithm.repeat_times = 2 self.config.buffer.total_epochs = 1 self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k") self.config.name = f"explore-{datetime.now().strftime('%Y%m%d%H%M%S')}" # some step may be skipped due to same reward self.config.algorithm.algorithm_type = "grpo" self.config.algorithm.advantage_fn = "grpo" self.config.algorithm.advantage_fn_args = { "epsilon": 1e-6, } self.config.model.max_model_len = 10240 self.config.model.max_response_tokens = 8192 self.config.model.min_response_tokens = 8192 self.config.explorer.rollout_model.ignore_eos = True self.config.check_and_update() explorer = Explorer.get_actor(self.config) > ray.get(explorer.prepare.remote()) tests/explorer/explorer_test.py:129: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper return fn(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper return func(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2961: in get values, debugger_breakpoint = worker.get_objects( _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <ray._private.worker.Worker object at 0x7f0b9083fb80> object_refs = [ObjectRef(aa720e6b051a8e0bd4785757bdd934f75ae823de1800000001000000)] timeout = None, return_exceptions = False, skip_deserialization = False _tensor_transport = None def get_objects( self, object_refs: list, timeout: Optional[float] = None, return_exceptions: bool = False, skip_deserialization: bool = False, _tensor_transport: Optional[str] = None, ) -> Tuple[List[serialization.SerializedRayObject], 
bytes]: """Get the values in the object store associated with the IDs. Return the values from the local object store for object_refs. This will block until all the values for object_refs have been written to the local object store. Args: object_refs: A list of the object refs whose values should be retrieved. timeout: The maximum amount of time in seconds to wait before returning. return_exceptions: If any of the objects deserialize to an Exception object, whether to return them as values in the returned list. If False, then the first found exception will be raised. skip_deserialization: If true, only the buffer will be released and the object associated with the buffer will not be deserialized. _tensor_transport: [Alpha] The tensor transport to use to fetch `torch.Tensors` found in the Ray Direct Transport object. Currently, this supports "object_store" and "nixl". Returns: list: List of deserialized objects or None if skip_deserialization is True. bytes: UUID of the debugger breakpoint we should drop into or b"" if there is no breakpoint. """ # Make sure that the values are object refs. for object_ref in object_refs: if not isinstance(object_ref, ObjectRef): raise TypeError( f"Attempting to call `get` on the value {object_ref}, " "which is not an ray.ObjectRef." ) tensor_transport: TensorTransportEnum = ( TensorTransportEnum.from_str(_tensor_transport) if _tensor_transport is not None else None ) assert tensor_transport in [ T
Failed Test: tests/explorer/explorer_test.py::TestExplorerGSM8KRULERNoEval::test_explorer
tests/explorer/explorer_test.py::TestExplorerGSM8KRULERNoEval::test_explorer: The test failed in the call phase due to an assertion error - self = <tests.explorer.explorer_test.TestExplorerGSM8KRULERNoEval testMethod=test_explorer> def test_explorer(self): self.config.explorer.rollout_model.engine_num = 2 self.config.explorer.auxiliary_models = [ InferenceModelConfig( model_path=get_api_model_path(), tensor_parallel_size=1, engine_num=2, ) ] self.config.algorithm.repeat_times = 2 self.config.buffer.total_steps = 2 self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k_ruler") self.config.name = f"explore-no-eval-{datetime.now().strftime('%Y%m%d%H%M%S')}" self.config.algorithm.algorithm_type = "grpo" self.config.algorithm.advantage_fn = "grpo" self.config.algorithm.advantage_fn_args = { "std_threshold": 0.0001, } self.config.check_and_update() explore(self.config) parser = TensorBoardParser(os.path.join(self.config.monitor.cache_dir, "tensorboard")) rollout_metrics = parser.metric_list("rollout") > self.assertTrue(len(rollout_metrics) > 0) E AssertionError: False is not true tests/explorer/explorer_test.py:105: AssertionError
Failed Test: tests/explorer/explorer_test.py::TestExplorerCountdownEval::test_explorer
tests/explorer/explorer_test.py::TestExplorerCountdownEval::test_explorer: The test failed in the call phase due to an assertion error - self = <tests.explorer.explorer_test.TestExplorerCountdownEval testMethod=test_explorer> def test_explorer(self): self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("countdown") eval_tasksets = self.config.buffer.explorer_input.eval_tasksets eval_tasksets.extend( [ get_unittest_dataset_config("countdown", "test"), get_unittest_dataset_config("eval_short"), get_unittest_dataset_config("eval_long"), ] ) eval_tasksets[1].repeat_times = 6 eval_tasksets[2].repeat_times = 10 self.config.name = f"explore-eval-{datetime.now().strftime('%Y%m%d%H%M%S')}" self.config.check_and_update() explore(self.config) parser = TensorBoardParser(os.path.join(self.config.monitor.cache_dir, "tensorboard")) rollout_metrics = parser.metric_list("rollout") > self.assertTrue(len(rollout_metrics) > 0) E AssertionError: False is not true tests/explorer/explorer_test.py:66: AssertionError
Failed Test: tests/common/vllm_test.py::ModelWrapperTest_2::test_generate
tests/common/vllm_test.py::ModelWrapperTest_2::test_generate: The test failed in the call phase due to an exception - self = <tests.common.vllm_test.ModelWrapperTest_2 testMethod=test_generate> async def test_generate( self, ): await prepare_engines(self.engines, self.auxiliary_engines) > await self.model_wrapper.prepare() tests/common/vllm_test.py:140: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <trinity.common.models.model.ModelWrapper object at 0x7f08902d6830> async def prepare(self) -> None: """Prepare the model wrapper.""" self.api_address = await self.model.get_api_server_url.remote() if self.api_address is None: self.logger.info("API server is not enabled for inference model.") return max_retries = 30 interval = 2 # seconds for i in range(max_retries): try: async with httpx.AsyncClient() as client: response = await client.get(self.api_address + "/health", timeout=5) if response.status_code == 200: return except Exception as e: self.logger.info(f"API server not ready (attempt {i + 1}/{max_retries}): {e}") await asyncio.sleep(interval) > raise RuntimeError( f"API server at {self.api_address} not ready after {max_retries} attempts." ) E RuntimeError: API server at http://None:None not ready after 30 attempts. trinity/common/models/model.py:131: RuntimeError
Failed Test: tests/common/vllm_test.py::ModelWrapperTest_1::test_generate
tests/common/vllm_test.py::ModelWrapperTest_1::test_generate: The test failed in the call phase due to an exception - self = <tests.common.vllm_test.ModelWrapperTest_1 testMethod=test_generate> async def test_generate( self, ): await prepare_engines(self.engines, self.auxiliary_engines) > await self.model_wrapper.prepare() tests/common/vllm_test.py:140: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <trinity.common.models.model.ModelWrapper object at 0x7f08544c0c70> async def prepare(self) -> None: """Prepare the model wrapper.""" self.api_address = await self.model.get_api_server_url.remote() if self.api_address is None: self.logger.info("API server is not enabled for inference model.") return max_retries = 30 interval = 2 # seconds for i in range(max_retries): try: async with httpx.AsyncClient() as client: response = await client.get(self.api_address + "/health", timeout=5) if response.status_code == 200: return except Exception as e: self.logger.info(f"API server not ready (attempt {i + 1}/{max_retries}): {e}") await asyncio.sleep(interval) > raise RuntimeError( f"API server at {self.api_address} not ready after {max_retries} attempts." ) E RuntimeError: API server at http://None:None not ready after 30 attempts. trinity/common/models/model.py:131: RuntimeError
Failed Test: tests/common/vllm_test.py::ModelWrapperTest_0::test_generate
tests/common/vllm_test.py::ModelWrapperTest_0::test_generate: The test failed in the call phase due to an exception - self = <tests.common.vllm_test.ModelWrapperTest_0 testMethod=test_generate> async def test_generate( self, ): await prepare_engines(self.engines, self.auxiliary_engines) > await self.model_wrapper.prepare() tests/common/vllm_test.py:140: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <trinity.common.models.model.ModelWrapper object at 0x7f08544c0f40> async def prepare(self) -> None: """Prepare the model wrapper.""" self.api_address = await self.model.get_api_server_url.remote() if self.api_address is None: self.logger.info("API server is not enabled for inference model.") return max_retries = 30 interval = 2 # seconds for i in range(max_retries): try: async with httpx.AsyncClient() as client: response = await client.get(self.api_address + "/health", timeout=5) if response.status_code == 200: return except Exception as e: self.logger.info(f"API server not ready (attempt {i + 1}/{max_retries}): {e}") await asyncio.sleep(interval) > raise RuntimeError( f"API server at {self.api_address} not ready after {max_retries} attempts." ) E RuntimeError: API server at http://None:None not ready after 30 attempts. trinity/common/models/model.py:131: RuntimeError
unittest
Process completed with exit code 1.

Artifacts

Produced during runtime
Name Size Digest
pytest-results
7.75 KB
sha256:00fb6b6aea850b591604f0f833e797ab34da6791dfd547f51c859f00d6addf17