Fix the mismatch between vLLM OpenAI API and vLLM generate
#1156
Annotations
9 errors
|
unittest
Process completed with exit code 1.
|
|
Failed Test: tests/explorer/workflow_test.py::TestWorkflowRunner::test_workflow_with_openai
tests/explorer/workflow_test.py::TestWorkflowRunner::test_workflow_with_openai: The test failed in the call phase - self = <tests.explorer.workflow_test.TestWorkflowRunner testMethod=test_workflow_with_openai>
async def test_workflow_with_openai(self):
config = get_template_config()
config.mode = "explore"
config.model.model_path = get_model_path()
config.explorer.rollout_model.engine_num = 1
config.explorer.rollout_model.enable_openai_api = True
config.explorer.rollout_model.enable_history = True
config.check_and_update()
engines, auxiliary_engines = create_inference_models(config)
runner = WorkflowRunner(
config,
model=engines[0],
auxiliary_models=[],
runner_id=0,
)
> await runner.prepare()
tests/explorer/workflow_test.py:737:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
trinity/explorer/workflow_runner.py:87: in prepare
await asyncio.gather(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <trinity.common.models.model.ModelWrapper object at 0x7ef8f41af670>
async def prepare(self) -> None:
"""Prepare the model wrapper."""
> self.api_address = await self.model.get_api_server_url.remote()
E ray.exceptions.RayTaskError(RuntimeError): ray::vLLMRolloutModel.get_api_server_url() (pid=672, ip=172.20.0.3, actor_id=38d9a26618d938af101e88dc34000000, repr=<trinity.common.models.vllm_model.vLLMRolloutModel object at 0x7f960239fc70>)
E File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
E return self.__get_result()
E File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
E raise self._exception
E File "/workspace/trinity/common/models/vllm_model.py", line 590, in get_api_server_url
E raise RuntimeError("Model is not prepared. Please call `prepare()` first.")
E RuntimeError: Model is not prepared. Please call `prepare()` first.
trinity/common/models/model.py:116: RayTaskError(RuntimeError)
|
|
Failed Test: tests/explorer/explorer_test.py::TestExplorerGSM8k::test_explorer
tests/explorer/explorer_test.py::TestExplorerGSM8k::test_explorer: The test failed in the call phase due to an exception - self = <tests.explorer.explorer_test.TestExplorerGSM8k testMethod=test_explorer>
def test_explorer(self):
self.config.algorithm.repeat_times = 2
self.config.buffer.total_epochs = 1
self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k")
self.config.name = f"explore-{datetime.now().strftime('%Y%m%d%H%M%S')}"
# some step may be skipped due to same reward
self.config.algorithm.algorithm_type = "grpo"
self.config.algorithm.advantage_fn = "grpo"
self.config.algorithm.advantage_fn_args = {
"epsilon": 1e-6,
}
self.config.model.max_model_len = 10240
self.config.model.max_response_tokens = 8192
self.config.model.min_response_tokens = 8192
self.config.explorer.rollout_model.ignore_eos = True
self.config.check_and_update()
explorer = Explorer.get_actor(self.config)
> ray.get(explorer.prepare.remote())
tests/explorer/explorer_test.py:129:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper
return fn(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper
return func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2961: in get
values, debugger_breakpoint = worker.get_objects(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ray._private.worker.Worker object at 0x7f0b9083fb80>
object_refs = [ObjectRef(aa720e6b051a8e0bd4785757bdd934f75ae823de1800000001000000)]
timeout = None, return_exceptions = False, skip_deserialization = False
_tensor_transport = None
def get_objects(
self,
object_refs: list,
timeout: Optional[float] = None,
return_exceptions: bool = False,
skip_deserialization: bool = False,
_tensor_transport: Optional[str] = None,
) -> Tuple[List[serialization.SerializedRayObject], bytes]:
"""Get the values in the object store associated with the IDs.
Return the values from the local object store for object_refs. This
will block until all the values for object_refs have been written to
the local object store.
Args:
object_refs: A list of the object refs
whose values should be retrieved.
timeout: The maximum amount of time in
seconds to wait before returning.
return_exceptions: If any of the objects deserialize to an
Exception object, whether to return them as values in the
returned list. If False, then the first found exception will be
raised.
skip_deserialization: If true, only the buffer will be released and
the object associated with the buffer will not be deserialized.
_tensor_transport: [Alpha] The tensor transport to use to fetch `torch.Tensors` found in the Ray Direct Transport object. Currently, this supports "object_store" and "nixl".
Returns:
list: List of deserialized objects or None if skip_deserialization is True.
bytes: UUID of the debugger breakpoint we should drop
into or b"" if there is no breakpoint.
"""
# Make sure that the values are object refs.
for object_ref in object_refs:
if not isinstance(object_ref, ObjectRef):
raise TypeError(
f"Attempting to call `get` on the value {object_ref}, "
"which is not an ray.ObjectRef."
)
tensor_transport: TensorTransportEnum = (
TensorTransportEnum.from_str(_tensor_transport)
if _tensor_transport is not None
else None
)
assert tensor_transport in [
T
|
|
Failed Test: tests/explorer/explorer_test.py::TestExplorerGSM8KRULERNoEval::test_explorer
tests/explorer/explorer_test.py::TestExplorerGSM8KRULERNoEval::test_explorer: The test failed in the call phase due to an assertion error - self = <tests.explorer.explorer_test.TestExplorerGSM8KRULERNoEval testMethod=test_explorer>
def test_explorer(self):
self.config.explorer.rollout_model.engine_num = 2
self.config.explorer.auxiliary_models = [
InferenceModelConfig(
model_path=get_api_model_path(),
tensor_parallel_size=1,
engine_num=2,
)
]
self.config.algorithm.repeat_times = 2
self.config.buffer.total_steps = 2
self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k_ruler")
self.config.name = f"explore-no-eval-{datetime.now().strftime('%Y%m%d%H%M%S')}"
self.config.algorithm.algorithm_type = "grpo"
self.config.algorithm.advantage_fn = "grpo"
self.config.algorithm.advantage_fn_args = {
"std_threshold": 0.0001,
}
self.config.check_and_update()
explore(self.config)
parser = TensorBoardParser(os.path.join(self.config.monitor.cache_dir, "tensorboard"))
rollout_metrics = parser.metric_list("rollout")
> self.assertTrue(len(rollout_metrics) > 0)
E AssertionError: False is not true
tests/explorer/explorer_test.py:105: AssertionError
|
|
Failed Test: tests/explorer/explorer_test.py::TestExplorerCountdownEval::test_explorer
tests/explorer/explorer_test.py::TestExplorerCountdownEval::test_explorer: The test failed in the call phase due to an assertion error - self = <tests.explorer.explorer_test.TestExplorerCountdownEval testMethod=test_explorer>
def test_explorer(self):
self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("countdown")
eval_tasksets = self.config.buffer.explorer_input.eval_tasksets
eval_tasksets.extend(
[
get_unittest_dataset_config("countdown", "test"),
get_unittest_dataset_config("eval_short"),
get_unittest_dataset_config("eval_long"),
]
)
eval_tasksets[1].repeat_times = 6
eval_tasksets[2].repeat_times = 10
self.config.name = f"explore-eval-{datetime.now().strftime('%Y%m%d%H%M%S')}"
self.config.check_and_update()
explore(self.config)
parser = TensorBoardParser(os.path.join(self.config.monitor.cache_dir, "tensorboard"))
rollout_metrics = parser.metric_list("rollout")
> self.assertTrue(len(rollout_metrics) > 0)
E AssertionError: False is not true
tests/explorer/explorer_test.py:66: AssertionError
|
|
Failed Test: tests/common/vllm_test.py::ModelWrapperTest_2::test_generate
tests/common/vllm_test.py::ModelWrapperTest_2::test_generate: The test failed in the call phase due to an exception - self = <tests.common.vllm_test.ModelWrapperTest_2 testMethod=test_generate>
async def test_generate(
self,
):
await prepare_engines(self.engines, self.auxiliary_engines)
> await self.model_wrapper.prepare()
tests/common/vllm_test.py:140:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <trinity.common.models.model.ModelWrapper object at 0x7f08902d6830>
async def prepare(self) -> None:
"""Prepare the model wrapper."""
self.api_address = await self.model.get_api_server_url.remote()
if self.api_address is None:
self.logger.info("API server is not enabled for inference model.")
return
max_retries = 30
interval = 2 # seconds
for i in range(max_retries):
try:
async with httpx.AsyncClient() as client:
response = await client.get(self.api_address + "/health", timeout=5)
if response.status_code == 200:
return
except Exception as e:
self.logger.info(f"API server not ready (attempt {i + 1}/{max_retries}): {e}")
await asyncio.sleep(interval)
> raise RuntimeError(
f"API server at {self.api_address} not ready after {max_retries} attempts."
)
E RuntimeError: API server at http://None:None not ready after 30 attempts.
trinity/common/models/model.py:131: RuntimeError
|
|
Failed Test: tests/common/vllm_test.py::ModelWrapperTest_1::test_generate
tests/common/vllm_test.py::ModelWrapperTest_1::test_generate: The test failed in the call phase due to an exception - self = <tests.common.vllm_test.ModelWrapperTest_1 testMethod=test_generate>
async def test_generate(
self,
):
await prepare_engines(self.engines, self.auxiliary_engines)
> await self.model_wrapper.prepare()
tests/common/vllm_test.py:140:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <trinity.common.models.model.ModelWrapper object at 0x7f08544c0c70>
async def prepare(self) -> None:
"""Prepare the model wrapper."""
self.api_address = await self.model.get_api_server_url.remote()
if self.api_address is None:
self.logger.info("API server is not enabled for inference model.")
return
max_retries = 30
interval = 2 # seconds
for i in range(max_retries):
try:
async with httpx.AsyncClient() as client:
response = await client.get(self.api_address + "/health", timeout=5)
if response.status_code == 200:
return
except Exception as e:
self.logger.info(f"API server not ready (attempt {i + 1}/{max_retries}): {e}")
await asyncio.sleep(interval)
> raise RuntimeError(
f"API server at {self.api_address} not ready after {max_retries} attempts."
)
E RuntimeError: API server at http://None:None not ready after 30 attempts.
trinity/common/models/model.py:131: RuntimeError
|
|
Failed Test: tests/common/vllm_test.py::ModelWrapperTest_0::test_generate
tests/common/vllm_test.py::ModelWrapperTest_0::test_generate: The test failed in the call phase due to an exception - self = <tests.common.vllm_test.ModelWrapperTest_0 testMethod=test_generate>
async def test_generate(
self,
):
await prepare_engines(self.engines, self.auxiliary_engines)
> await self.model_wrapper.prepare()
tests/common/vllm_test.py:140:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <trinity.common.models.model.ModelWrapper object at 0x7f08544c0f40>
async def prepare(self) -> None:
"""Prepare the model wrapper."""
self.api_address = await self.model.get_api_server_url.remote()
if self.api_address is None:
self.logger.info("API server is not enabled for inference model.")
return
max_retries = 30
interval = 2 # seconds
for i in range(max_retries):
try:
async with httpx.AsyncClient() as client:
response = await client.get(self.api_address + "/health", timeout=5)
if response.status_code == 200:
return
except Exception as e:
self.logger.info(f"API server not ready (attempt {i + 1}/{max_retries}): {e}")
await asyncio.sleep(interval)
> raise RuntimeError(
f"API server at {self.api_address} not ready after {max_retries} attempts."
)
E RuntimeError: API server at http://None:None not ready after 30 attempts.
trinity/common/models/model.py:131: RuntimeError
|
|
unittest
Process completed with exit code 1.
|
Artifacts
Produced during runtime
| Name | Size | Digest | |
|---|---|---|---|
| pytest-results | 7.75 KB | sha256:00fb6b6aea850b591604f0f833e797ab34da6791dfd547f51c859f00d6addf17 | |