Refactor on select_keys
#109
Triggered via issue
June 17, 2025 10:58
Status
Failure
Total duration
12m 33s
Artifacts
–
Annotations
7 errors
|
unittest
Process completed with exit code 1.
|
|
Failed Test: tests/trainer/trainer_test.py::TestTrainerDPO::test_trainer
tests/trainer/trainer_test.py::TestTrainerDPO::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerDPO testMethod=test_trainer>
def test_trainer(self):
"""Test DPO."""
# test both mode
self.config.mode = "train"
self.config.algorithm.algorithm_type = "dpo"
self.config.algorithm.policy_loss_fn = "dpo"
self.config.algorithm.policy_loss_fn_args = {}
# self.config.buffer.batch_size = 32
self.config.buffer.trainer_input.experience_buffer = get_unittest_dataset_config("dpo")
self.config.check_and_update()
self.config.trainer.trainer_config.trainer.total_training_steps = 4
self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2
self.config.trainer.trainer_config.actor_rollout_ref.actor.optim.lr = 5e-7
> train(self.config)
tests/trainer/trainer_test.py:205:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
trinity/cli/launcher.py:58: in train
raise e
trinity/cli/launcher.py:53: in train
ray.get(trainer.train.remote())
/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper
return fn(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper
return func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ray._private.worker.Worker object at 0x7fefe0d493f0>
object_refs = [ObjectRef(ea28637d3674700252feab1610d010bccb6d2e991100000001000000)]
timeout = None, return_exceptions = False, skip_deserialization = False
def get_objects(
self,
object_refs: list,
timeout: Optional[float] = None,
return_exceptions: bool = False,
skip_deserialization: bool = False,
):
"""Get the values in the object store associated with the IDs.
Return the values from the local object store for object_refs. This
will block until all the values for object_refs have been written to
the local object store.
Args:
object_refs: A list of the object refs
whose values should be retrieved.
timeout: The maximum amount of time in
seconds to wait before returning.
return_exceptions: If any of the objects deserialize to an
Exception object, whether to return them as values in the
returned list. If False, then the first found exception will be
raised.
skip_deserialization: If true, only the buffer will be released and
the object associated with the buffer will not be deserialized.
Returns:
list: List of deserialized objects or None if skip_deserialization is True.
bytes: UUID of the debugger breakpoint we should drop
into or b"" if there is no breakpoint.
"""
# Make sure that the values are object refs.
for object_ref in object_refs:
if not isinstance(object_ref, ObjectRef):
raise TypeError(
f"Attempting to call `get` on the value {object_ref}, "
"which is not an ray.ObjectRef."
)
timeout_ms = (
int(timeout * 1000) if timeout is not None and timeout != -1 else -1
)
data_metadata_pairs: List[
Tuple[ray._raylet.Buffer, bytes]
] = self.core_worker.get_objects(
object_refs,
timeout_ms,
)
debugger_breakpoint = b""
for data, metadata in data_metadata_pairs:
if metadata:
metadata_fields = metadata.split(b",")
if len(metadata_fields) >= 2 and metadata_fields[1].startswith(
ray_constants.OBJE
|
|
Failed Test: tests/trainer/trainer_test.py::TestTrainerGSM8KWithSFT::test_trainer
tests/trainer/trainer_test.py::TestTrainerGSM8KWithSFT::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerGSM8KWithSFT testMethod=test_trainer>
def test_trainer(self):
"""Test GSM8K With SFT."""
# test both mode
self.config.algorithm.algorithm_type = "grpo"
self.config.algorithm.repeat_times = 4
self.config.algorithm.advantage_fn = "grpo"
self.config.algorithm.advantage_fn_args = {}
self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k")
self.config.buffer.trainer_input.sft_warmup_steps = 2
self.config.buffer.trainer_input.sft_warmup_dataset = get_unittest_dataset_config(
"sft_for_gsm8k"
)
self.config.check_and_update()
self.config.trainer.trainer_config.trainer.total_training_steps = 4
self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2
self.config.trainer.trainer_config.actor_rollout_ref.actor.optim.lr = 1e-5
> both(self.config)
tests/trainer/trainer_test.py:173:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
trinity/cli/launcher.py:105: in both
raise e
trinity/cli/launcher.py:89: in both
train_continue, train_step_num = ray.get(ref_train)
/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper
return fn(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper
return func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ray._private.worker.Worker object at 0x7fefe0d493f0>
object_refs = [ObjectRef(c101104bb440cbdcf25651bcca66a7631abf655d1000000001000000)]
timeout = None, return_exceptions = False, skip_deserialization = False
def get_objects(
self,
object_refs: list,
timeout: Optional[float] = None,
return_exceptions: bool = False,
skip_deserialization: bool = False,
):
"""Get the values in the object store associated with the IDs.
Return the values from the local object store for object_refs. This
will block until all the values for object_refs have been written to
the local object store.
Args:
object_refs: A list of the object refs
whose values should be retrieved.
timeout: The maximum amount of time in
seconds to wait before returning.
return_exceptions: If any of the objects deserialize to an
Exception object, whether to return them as values in the
returned list. If False, then the first found exception will be
raised.
skip_deserialization: If true, only the buffer will be released and
the object associated with the buffer will not be deserialized.
Returns:
list: List of deserialized objects or None if skip_deserialization is True.
bytes: UUID of the debugger breakpoint we should drop
into or b"" if there is no breakpoint.
"""
# Make sure that the values are object refs.
for object_ref in object_refs:
if not isinstance(object_ref, ObjectRef):
raise TypeError(
f"Attempting to call `get` on the value {object_ref}, "
"which is not an ray.ObjectRef."
)
timeout_ms = (
int(timeout * 1000) if timeout is not None and timeout != -1 else -1
)
data_metadata_pairs: List[
Tuple[ray._raylet.Buffer, bytes]
] = self.core_worker.get_objects(
object_refs,
timeout_ms,
)
debugger_breakpoint = b""
for data, metadata in data_metadata_pairs:
|
|
Failed Test: tests/trainer/trainer_test.py::TestTrainerGSM8K::test_trainer
tests/trainer/trainer_test.py::TestTrainerGSM8K::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerGSM8K testMethod=test_trainer>
def test_trainer(self):
"""Test GSM8K."""
# test both mode
self.config.algorithm.algorithm_type = "grpo"
self.config.algorithm.repeat_times = 4
# self.config.algorithm.repeat_times = 8 # TODO: used for real testing
self.config.algorithm.advantage_fn = "grpo"
self.config.algorithm.advantage_fn_args = {}
# self.config.buffer.batch_size = 96 # TODO: used for real testing
self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k")
self.config.check_and_update()
self.config.trainer.trainer_config.trainer.total_training_steps = 4
self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2
self.config.trainer.trainer_config.actor_rollout_ref.actor.optim.lr = 1e-5
> both(self.config)
tests/trainer/trainer_test.py:133:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
trinity/cli/launcher.py:105: in both
raise e
trinity/cli/launcher.py:89: in both
train_continue, train_step_num = ray.get(ref_train)
/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper
return fn(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper
return func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ray._private.worker.Worker object at 0x7fefe0d493f0>
object_refs = [ObjectRef(0d1a710d39e557a54cea9d88aab5bc8ec6ff24590f00000001000000)]
timeout = None, return_exceptions = False, skip_deserialization = False
def get_objects(
self,
object_refs: list,
timeout: Optional[float] = None,
return_exceptions: bool = False,
skip_deserialization: bool = False,
):
"""Get the values in the object store associated with the IDs.
Return the values from the local object store for object_refs. This
will block until all the values for object_refs have been written to
the local object store.
Args:
object_refs: A list of the object refs
whose values should be retrieved.
timeout: The maximum amount of time in
seconds to wait before returning.
return_exceptions: If any of the objects deserialize to an
Exception object, whether to return them as values in the
returned list. If False, then the first found exception will be
raised.
skip_deserialization: If true, only the buffer will be released and
the object associated with the buffer will not be deserialized.
Returns:
list: List of deserialized objects or None if skip_deserialization is True.
bytes: UUID of the debugger breakpoint we should drop
into or b"" if there is no breakpoint.
"""
# Make sure that the values are object refs.
for object_ref in object_refs:
if not isinstance(object_ref, ObjectRef):
raise TypeError(
f"Attempting to call `get` on the value {object_ref}, "
"which is not an ray.ObjectRef."
)
timeout_ms = (
int(timeout * 1000) if timeout is not None and timeout != -1 else -1
)
data_metadata_pairs: List[
Tuple[ray._raylet.Buffer, bytes]
] = self.core_worker.get_objects(
object_refs,
timeout_ms,
)
debugger_breakpoint = b""
for data, metadata in data_metadata_pairs:
if metadata:
metadata_fields = m
|
|
Failed Test: tests/trainer/trainer_test.py::TestTrainerCountdown::test_trainer
tests/trainer/trainer_test.py::TestTrainerCountdown::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerCountdown testMethod=test_trainer>
def test_trainer(self):
"""Test the both and bench mode."""
# test both mode
self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("countdown")
self.config.buffer.explorer_input.eval_tasksets.append(
get_unittest_dataset_config("countdown", "test")
)
self.config.buffer.explorer_input.eval_tasksets.append(
get_unittest_dataset_config("copy_countdown", "test")
)
self.config.trainer.save_interval = 4
self.config.check_and_update()
self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2
self.config.trainer.trainer_config.trainer.max_critic_ckpt_to_keep = 2
> both(self.config)
tests/trainer/trainer_test.py:59:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
trinity/cli/launcher.py:105: in both
raise e
trinity/cli/launcher.py:89: in both
train_continue, train_step_num = ray.get(ref_train)
/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper
return fn(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper
return func(*args, **kwargs)
/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get
values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <ray._private.worker.Worker object at 0x7fefe0d493f0>
object_refs = [ObjectRef(90bed3839ab1a68ed3fd85d91d27eb260c8655010e00000001000000)]
timeout = None, return_exceptions = False, skip_deserialization = False
def get_objects(
self,
object_refs: list,
timeout: Optional[float] = None,
return_exceptions: bool = False,
skip_deserialization: bool = False,
):
"""Get the values in the object store associated with the IDs.
Return the values from the local object store for object_refs. This
will block until all the values for object_refs have been written to
the local object store.
Args:
object_refs: A list of the object refs
whose values should be retrieved.
timeout: The maximum amount of time in
seconds to wait before returning.
return_exceptions: If any of the objects deserialize to an
Exception object, whether to return them as values in the
returned list. If False, then the first found exception will be
raised.
skip_deserialization: If true, only the buffer will be released and
the object associated with the buffer will not be deserialized.
Returns:
list: List of deserialized objects or None if skip_deserialization is True.
bytes: UUID of the debugger breakpoint we should drop
into or b"" if there is no breakpoint.
"""
# Make sure that the values are object refs.
for object_ref in object_refs:
if not isinstance(object_ref, ObjectRef):
raise TypeError(
f"Attempting to call `get` on the value {object_ref}, "
"which is not an ray.ObjectRef."
)
timeout_ms = (
int(timeout * 1000) if timeout is not None and timeout != -1 else -1
)
data_metadata_pairs: List[
Tuple[ray._raylet.Buffer, bytes]
] = self.core_worker.get_objects(
object_refs,
timeout_ms,
)
debugger_breakpoint = b""
for data, metadata in data_metadata_pairs:
if metadata:
metadata_fields = metadata.split(b",")
if len(metadata_fields) >= 2 and metadata_fields[1].star
|
|
Failed Test: tests/common/config_test.py::TestConfig::test_all_examples_are_valid
tests/common/config_test.py::TestConfig::test_all_examples_are_valid: The test failed in the call phase due to an exception - config_path = '/workspace/tests/common/../../examples/mix_math/mix_math.yaml'
def load_config(config_path: str) -> Config:
"""Load the configuration from the given path."""
# TODO: add test
schema = OmegaConf.structured(Config)
yaml_config = OmegaConf.load(config_path)
try:
> config = OmegaConf.merge(schema, yaml_config)
trinity/common/config.py:647:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/usr/local/lib/python3.10/dist-packages/omegaconf/omegaconf.py:273: in merge
target.merge_with(*configs[1:])
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:492: in merge_with
self._format_and_raise(key=None, value=None, cause=e)
/usr/local/lib/python3.10/dist-packages/omegaconf/base.py:231: in _format_and_raise
format_and_raise(
/usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:819: in format_and_raise
_raise(ex, cause)
/usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:797: in _raise
raise ex.with_traceback(sys.exc_info()[2]) # set env var OC_CAUSE=1 for full trace
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:490: in merge_with
self._merge_with(*others)
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:514: in _merge_with
BaseContainer._map_merge(self, other)
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:399: in _map_merge
dest_node._merge_with(src_node)
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:514: in _merge_with
BaseContainer._map_merge(self, other)
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:432: in _map_merge
dest[key] = src._get_node(key)
/usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:310: in __setitem__
self._format_and_raise(
/usr/local/lib/python3.10/dist-packages/omegaconf/base.py:231: in _format_and_raise
format_and_raise(
/usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:819: in format_and_raise
_raise(ex, cause)
/usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:797: in _raise
raise ex.with_traceback(sys.exc_info()[2]) # set env var OC_CAUSE=1 for full trace
/usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:308: in __setitem__
self.__set_impl(key=key, value=value)
/usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:318: in __set_impl
self._set_item_impl(key, value)
/usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:545: in _set_item_impl
self._validate_set(key, value)
/usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:180: in _validate_set
target = self._get_node(key) if key is not None else self
/usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:475: in _get_node
self._validate_get(key)
/usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:164: in _validate_get
self._format_and_raise(
/usr/local/lib/python3.10/dist-packages/omegaconf/base.py:231: in _format_and_raise
format_and_raise(
/usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:899: in format_and_raise
_raise(ex, cause)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ex = ConfigAttributeError("Key 'explore_batch_size' not in 'BufferConfig'\n full_key: buffer.explore_batch_size\n reference_type=BufferConfig\n object_type=BufferConfig")
cause = ConfigAttributeError("Key 'explore_batch_size' not in 'BufferConfig'")
def _raise(ex: Exception, cause: Exception) -> None:
# Set the environment variable OC_CAUSE=1 to get a stacktrace that includes the
# causing exception.
env_var = os.environ["OC_CAUSE"] if "OC_CAUSE" in os.environ else None
debugging = sys.gettrace() is not None
full_backtrace = (debugging and not env_var == "0") or (env_var == "1")
if full_backtrace:
ex.__cause__ = cause
|
|
unittest
Process completed with exit code 1.
|