Skip to content

Refactor on select_keys #109

Refactor on select_keys

Refactor on select_keys #109

Triggered via issue June 17, 2025 10:58
@chenyushuo
commented on #84 ad77ffe
Status Failure
Total duration 12m 33s
Artifacts

unittest.yaml

on: issue_comment
Fit to window
Zoom out
Zoom in

Annotations

7 errors
unittest
Process completed with exit code 1.
Failed Test: tests/trainer/trainer_test.py::TestTrainerDPO::test_trainer
tests/trainer/trainer_test.py::TestTrainerDPO::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerDPO testMethod=test_trainer> def test_trainer(self): """Test DPO.""" # test both mode self.config.mode = "train" self.config.algorithm.algorithm_type = "dpo" self.config.algorithm.policy_loss_fn = "dpo" self.config.algorithm.policy_loss_fn_args = {} # self.config.buffer.batch_size = 32 self.config.buffer.trainer_input.experience_buffer = get_unittest_dataset_config("dpo") self.config.check_and_update() self.config.trainer.trainer_config.trainer.total_training_steps = 4 self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2 self.config.trainer.trainer_config.actor_rollout_ref.actor.optim.lr = 5e-7 > train(self.config) tests/trainer/trainer_test.py:205: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ trinity/cli/launcher.py:58: in train raise e trinity/cli/launcher.py:53: in train ray.get(trainer.train.remote()) /usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper return fn(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper return func(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <ray._private.worker.Worker object at 0x7fefe0d493f0> object_refs = [ObjectRef(ea28637d3674700252feab1610d010bccb6d2e991100000001000000)] timeout = None, return_exceptions = False, skip_deserialization = False def get_objects( self, object_refs: list, timeout: Optional[float] = None, return_exceptions: bool = False, skip_deserialization: bool = False, ): """Get the values in the object store associated with the IDs. Return the values from the local object store for object_refs. 
This will block until all the values for object_refs have been written to the local object store. Args: object_refs: A list of the object refs whose values should be retrieved. timeout: The maximum amount of time in seconds to wait before returning. return_exceptions: If any of the objects deserialize to an Exception object, whether to return them as values in the returned list. If False, then the first found exception will be raised. skip_deserialization: If true, only the buffer will be released and the object associated with the buffer will not be deserialized. Returns: list: List of deserialized objects or None if skip_deserialization is True. bytes: UUID of the debugger breakpoint we should drop into or b"" if there is no breakpoint. """ # Make sure that the values are object refs. for object_ref in object_refs: if not isinstance(object_ref, ObjectRef): raise TypeError( f"Attempting to call `get` on the value {object_ref}, " "which is not an ray.ObjectRef." ) timeout_ms = ( int(timeout * 1000) if timeout is not None and timeout != -1 else -1 ) data_metadata_pairs: List[ Tuple[ray._raylet.Buffer, bytes] ] = self.core_worker.get_objects( object_refs, timeout_ms, ) debugger_breakpoint = b"" for data, metadata in data_metadata_pairs: if metadata: metadata_fields = metadata.split(b",") if len(metadata_fields) >= 2 and metadata_fields[1].startswith( ray_constants.OBJE
Failed Test: tests/trainer/trainer_test.py::TestTrainerGSM8KWithSFT::test_trainer
tests/trainer/trainer_test.py::TestTrainerGSM8KWithSFT::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerGSM8KWithSFT testMethod=test_trainer> def test_trainer(self): """Test GSM8K With SFT.""" # test both mode self.config.algorithm.algorithm_type = "grpo" self.config.algorithm.repeat_times = 4 self.config.algorithm.advantage_fn = "grpo" self.config.algorithm.advantage_fn_args = {} self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k") self.config.buffer.trainer_input.sft_warmup_steps = 2 self.config.buffer.trainer_input.sft_warmup_dataset = get_unittest_dataset_config( "sft_for_gsm8k" ) self.config.check_and_update() self.config.trainer.trainer_config.trainer.total_training_steps = 4 self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2 self.config.trainer.trainer_config.actor_rollout_ref.actor.optim.lr = 1e-5 > both(self.config) tests/trainer/trainer_test.py:173: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ trinity/cli/launcher.py:105: in both raise e trinity/cli/launcher.py:89: in both train_continue, train_step_num = ray.get(ref_train) /usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper return fn(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper return func(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <ray._private.worker.Worker object at 0x7fefe0d493f0> object_refs = [ObjectRef(c101104bb440cbdcf25651bcca66a7631abf655d1000000001000000)] timeout = None, return_exceptions = False, skip_deserialization = False def get_objects( self, object_refs: list, timeout: Optional[float] = None, return_exceptions: bool = 
False, skip_deserialization: bool = False, ): """Get the values in the object store associated with the IDs. Return the values from the local object store for object_refs. This will block until all the values for object_refs have been written to the local object store. Args: object_refs: A list of the object refs whose values should be retrieved. timeout: The maximum amount of time in seconds to wait before returning. return_exceptions: If any of the objects deserialize to an Exception object, whether to return them as values in the returned list. If False, then the first found exception will be raised. skip_deserialization: If true, only the buffer will be released and the object associated with the buffer will not be deserialized. Returns: list: List of deserialized objects or None if skip_deserialization is True. bytes: UUID of the debugger breakpoint we should drop into or b"" if there is no breakpoint. """ # Make sure that the values are object refs. for object_ref in object_refs: if not isinstance(object_ref, ObjectRef): raise TypeError( f"Attempting to call `get` on the value {object_ref}, " "which is not an ray.ObjectRef." ) timeout_ms = ( int(timeout * 1000) if timeout is not None and timeout != -1 else -1 ) data_metadata_pairs: List[ Tuple[ray._raylet.Buffer, bytes] ] = self.core_worker.get_objects( object_refs, timeout_ms, ) debugger_breakpoint = b"" for data, metadata in data_metadata_pairs:
Failed Test: tests/trainer/trainer_test.py::TestTrainerGSM8K::test_trainer
tests/trainer/trainer_test.py::TestTrainerGSM8K::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerGSM8K testMethod=test_trainer> def test_trainer(self): """Test GSM8K.""" # test both mode self.config.algorithm.algorithm_type = "grpo" self.config.algorithm.repeat_times = 4 # self.config.algorithm.repeat_times = 8 # TODO: used for real testing self.config.algorithm.advantage_fn = "grpo" self.config.algorithm.advantage_fn_args = {} # self.config.buffer.batch_size = 96 # TODO: used for real testing self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("gsm8k") self.config.check_and_update() self.config.trainer.trainer_config.trainer.total_training_steps = 4 self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2 self.config.trainer.trainer_config.actor_rollout_ref.actor.optim.lr = 1e-5 > both(self.config) tests/trainer/trainer_test.py:133: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ trinity/cli/launcher.py:105: in both raise e trinity/cli/launcher.py:89: in both train_continue, train_step_num = ray.get(ref_train) /usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper return fn(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper return func(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <ray._private.worker.Worker object at 0x7fefe0d493f0> object_refs = [ObjectRef(0d1a710d39e557a54cea9d88aab5bc8ec6ff24590f00000001000000)] timeout = None, return_exceptions = False, skip_deserialization = False def get_objects( self, object_refs: list, timeout: Optional[float] = None, return_exceptions: bool = False, skip_deserialization: bool = False, ): 
"""Get the values in the object store associated with the IDs. Return the values from the local object store for object_refs. This will block until all the values for object_refs have been written to the local object store. Args: object_refs: A list of the object refs whose values should be retrieved. timeout: The maximum amount of time in seconds to wait before returning. return_exceptions: If any of the objects deserialize to an Exception object, whether to return them as values in the returned list. If False, then the first found exception will be raised. skip_deserialization: If true, only the buffer will be released and the object associated with the buffer will not be deserialized. Returns: list: List of deserialized objects or None if skip_deserialization is True. bytes: UUID of the debugger breakpoint we should drop into or b"" if there is no breakpoint. """ # Make sure that the values are object refs. for object_ref in object_refs: if not isinstance(object_ref, ObjectRef): raise TypeError( f"Attempting to call `get` on the value {object_ref}, " "which is not an ray.ObjectRef." ) timeout_ms = ( int(timeout * 1000) if timeout is not None and timeout != -1 else -1 ) data_metadata_pairs: List[ Tuple[ray._raylet.Buffer, bytes] ] = self.core_worker.get_objects( object_refs, timeout_ms, ) debugger_breakpoint = b"" for data, metadata in data_metadata_pairs: if metadata: metadata_fields = m
Failed Test: tests/trainer/trainer_test.py::TestTrainerCountdown::test_trainer
tests/trainer/trainer_test.py::TestTrainerCountdown::test_trainer: The test failed in the call phase due to an exception - self = <tests.trainer.trainer_test.TestTrainerCountdown testMethod=test_trainer> def test_trainer(self): """Test the both and bench mode.""" # test both mode self.config.buffer.explorer_input.taskset = get_unittest_dataset_config("countdown") self.config.buffer.explorer_input.eval_tasksets.append( get_unittest_dataset_config("countdown", "test") ) self.config.buffer.explorer_input.eval_tasksets.append( get_unittest_dataset_config("copy_countdown", "test") ) self.config.trainer.save_interval = 4 self.config.check_and_update() self.config.trainer.trainer_config.trainer.max_actor_ckpt_to_keep = 2 self.config.trainer.trainer_config.trainer.max_critic_ckpt_to_keep = 2 > both(self.config) tests/trainer/trainer_test.py:59: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ trinity/cli/launcher.py:105: in both raise e trinity/cli/launcher.py:89: in both train_continue, train_step_num = ray.get(ref_train) /usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py:22: in auto_init_wrapper return fn(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py:104: in wrapper return func(*args, **kwargs) /usr/local/lib/python3.10/dist-packages/ray/_private/worker.py:2849: in get values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <ray._private.worker.Worker object at 0x7fefe0d493f0> object_refs = [ObjectRef(90bed3839ab1a68ed3fd85d91d27eb260c8655010e00000001000000)] timeout = None, return_exceptions = False, skip_deserialization = False def get_objects( self, object_refs: list, timeout: Optional[float] = None, return_exceptions: bool = False, skip_deserialization: bool = False, ): """Get the values in the object store associated with the IDs. 
Return the values from the local object store for object_refs. This will block until all the values for object_refs have been written to the local object store. Args: object_refs: A list of the object refs whose values should be retrieved. timeout: The maximum amount of time in seconds to wait before returning. return_exceptions: If any of the objects deserialize to an Exception object, whether to return them as values in the returned list. If False, then the first found exception will be raised. skip_deserialization: If true, only the buffer will be released and the object associated with the buffer will not be deserialized. Returns: list: List of deserialized objects or None if skip_deserialization is True. bytes: UUID of the debugger breakpoint we should drop into or b"" if there is no breakpoint. """ # Make sure that the values are object refs. for object_ref in object_refs: if not isinstance(object_ref, ObjectRef): raise TypeError( f"Attempting to call `get` on the value {object_ref}, " "which is not an ray.ObjectRef." ) timeout_ms = ( int(timeout * 1000) if timeout is not None and timeout != -1 else -1 ) data_metadata_pairs: List[ Tuple[ray._raylet.Buffer, bytes] ] = self.core_worker.get_objects( object_refs, timeout_ms, ) debugger_breakpoint = b"" for data, metadata in data_metadata_pairs: if metadata: metadata_fields = metadata.split(b",") if len(metadata_fields) >= 2 and metadata_fields[1].star
Failed Test: tests/common/config_test.py::TestConfig::test_all_examples_are_valid
tests/common/config_test.py::TestConfig::test_all_examples_are_valid: The test failed in the call phase due to an exception - config_path = '/workspace/tests/common/../../examples/mix_math/mix_math.yaml' def load_config(config_path: str) -> Config: """Load the configuration from the given path.""" # TODO: add test schema = OmegaConf.structured(Config) yaml_config = OmegaConf.load(config_path) try: > config = OmegaConf.merge(schema, yaml_config) trinity/common/config.py:647: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /usr/local/lib/python3.10/dist-packages/omegaconf/omegaconf.py:273: in merge target.merge_with(*configs[1:]) /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:492: in merge_with self._format_and_raise(key=None, value=None, cause=e) /usr/local/lib/python3.10/dist-packages/omegaconf/base.py:231: in _format_and_raise format_and_raise( /usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:819: in format_and_raise _raise(ex, cause) /usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:797: in _raise raise ex.with_traceback(sys.exc_info()[2]) # set env var OC_CAUSE=1 for full trace /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:490: in merge_with self._merge_with(*others) /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:514: in _merge_with BaseContainer._map_merge(self, other) /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:399: in _map_merge dest_node._merge_with(src_node) /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:514: in _merge_with BaseContainer._map_merge(self, other) /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:432: in _map_merge dest[key] = src._get_node(key) /usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:310: in __setitem__ self._format_and_raise( /usr/local/lib/python3.10/dist-packages/omegaconf/base.py:231: in _format_and_raise format_and_raise( 
/usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:819: in format_and_raise _raise(ex, cause) /usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:797: in _raise raise ex.with_traceback(sys.exc_info()[2]) # set env var OC_CAUSE=1 for full trace /usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:308: in __setitem__ self.__set_impl(key=key, value=value) /usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:318: in __set_impl self._set_item_impl(key, value) /usr/local/lib/python3.10/dist-packages/omegaconf/basecontainer.py:545: in _set_item_impl self._validate_set(key, value) /usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:180: in _validate_set target = self._get_node(key) if key is not None else self /usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:475: in _get_node self._validate_get(key) /usr/local/lib/python3.10/dist-packages/omegaconf/dictconfig.py:164: in _validate_get self._format_and_raise( /usr/local/lib/python3.10/dist-packages/omegaconf/base.py:231: in _format_and_raise format_and_raise( /usr/local/lib/python3.10/dist-packages/omegaconf/_utils.py:899: in format_and_raise _raise(ex, cause) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ex = ConfigAttributeError("Key 'explore_batch_size' not in 'BufferConfig'\n full_key: buffer.explore_batch_size\n reference_type=BufferConfig\n object_type=BufferConfig") cause = ConfigAttributeError("Key 'explore_batch_size' not in 'BufferConfig'") def _raise(ex: Exception, cause: Exception) -> None: # Set the environment variable OC_CAUSE=1 to get a stacktrace that includes the # causing exception. env_var = os.environ["OC_CAUSE"] if "OC_CAUSE" in os.environ else None debugging = sys.gettrace() is not None full_backtrace = (debugging and not env_var == "0") or (env_var == "1") if full_backtrace: ex.__cause__ = cause
unittest
Process completed with exit code 1.