1818def dummy_config ():
1919 return yaml .safe_load (
2020 """
21- hidden_units: 128
21+ hidden_units: 32
2222 learning_rate: 3.0e-4
23- num_layers: 2
23+ num_layers: 1
2424 use_recurrent: false
2525 sequence_length: 32
2626 memory_size: 32
@@ -32,8 +32,8 @@ def dummy_config():
3232 )
3333
3434
35- @ mock . patch ( "mlagents.envs.UnityEnvironment" )
36- def test_bc_trainer ( mock_env , dummy_config ):
35+ def create_bc_trainer ( dummy_config ):
36+ mock_env = mock . Mock ()
3737 mock_brain = mb .create_mock_3dball_brain ()
3838 mock_braininfo = mb .create_mock_braininfo (num_agents = 12 , num_vector_observations = 8 )
3939 mb .setup_mock_unityenvironment (mock_env , mock_brain , mock_braininfo )
@@ -49,12 +49,54 @@ def test_bc_trainer(mock_env, dummy_config):
4949 mock_brain , trainer_parameters , training = True , load = False , seed = 0 , run_id = 0
5050 )
5151 trainer .demonstration_buffer = mb .simulate_rollout (env , trainer .policy , 100 )
52+ return trainer , env
53+
54+
def test_bc_trainer_step(dummy_config):
    """Exercise get_step, update_policy, and increment_step on a BC trainer."""
    trainer, _env = create_bc_trainer(dummy_config)
    # A freshly built trainer must not have taken any steps yet.
    assert trainer.get_step == 0
    # A policy update should record at least one cloning-loss entry.
    trainer.update_policy()
    cloning_losses = trainer.stats["Losses/Cloning Loss"]
    assert len(cloning_losses) > 0
    # Advancing by a single step must be reflected in the step counter.
    trainer.increment_step(1)
    assert trainer.step == 1
5665
5766
def test_bc_trainer_add_proc_experiences(dummy_config):
    """Verify add_experiences populates per-agent bookkeeping and that
    process_experiences resets it once every agent reports done."""
    trainer, env = create_bc_trainer(dummy_config)
    returned_braininfo = env.step()
    brain_info = returned_braininfo["Ball3DBrain"]
    # Take-action outputs are not used by the BC trainer, so an empty dict is fine.
    trainer.add_experiences(returned_braininfo, returned_braininfo, {})
    for agent_id in brain_info.agents:
        assert trainer.evaluation_buffer[agent_id].last_brain_info is not None
        assert trainer.episode_steps[agent_id] > 0
        assert trainer.cumulative_rewards[agent_id] > 0
    # Test process_experiences by marking every agent done. Derive the length
    # from the brain info's agent list instead of hard-coding the mocked agent
    # count (12), so this test stays correct if the fixture's num_agents changes.
    brain_info.local_done = [True] * len(brain_info.agents)
    trainer.process_experiences(returned_braininfo, returned_braininfo)
    for agent_id in brain_info.agents:
        assert trainer.episode_steps[agent_id] == 0
        assert trainer.cumulative_rewards[agent_id] == 0
84+
85+
def test_bc_trainer_end_episode(dummy_config):
    """Calling end_episode must zero out all per-agent episode statistics."""
    trainer, env = create_bc_trainer(dummy_config)
    braininfo_by_name = env.step()
    # The BC trainer ignores take-action outputs, hence the empty dict.
    trainer.add_experiences(braininfo_by_name, braininfo_by_name, {})
    trainer.process_experiences(braininfo_by_name, braininfo_by_name)
    # Ending the episode should reset every agent's counters to zero.
    trainer.end_episode()
    for agent in braininfo_by_name["Ball3DBrain"].agents:
        assert trainer.episode_steps[agent] == 0
        assert trainer.cumulative_rewards[agent] == 0
98+
99+
58100@mock .patch ("mlagents.envs.UnityEnvironment.executable_launcher" )
59101@mock .patch ("mlagents.envs.UnityEnvironment.get_communicator" )
60102def test_bc_policy_evaluate (mock_communicator , mock_launcher , dummy_config ):
0 commit comments