Skip to content

Commit 642268a

Browse files
committed
Adding config keys required by imitation learning code updates
1 parent d244076 commit 642268a

21 files changed

+78
-20
lines changed

aintelope/config/config_experiment.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,14 @@ hparams:
3838
warm_start_steps: 0 # not used at the moment, except for the unit tests
3939
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model
4040
use_separate_models_for_each_experiment: True
41-
agent_params: {}
41+
agent_params:
42+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4243
model_params:
4344
use_weight_sharing: False
4445
eps_start: 0.66
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: -1 # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_exampleagent.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ hparams:
3636
agent_class: example_agent
3737
save_frequency: 0 # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3838
use_separate_models_for_each_experiment: True
39-
agent_params: {}
39+
agent_params:
40+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4041
model_params:
4142
use_weight_sharing: False
4243
env_layout_seed_repeat_sequence_length: -1 # 10

aintelope/config/config_experiment_handwritten_rules.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ hparams:
4242
use_weight_sharing: False
4343
eps_start: 0.0
4444
eps_end: 0.0
45+
instinct_bias_epsilon_start: 0.0
46+
instinct_bias_epsilon_end: 0.0
4547
eps_last_pipeline_cycle: -1
4648
eps_last_episode: -1 # use -1 when episode counting for eps is disabled
4749
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_llm_agent.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,14 @@ hparams:
3636
agent_class: llm_agent
3737
save_frequency: 0 # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3838
use_separate_models_for_each_experiment: True
39-
agent_params: {}
39+
agent_params:
40+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4041
model_params:
4142
use_weight_sharing: False
4243
eps_start: 0.5
4344
eps_end: 0.0
45+
instinct_bias_epsilon_start: 0.0
46+
instinct_bias_epsilon_end: 0.0
4447
eps_last_pipeline_cycle: -1
4548
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4649
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_random.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ hparams:
3636
agent_class: random_agent
3737
save_frequency: 0 # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3838
use_separate_models_for_each_experiment: True
39-
agent_params: {}
39+
agent_params:
40+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4041
model_params:
4142
use_weight_sharing: False
4243
env_layout_seed_repeat_sequence_length: -1 # 10

aintelope/config/config_experiment_score_a2c.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 0
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_a2c_cnn2.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 2
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_a2c_cnn3.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 3
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_a2c_mlp.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 0
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_dqn.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_dqn_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 0
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

0 commit comments

Comments (0)