Skip to content

Commit 642268a

Browse files
committed
Adding config keys required by imitation learning code updates
1 parent d244076 commit 642268a

21 files changed

+78
-20
lines changed

aintelope/config/config_experiment.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,14 @@ hparams:
3838
warm_start_steps: 0 # not used at the moment, except for the unit tests
3939
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model
4040
use_separate_models_for_each_experiment: True
41-
agent_params: {}
41+
agent_params:
42+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4243
model_params:
4344
use_weight_sharing: False
4445
eps_start: 0.66
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: -1 # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_exampleagent.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ hparams:
3636
agent_class: example_agent
3737
save_frequency: 0 # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3838
use_separate_models_for_each_experiment: True
39-
agent_params: {}
39+
agent_params:
40+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4041
model_params:
4142
use_weight_sharing: False
4243
env_layout_seed_repeat_sequence_length: -1 # 10

aintelope/config/config_experiment_handwritten_rules.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ hparams:
4242
use_weight_sharing: False
4343
eps_start: 0.0
4444
eps_end: 0.0
45+
instinct_bias_epsilon_start: 0.0
46+
instinct_bias_epsilon_end: 0.0
4547
eps_last_pipeline_cycle: -1
4648
eps_last_episode: -1 # use -1 when episode counting for eps is disabled
4749
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_llm_agent.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,14 @@ hparams:
3636
agent_class: llm_agent
3737
save_frequency: 0 # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3838
use_separate_models_for_each_experiment: True
39-
agent_params: {}
39+
agent_params:
40+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4041
model_params:
4142
use_weight_sharing: False
4243
eps_start: 0.5
4344
eps_end: 0.0
45+
instinct_bias_epsilon_start: 0.0
46+
instinct_bias_epsilon_end: 0.0
4447
eps_last_pipeline_cycle: -1
4548
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4649
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_random.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ hparams:
3636
agent_class: random_agent
3737
save_frequency: 0 # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3838
use_separate_models_for_each_experiment: True
39-
agent_params: {}
39+
agent_params:
40+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4041
model_params:
4142
use_weight_sharing: False
4243
env_layout_seed_repeat_sequence_length: -1 # 10

aintelope/config/config_experiment_score_a2c.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 0
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_a2c_cnn2.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 2
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_a2c_cnn3.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 3
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_a2c_mlp.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_a2c_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 0
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

aintelope/config/config_experiment_score_dqn.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,15 @@ hparams:
3737
agent_class: sb3_dqn_agent
3838
save_frequency: ${muldiv:${hparams.num_episodes},${hparams.env_params.num_iters},10} # how often to save a model. 0 means that the model is saved only at the end, improving training performance
3939
use_separate_models_for_each_experiment: True
40-
agent_params: {}
40+
agent_params:
41+
target_handwritten_rules: [food,drink,gold,silver,danger,predator,collision,antimovement]
4142
model_params: # TODO
4243
use_weight_sharing: False
4344
num_conv_layers: 0
4445
eps_start: 0.0
4546
eps_end: 0.0
47+
instinct_bias_epsilon_start: 0.0
48+
instinct_bias_epsilon_end: 0.0
4649
eps_last_pipeline_cycle: -1
4750
eps_last_episode: ${hparams.num_episodes} # use -1 when episode counting for eps is disabled
4851
eps_last_env_layout_seed: -1 # 10 # use -1 when trials counting for eps is disabled

0 commit comments

Comments (0)