ISLa-RL-Playground/config.yml at main · CrazyBitDev/ISLa-RL-Playground · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Weights & Biases (wandb) settings.
# NOTE(review): presumably `use_wandb` toggles logging and `wandb_config` is
# forwarded to the wandb run setup -- confirm against the code that loads this file.
use_wandb: true
wandb_config:
  project: RL-playground
  # NOTE(review): `entity` and `tag` look user-specific; other users likely need to override them.
  entity: matteoingusci01
  tag: VR506254

# Each list entry pairs an algorithm (`name`) and its hyperparameters (`parameters`)
# with an environment (`gym_environment`) and the training settings for that run.
DRL_methods:

  # SAC on LunarLanderContinuous
  - name: SAC
    parameters:
      hidden_layers_actor: 1                  # The number of hidden layers of the actor neural network
      hidden_layers_critic: 1                 # The number of hidden layers of the critic neural network
      nodes_hidden_layers_actor: 256          # The number of neurons in the hidden layers of the actor neural network
      nodes_hidden_layers_critic: 256         # The number of neurons in the hidden layers of the critic neural network
      lr_actor_optimizer: 0.001               # Learning rate of the actor (policy pi) optimizer
      lr_critic_optimizer: 0.001              # Learning rate of the critic (Q function) optimizer
      lr_temperature_optimizer: 0.001         # Learning rate of the temperature (log entropy coefficient) optimizer
      tau: 0.005                              # Polyak averaging coefficient for the soft update of parameters
      gamma: 0.99                             # Discount factor
      # NOTE(review): confirm that the SAC agent actually reads the two epsilon settings below.
      epsilon_decay: 0.9999                   # Decay factor for epsilon
      min_epsilon: 0.01                       # Minimum value for epsilon

    gym_environment: LunarLanderContinuous
    # Written without a `_` digit separator: `100_000` is an int only under YAML 1.1
    # and is read as a string by YAML 1.2 parsers.
    memory_size: 100000                       # The size of the replay buffer
    batch_size: 32                            # The number of samples to take from the replay buffer
    tot_episodes: 500                         # The number of episodes to run the agent
    seeds_to_test: [0, 1, 2]                  # Seeds to test; presumably one run per seed -- TODO confirm

  # SAC on TB3
  - name: SAC
    parameters:
      hidden_layers_actor: 1                  # The number of hidden layers of the actor neural network
      hidden_layers_critic: 1                 # The number of hidden layers of the critic neural network
      nodes_hidden_layers_actor: 256          # The number of neurons in the hidden layers of the actor neural network
      nodes_hidden_layers_critic: 256         # The number of neurons in the hidden layers of the critic neural network
      lr_actor_optimizer: 0.001               # Learning rate of the actor (policy pi) optimizer
      lr_critic_optimizer: 0.001              # Learning rate of the critic (Q function) optimizer
      lr_temperature_optimizer: 0.001         # Learning rate of the temperature (log entropy coefficient) optimizer
      tau: 0.005                              # Polyak averaging coefficient for the soft update of parameters
      gamma: 0.99                             # Discount factor
      # NOTE(review): confirm that the SAC agent actually reads the two epsilon settings below.
      epsilon_decay: 0.9999                   # Decay factor for epsilon
      min_epsilon: 0.01                       # Minimum value for epsilon

    gym_environment: TB3
    # Written without a `_` digit separator: `100_000` is an int only under YAML 1.1
    # and is read as a string by YAML 1.2 parsers.
    memory_size: 100000                       # The size of the replay buffer
    batch_size: 32                            # The number of samples to take from the replay buffer
    tot_episodes: 300                         # The number of episodes to run the agent
    seeds_to_test: [0, 1, 2]                  # Seeds to test; presumably one run per seed -- TODO confirm

  # DDPG on LunarLanderContinuous
  - name: DDPG
    parameters:
      hidden_layers_actor: 1                  # The number of hidden layers of the actor neural network
      hidden_layers_critic: 1                 # The number of hidden layers of the critic neural network
      nodes_hidden_layers_actor: 256          # The number of neurons in the hidden layers of the actor neural network
      nodes_hidden_layers_critic: 256         # The number of neurons in the hidden layers of the critic neural network
      lr_actor_optimizer: 0.0002              # Learning rate of the actor (policy pi) optimizer
      lr_critic_optimizer: 0.0003             # Learning rate of the critic (Q function) optimizer
      tau: 0.005                              # Polyak averaging coefficient for the soft update of parameters
      gamma: 0.99                             # Discount factor
      epsilon_decay: 0.9999                   # Decay factor for epsilon
      min_epsilon: 0.01                       # Minimum value for epsilon
      noise_std: 0.1                          # Standard deviation of the noise added to the actions

    gym_environment: LunarLanderContinuous
    # Written without a `_` digit separator: `50_000` is an int only under YAML 1.1
    # and is read as a string by YAML 1.2 parsers.
    memory_size: 50000                        # The size of the replay buffer
    batch_size: 64                            # The number of samples to take from the replay buffer
    tot_episodes: 1000                        # The number of episodes to run the agent
    seeds_to_test: [0, 1, 2]                  # Seeds to test; presumably one run per seed -- TODO confirm