Skip to content

Commit 2c02f06

Browse files
authored
Merge pull request #129 from kengz/schedule
Solution: Pendulum-v0 with DDPG LinearNoisePolicy
2 parents 14e9fcd + 649f1a7 commit 2c02f06

File tree

3 files changed: +266 additions, -17 deletions

rl/memory/prioritized_exp_replay.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __init__(self, env_spec, max_mem_len=10000, e=0.01, alpha=0.6,

         # bump to account for negative terms in reward get_priority
         # and we cannot abs(reward) cuz it's sign sensitive
-        SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD']
+        SOLVED_MEAN_REWARD = self.env_spec['problem']['SOLVED_MEAN_REWARD'] or 10000
         self.min_priority = abs(10 * SOLVED_MEAN_REWARD)

     def get_priority(self, error):

rl/spec/box2d_experiment_specs.json

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,12 +257,74 @@
257257
"hidden_layers_activation": "sigmoid"
258258
},
259259
"param_range": {
260-
"lr": [0.005, 0.01, 0.05, 0.1],
261-
"gamma": [0.97, 0.99, 0.999],
260+
"lr": [0.001, 0.005, 0.01],
261+
"gamma": [0.99, 0.999],
262262
"hidden_layers": [
263-
[400, 200],
263+
[400, 300],
264264
[800, 400],
265-
[400, 200, 100]
265+
[800, 600]
266+
]
267+
}
268+
},
269+
"lunar_ddpg_linearnoise": {
270+
"problem": "LunarLanderContinuous-v2",
271+
"Agent": "DDPG",
272+
"HyperOptimizer": "GridSearch",
273+
"Memory": "LinearMemoryWithForgetting",
274+
"Optimizer": "AdamOptimizer",
275+
"Policy": "LinearNoisePolicy",
276+
"PreProcessor": "NoPreProcessor",
277+
"param": {
278+
"batch_size": 64,
279+
"n_epoch": 1,
280+
"tau": 0.005,
281+
"lr": 0.001,
282+
"critic_lr": 0.001,
283+
"exploration_anneal_episodes": 100,
284+
"gamma": 0.97,
285+
"hidden_layers": [400, 300],
286+
"hidden_layers_activation": "relu",
287+
"output_layer_activation": "tanh"
288+
},
289+
"param_range": {
290+
"lr": [0.0001, 0.0005, 0.001],
291+
"critic_lr": [0.001, 0.005, 0.01],
292+
"gamma": [0.97, 0.99, 0.999],
293+
"hidden_layers": [
294+
[400, 300],
295+
[600, 300],
296+
[800, 400, 200]
297+
]
298+
}
299+
},
300+
"walker_ddpg_linearnoise": {
301+
"problem": "BipedalWalker-v2",
302+
"Agent": "DDPG",
303+
"HyperOptimizer": "GridSearch",
304+
"Memory": "LinearMemoryWithForgetting",
305+
"Optimizer": "AdamOptimizer",
306+
"Policy": "LinearNoisePolicy",
307+
"PreProcessor": "NoPreProcessor",
308+
"param": {
309+
"batch_size": 64,
310+
"n_epoch": 1,
311+
"tau": 0.005,
312+
"lr": 0.001,
313+
"critic_lr": 0.001,
314+
"exploration_anneal_episodes": 100,
315+
"gamma": 0.97,
316+
"hidden_layers": [400, 300],
317+
"hidden_layers_activation": "relu",
318+
"output_layer_activation": "tanh"
319+
},
320+
"param_range": {
321+
"lr": [0.0001, 0.0005, 0.001],
322+
"critic_lr": [0.001, 0.005, 0.01],
323+
"gamma": [0.97, 0.99, 0.999],
324+
"hidden_layers": [
325+
[400, 300],
326+
[600, 300],
327+
[800, 400, 200]
266328
]
267329
}
268330
}

rl/spec/classic_experiment_specs.json

Lines changed: 199 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,195 @@
826826
]
827827
}
828828
},
829+
"pendulum_ddpg_nonoise": {
830+
"problem": "Pendulum-v0",
831+
"Agent": "DDPG",
832+
"HyperOptimizer": "GridSearch",
833+
"Memory": "LinearMemoryWithForgetting",
834+
"Optimizer": "AdamOptimizer",
835+
"Policy": "NoNoisePolicy",
836+
"PreProcessor": "NoPreProcessor",
837+
"param": {
838+
"batch_size": 64,
839+
"n_epoch": 1,
840+
"tau": 0.005,
841+
"lr": 0.001,
842+
"critic_lr": 0.001,
843+
"exploration_anneal_episodes": 50,
844+
"gamma": 0.97,
845+
"hidden_layers": [400, 300],
846+
"hidden_layers_activation": "relu",
847+
"output_layer_activation": "tanh"
848+
},
849+
"param_range": {
850+
"lr": [0.0001, 0.0005, 0.001],
851+
"critic_lr": [0.001, 0.005],
852+
"gamma": [0.95, 0.97, 0.99],
853+
"hidden_layers": [
854+
[400, 300],
855+
[800, 400, 200],
856+
[800, 600, 400, 200]
857+
]
858+
}
859+
},
860+
"pendulum_ddpg_per": {
861+
"problem": "Pendulum-v0",
862+
"Agent": "DDPG",
863+
"HyperOptimizer": "GridSearch",
864+
"Memory": "PrioritizedExperienceReplay",
865+
"Optimizer": "AdamOptimizer",
866+
"Policy": "NoNoisePolicy",
867+
"PreProcessor": "NoPreProcessor",
868+
"param": {
869+
"batch_size": 64,
870+
"n_epoch": 1,
871+
"tau": 0.001,
872+
"lr": 0.001,
873+
"critic_lr": 0.001,
874+
"exploration_anneal_episodes": 50,
875+
"gamma": 0.97,
876+
"hidden_layers": [400, 300],
877+
"hidden_layers_activation": "relu",
878+
"output_layer_activation": "tanh",
879+
"max_mem_len": 30000
880+
},
881+
"param_range": {
882+
"lr": [0.0001, 0.0005, 0.001],
883+
"critic_lr": [0.001, 0.005],
884+
"gamma": [0.95, 0.97, 0.99],
885+
"hidden_layers": [
886+
[400, 300],
887+
[800, 400, 200],
888+
[800, 600, 400, 200]
889+
]
890+
}
891+
},
892+
"pendulum_ddpg_per_linearnoise": {
893+
"problem": "Pendulum-v0",
894+
"Agent": "DDPG",
895+
"HyperOptimizer": "GridSearch",
896+
"Memory": "PrioritizedExperienceReplay",
897+
"Optimizer": "AdamOptimizer",
898+
"Policy": "LinearNoisePolicy",
899+
"PreProcessor": "NoPreProcessor",
900+
"param": {
901+
"batch_size": 64,
902+
"n_epoch": 1,
903+
"tau": 0.005,
904+
"lr": 0.001,
905+
"critic_lr": 0.001,
906+
"exploration_anneal_episodes": 100,
907+
"gamma": 0.97,
908+
"hidden_layers": [400, 300],
909+
"hidden_layers_activation": "relu",
910+
"output_layer_activation": "tanh",
911+
"max_mem_len": 30000
912+
},
913+
"param_range": {
914+
"lr": [0.0001, 0.0005],
915+
"critic_lr": [0.001, 0.005],
916+
"gamma": [0.95, 0.97, 0.99],
917+
"hidden_layers": [
918+
[200, 100],
919+
[400, 200],
920+
[400, 300],
921+
[800, 400]
922+
]
923+
}
924+
},
925+
"pendulum_ddpg_linearnoise": {
926+
"problem": "Pendulum-v0",
927+
"Agent": "DDPG",
928+
"HyperOptimizer": "GridSearch",
929+
"Memory": "LinearMemoryWithForgetting",
930+
"Optimizer": "AdamOptimizer",
931+
"Policy": "LinearNoisePolicy",
932+
"PreProcessor": "NoPreProcessor",
933+
"param": {
934+
"batch_size": 64,
935+
"n_epoch": 1,
936+
"tau": 0.005,
937+
"lr": 0.0001,
938+
"critic_lr": 0.005,
939+
"exploration_anneal_episodes": 50,
940+
"gamma": 0.97,
941+
"hidden_layers": [400, 300],
942+
"hidden_layers_activation": "relu",
943+
"output_layer_activation": "tanh"
944+
},
945+
"param_range": {
946+
"lr": [0.0001, 0.0005, 0.001],
947+
"critic_lr": [0.001, 0.005],
948+
"gamma": [0.95, 0.97, 0.99],
949+
"hidden_layers": [
950+
[400, 300],
951+
[800, 400, 200],
952+
[800, 600, 400, 200]
953+
]
954+
}
955+
},
956+
"pendulum_ddpg_ounoise": {
957+
"problem": "Pendulum-v0",
958+
"Agent": "DDPG",
959+
"HyperOptimizer": "GridSearch",
960+
"Memory": "LinearMemoryWithForgetting",
961+
"Optimizer": "AdamOptimizer",
962+
"Policy": "NoNoisePolicy",
963+
"PreProcessor": "NoPreProcessor",
964+
"param": {
965+
"batch_size": 64,
966+
"n_epoch": 1,
967+
"tau": 0.005,
968+
"lr": 0.001,
969+
"critic_lr": 0.001,
970+
"exploration_anneal_episodes": 50,
971+
"gamma": 0.97,
972+
"hidden_layers": [400, 300],
973+
"hidden_layers_activation": "relu",
974+
"output_layer_activation": "tanh"
975+
},
976+
"param_range": {
977+
"lr": [0.0001, 0.0005, 0.001],
978+
"critic_lr": [0.001, 0.005],
979+
"gamma": [0.95, 0.97, 0.99],
980+
"hidden_layers": [
981+
[400, 300],
982+
[800, 400, 200],
983+
[800, 600, 400, 200]
984+
]
985+
}
986+
},
987+
"pendulum_ddpg_gaussiannoise": {
988+
"problem": "Pendulum-v0",
989+
"Agent": "DDPG",
990+
"HyperOptimizer": "GridSearch",
991+
"Memory": "LinearMemoryWithForgetting",
992+
"Optimizer": "AdamOptimizer",
993+
"Policy": "GaussianWhiteNoisePolicy",
994+
"PreProcessor": "NoPreProcessor",
995+
"param": {
996+
"batch_size": 64,
997+
"n_epoch": 1,
998+
"tau": 0.005,
999+
"lr": 0.001,
1000+
"critic_lr": 0.001,
1001+
"exploration_anneal_episodes": 50,
1002+
"gamma": 0.97,
1003+
"hidden_layers": [400, 300],
1004+
"hidden_layers_activation": "relu",
1005+
"output_layer_activation": "tanh"
1006+
},
1007+
"param_range": {
1008+
"lr": [0.0001, 0.0005, 0.001],
1009+
"critic_lr": [0.001, 0.005],
1010+
"gamma": [0.95, 0.97, 0.99],
1011+
"hidden_layers": [
1012+
[400, 300],
1013+
[800, 400, 200],
1014+
[800, 600, 400, 200]
1015+
]
1016+
}
1017+
},
8291018
"mountain_dqn": {
8301019
"problem": "MountainCar-v0",
8311020
"Agent": "DQN",
@@ -880,13 +1069,13 @@
8801069
]
8811070
}
8821071
},
883-
"mountain_double_dqn_per": {
1072+
"mountain_dqn_per": {
8841073
"problem": "MountainCar-v0",
885-
"Agent": "DoubleDQN",
1074+
"Agent": "DQN",
8861075
"HyperOptimizer": "GridSearch",
8871076
"Memory": "PrioritizedExperienceReplay",
8881077
"Optimizer": "AdamOptimizer",
889-
"Policy": "DoubleDQNBoltzmannPolicy",
1078+
"Policy": "BoltzmannPolicy",
8901079
"PreProcessor": "NoPreProcessor",
8911080
"param": {
8921081
"batch_size": 32,
@@ -895,17 +1084,16 @@
8951084
"hidden_layers": [128, 64],
8961085
"hidden_layers_activation": "sigmoid",
8971086
"output_layer_activation": "linear",
898-
"exploration_anneal_episodes": 200,
899-
"epi_change_lr": 500,
900-
"max_mem_len": 20000
1087+
"exploration_anneal_episodes": 50,
1088+
"epi_change_lr": 150,
1089+
"max_mem_len": 30000
9011090
},
9021091
"param_range": {
903-
"lr": [0.01, 0.02, 0.03, 0.05],
1092+
"lr": [0.005, 0.01, 0.02, 0.05],
9041093
"gamma": [0.99, 0.999],
9051094
"hidden_layers": [
9061095
[400],
907-
[800],
908-
[400, 200]
1096+
[800]
9091097
]
9101098
}
9111099
},
@@ -951,12 +1139,11 @@
9511139
"hidden_layers_activation": "sigmoid"
9521140
},
9531141
"param_range": {
954-
"lr": [0.005, 0.01, 0.05, 0.1],
1142+
"lr": [0.001, 0.005, 0.01],
9551143
"gamma": [0.97, 0.99, 0.999],
9561144
"hidden_layers": [
9571145
[200],
958-
[400, 200],
959-
[400, 200, 100]
1146+
[400]
9601147
]
9611148
}
9621149
}

0 commit comments

Comments (0)