@@ -3,7 +3,6 @@ package = None
 env_name = None
 policy_name = Policy
 rnn_name = None
-max_suggestion_cost = 3600
 
 [vec]
 backend = Multiprocessing
@@ -26,10 +25,11 @@ torch_deterministic = True
 cpu_offload = False
 device = cuda
 optimizer = muon
-anneal_lr = True
 precision = float32
 total_timesteps = 10_000_000
 learning_rate = 0.015
+anneal_lr = True
+min_learning_rate = 0.0
 gamma = 0.995
 gae_lambda = 0.90
 update_epochs = 1
@@ -64,6 +64,7 @@ prio_beta0 = 0.2
 method = Protein
 metric = score
 goal = maximize
+max_suggestion_cost = 3600
 downsample = 5
 use_gpu = True
 prune_pareto = True
@@ -72,85 +73,73 @@ prune_pareto = True
 # distribution = uniform_pow2
 # min = 1
 # max = 16
-# mean = 8
 # scale = auto
 
 # TODO: Elim from base
 [sweep.train.total_timesteps]
 distribution = log_normal
 min = 3e7
 max = 1e10
-mean = 2e8
 scale = time
 
 [sweep.train.bptt_horizon]
 distribution = uniform_pow2
 min = 16
 max = 64
-mean = 64
 scale = auto
 
 [sweep.train.minibatch_size]
 distribution = uniform_pow2
 min = 8192
 max = 65536
-mean = 32768
 scale = auto
 
 [sweep.train.learning_rate]
 distribution = log_normal
 min = 0.00001
-mean = 0.01
 max = 0.1
 scale = 0.5
 
 [sweep.train.ent_coef]
 distribution = log_normal
 min = 0.00001
-mean = 0.01
 max = 0.2
 scale = auto
 
 [sweep.train.gamma]
 distribution = logit_normal
 min = 0.8
-mean = 0.98
 max = 0.9999
 scale = auto
 
 [sweep.train.gae_lambda]
 distribution = logit_normal
 min = 0.6
-mean = 0.95
 max = 0.995
 scale = auto
 
 [sweep.train.vtrace_rho_clip]
 distribution = uniform
-min = 0.0
+min = 0.1
 max = 5.0
-mean = 1.0
 scale = auto
 
 [sweep.train.vtrace_c_clip]
 distribution = uniform
-min = 0.0
+min = 0.1
 max = 5.0
-mean = 1.0
 scale = auto
 
 # [sweep.train.update_epochs]
 # distribution = int_uniform
 # min = 1
 # max = 8
-# mean = 1
 # scale = 2.0
 
 [sweep.train.clip_coef]
 distribution = uniform
 min = 0.01
 max = 1.0
-mean = 0.2
 scale = auto
 
 # Optimal vf clip can be lower than 0.1,
@@ -159,54 +148,46 @@ scale = auto
 distribution = uniform
 min = 0.1
 max = 5.0
-mean = 0.2
 scale = auto
 
 [sweep.train.vf_coef]
 distribution = uniform
-min = 0.0
+min = 0.1
 max = 5.0
-mean = 2.0
 scale = auto
 
 [sweep.train.max_grad_norm]
 distribution = uniform
-min = 0.0
-mean = 1.0
+min = 0.1
 max = 5.0
 scale = auto
 
 [sweep.train.adam_beta1]
 distribution = logit_normal
 min = 0.5
-mean = 0.9
 max = 0.999
 scale = auto
 
 [sweep.train.adam_beta2]
 distribution = logit_normal
 min = 0.9
-mean = 0.999
 max = 0.99999
 scale = auto
 
 [sweep.train.adam_eps]
 distribution = log_normal
 min = 1e-14
-mean = 1e-8
 max = 1e-4
 scale = auto
 
 [sweep.train.prio_alpha]
 distribution = logit_normal
 min = 0.1
-mean = 0.85
 max = 0.99
 scale = auto
 
 [sweep.train.prio_beta0]
 distribution = logit_normal
 min = 0.1
-mean = 0.85
 max = 0.99
 scale = auto
0 commit comments