Skip to content

Commit 17a0a1b

Browse files
authored
Merge pull request #416 from kywch/impswp
Sweep improvements
2 parents a9c02db + af3b19c commit 17a0a1b

File tree

4 files changed

+104
-72
lines changed

4 files changed

+104
-72
lines changed

pufferlib/config/default.ini

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package = None
33
env_name = None
44
policy_name = Policy
55
rnn_name = None
6-
max_suggestion_cost = 3600
76

87
[vec]
98
backend = Multiprocessing
@@ -26,10 +25,11 @@ torch_deterministic = True
2625
cpu_offload = False
2726
device = cuda
2827
optimizer = muon
29-
anneal_lr = True
3028
precision = float32
3129
total_timesteps = 10_000_000
3230
learning_rate = 0.015
31+
anneal_lr = True
32+
min_learning_rate = 0.0
3333
gamma = 0.995
3434
gae_lambda = 0.90
3535
update_epochs = 1
@@ -64,6 +64,7 @@ prio_beta0 = 0.2
6464
method = Protein
6565
metric = score
6666
goal = maximize
67+
max_suggestion_cost = 3600
6768
downsample = 5
6869
use_gpu = True
6970
prune_pareto = True
@@ -72,85 +73,73 @@ prune_pareto = True
7273
#distribution = uniform_pow2
7374
#min = 1
7475
#max = 16
75-
#mean = 8
7676
#scale = auto
7777

7878
# TODO: Elim from base
7979
[sweep.train.total_timesteps]
8080
distribution = log_normal
8181
min = 3e7
8282
max = 1e10
83-
mean = 2e8
8483
scale = time
8584

8685
[sweep.train.bptt_horizon]
8786
distribution = uniform_pow2
8887
min = 16
8988
max = 64
90-
mean = 64
9189
scale = auto
9290

9391
[sweep.train.minibatch_size]
9492
distribution = uniform_pow2
9593
min = 8192
9694
max = 65536
97-
mean = 32768
9895
scale = auto
9996

10097
[sweep.train.learning_rate]
10198
distribution = log_normal
10299
min = 0.00001
103-
mean = 0.01
104100
max = 0.1
105101
scale = 0.5
106102

107103
[sweep.train.ent_coef]
108104
distribution = log_normal
109105
min = 0.00001
110-
mean = 0.01
111106
max = 0.2
112107
scale = auto
113108

114109
[sweep.train.gamma]
115110
distribution = logit_normal
116111
min = 0.8
117-
mean = 0.98
118112
max = 0.9999
119113
scale = auto
120114

121115
[sweep.train.gae_lambda]
122116
distribution = logit_normal
123117
min = 0.6
124-
mean = 0.95
125118
max = 0.995
126119
scale = auto
127120

128121
[sweep.train.vtrace_rho_clip]
129122
distribution = uniform
130-
min = 0.0
123+
min = 0.1
131124
max = 5.0
132-
mean = 1.0
133125
scale = auto
134126

135127
[sweep.train.vtrace_c_clip]
136128
distribution = uniform
137-
min = 0.0
129+
min = 0.1
138130
max = 5.0
139-
mean = 1.0
140131
scale = auto
141132

142133
#[sweep.train.update_epochs]
143134
#distribution = int_uniform
144135
#min = 1
145136
#max = 8
146-
#mean = 1
147137
#scale = 2.0
148138

149139
[sweep.train.clip_coef]
150140
distribution = uniform
151141
min = 0.01
152142
max = 1.0
153-
mean = 0.2
154143
scale = auto
155144

156145
# Optimal vf clip can be lower than 0.1,
@@ -159,54 +148,46 @@ scale = auto
159148
distribution = uniform
160149
min = 0.1
161150
max = 5.0
162-
mean = 0.2
163151
scale = auto
164152

165153
[sweep.train.vf_coef]
166154
distribution = uniform
167-
min = 0.0
155+
min = 0.1
168156
max = 5.0
169-
mean = 2.0
170157
scale = auto
171158

172159
[sweep.train.max_grad_norm]
173160
distribution = uniform
174-
min = 0.0
175-
mean = 1.0
161+
min = 0.1
176162
max = 5.0
177163
scale = auto
178164

179165
[sweep.train.adam_beta1]
180166
distribution = logit_normal
181167
min = 0.5
182-
mean = 0.9
183168
max = 0.999
184169
scale = auto
185170

186171
[sweep.train.adam_beta2]
187172
distribution = logit_normal
188173
min = 0.9
189-
mean = 0.999
190174
max = 0.99999
191175
scale = auto
192176

193177
[sweep.train.adam_eps]
194178
distribution = log_normal
195179
min = 1e-14
196-
mean = 1e-8
197180
max = 1e-4
198181
scale = auto
199182

200183
[sweep.train.prio_alpha]
201184
distribution = logit_normal
202185
min = 0.1
203-
mean = 0.85
204186
max = 0.99
205187
scale = auto
206188

207189
[sweep.train.prio_beta0]
208190
distribution = logit_normal
209191
min = 0.1
210-
mean = 0.85
211192
max = 0.99
212193
scale = auto

pufferlib/pufferl.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ def __init__(self, config, vecenv, policy, logger=None):
184184

185185
# Learning rate scheduler
186186
epochs = config['total_timesteps'] // config['batch_size']
187-
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
187+
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
188+
optimizer, T_max=epochs, eta_min=config['min_learning_rate'])
188189
self.total_epochs = epochs
189190

190191
# Automatic mixed precision
@@ -1039,6 +1040,7 @@ def sweep(args=None, env_name=None):
10391040
args = args or load_config(env_name)
10401041
if not args['wandb'] and not args['neptune']:
10411042
raise pufferlib.APIUsageError('Sweeps require either wandb or neptune')
1043+
args['no_model_upload'] = True # Uploading the trained model during a sweep crashed wandb
10421044

10431045
method = args['sweep'].pop('method')
10441046
try:
@@ -1055,7 +1057,10 @@ def sweep(args=None, env_name=None):
10551057
np.random.seed(seed)
10561058
torch.manual_seed(seed)
10571059

1058-
sweep.suggest(args)
1060+
# On the first run, skip the sweep suggestion and use the train args specified in the config
1061+
if i > 0:
1062+
sweep.suggest(args)
1063+
10591064
all_logs = train(env_name, args=args, should_stop_early=stop_if_loss_nan)
10601065
all_logs = [e for e in all_logs if target_key in e]
10611066

0 commit comments

Comments (0)