Commit e90546b

transfer sweep improvements from g2048
1 parent a9c02db

4 files changed: +94, -39 lines

pufferlib/config/default.ini
Lines changed: 7 additions & 6 deletions

@@ -3,7 +3,6 @@ package = None
 env_name = None
 policy_name = Policy
 rnn_name = None
-max_suggestion_cost = 3600

 [vec]
 backend = Multiprocessing
@@ -26,10 +25,11 @@ torch_deterministic = True
 cpu_offload = False
 device = cuda
 optimizer = muon
-anneal_lr = True
 precision = float32
 total_timesteps = 10_000_000
 learning_rate = 0.015
+anneal_lr = True
+min_learning_rate = 0.0
 gamma = 0.995
 gae_lambda = 0.90
 update_epochs = 1
@@ -64,6 +64,7 @@ prio_beta0 = 0.2
 method = Protein
 metric = score
 goal = maximize
+max_suggestion_cost = 3600
 downsample = 5
 use_gpu = True
 prune_pareto = True
@@ -127,14 +128,14 @@ scale = auto

 [sweep.train.vtrace_rho_clip]
 distribution = uniform
-min = 0.0
+min = 0.1
 max = 5.0
 mean = 1.0
 scale = auto

 [sweep.train.vtrace_c_clip]
 distribution = uniform
-min = 0.0
+min = 0.1
 max = 5.0
 mean = 1.0
 scale = auto
@@ -164,14 +165,14 @@ scale = auto

 [sweep.train.vf_coef]
 distribution = uniform
-min = 0.0
+min = 0.1
 max = 5.0
 mean = 2.0
 scale = auto

 [sweep.train.max_grad_norm]
 distribution = uniform
-min = 0.0
+min = 0.1
 mean = 1.0
 max = 5.0
 scale = auto
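For context, each [sweep.*] block is a plain INI section describing one hyperparameter's distribution. A minimal, self-contained sketch of how the tightened bound could be consumed; this is illustrative only and not pufferlib's actual parsing code:

import configparser

import numpy as np

# Excerpt mirroring the updated [sweep.train.vf_coef] block above.
INI_EXCERPT = """
[sweep.train.vf_coef]
distribution = uniform
min = 0.1
max = 5.0
mean = 2.0
scale = auto
"""

config = configparser.ConfigParser(interpolation=None)
config.read_string(INI_EXCERPT)
section = config["sweep.train.vf_coef"]

lo, hi = float(section["min"]), float(section["max"])

# With min raised from 0.0 to 0.1, a sampled coefficient can no longer
# collapse to zero and silently disable the corresponding loss term.
vf_coef = float(np.random.uniform(lo, hi))
print(f"sampled vf_coef: {vf_coef:.3f}")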

pufferlib/pufferl.py
Lines changed: 6 additions & 2 deletions

@@ -184,7 +184,8 @@ def __init__(self, config, vecenv, policy, logger=None):

         # Learning rate scheduler
         epochs = config['total_timesteps'] // config['batch_size']
-        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
+        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+            optimizer, T_max=epochs, eta_min=config['min_learning_rate'])
         self.total_epochs = epochs

         # Automatic mixed precision
@@ -1055,7 +1056,10 @@ def sweep(args=None, env_name=None):
         np.random.seed(seed)
         torch.manual_seed(seed)

-        sweep.suggest(args)
+        # In the first run, skip sweep and use the train args specified in the config
+        if i > 0:
+            sweep.suggest(args)
+
         all_logs = train(env_name, args=args, should_stop_early=stop_if_loss_nan)
         all_logs = [e for e in all_logs if target_key in e]

pufferlib/sweep.py
Lines changed: 80 additions & 31 deletions

@@ -17,6 +17,7 @@
 from gpytorch.priors import LogNormalPrior
 from scipy.stats.qmc import Sobol
 from scipy.spatial import KDTree
+from sklearn.linear_model import LogisticRegression

 EPSILON = 1e-6

@@ -126,17 +127,24 @@ def unnormalize(self, value):
         log_spaced = zero_one*(math.log(1-self.max, self.base) - math.log(1-self.min, self.base)) + math.log(1-self.min, self.base)
         return 1 - self.base**log_spaced

-def _params_from_puffer_sweep(sweep_config):
+def _params_from_puffer_sweep(sweep_config, only_include=None):
     param_spaces = {}
+
+    if 'sweep_only' in sweep_config:
+        only_include = [p.strip() for p in sweep_config['sweep_only'].split(',')]
+
     for name, param in sweep_config.items():
-        if name in ('method', 'metric', 'goal', 'downsample', 'use_gpu', 'prune_pareto'):
+        if name in ('method', 'metric', 'goal', 'downsample', 'use_gpu', 'prune_pareto', 'sweep_only', 'max_suggestion_cost'):
             continue

         assert isinstance(param, dict)
         if any(isinstance(param[k], dict) for k in param):
-            param_spaces[name] = _params_from_puffer_sweep(param)
+            param_spaces[name] = _params_from_puffer_sweep(param, only_include)
             continue

+        if only_include and not any(k in name for k in only_include):
+            continue
+
         assert 'distribution' in param
         distribution = param['distribution']
         search_center = param['mean']
@@ -232,8 +240,8 @@ def _fill(self, params, spaces, flat_sample, idx=0):
         return idx

     def get_flat_idx(self, flat_key):
-        return list(self.flat_spaces.keys()).index(flat_key)
-
+        keys = list(self.flat_spaces.keys())
+        return keys.index(flat_key) if flat_key in keys else None

 def pareto_points(observations):
     if not observations:
@@ -421,7 +429,7 @@ def __init__(self,
         sweep_config,
         max_suggestion_cost = 3600,
         resample_frequency = 0,
-        num_random_samples = 30,
+        num_random_samples = 10,
         global_search_scale = 1,
         suggestions_per_pareto = 256,
         seed_with_search_center = True,
@@ -435,21 +443,27 @@ def __init__(self,
         cost_param = "train/total_timesteps",
         prune_pareto = True,
     ):
-        self.device = torch.device("cuda:0" if use_gpu and torch.cuda.is_available() else "cpu")
+        # Process sweep config. NOTE: sweep_config takes precedence. It's not good.
+        _use_gpu = sweep_config['use_gpu'] if 'use_gpu' in sweep_config else use_gpu
+        _prune_pareto = sweep_config['prune_pareto'] if 'prune_pareto' in sweep_config else prune_pareto
+        _max_suggestion_cost = sweep_config['max_suggestion_cost'] if 'max_suggestion_cost' in sweep_config else max_suggestion_cost
+
+        self.device = torch.device("cuda:0" if _use_gpu and torch.cuda.is_available() else "cpu")
         self.hyperparameters = Hyperparameters(sweep_config)
         self.global_search_scale = global_search_scale
         self.suggestions_per_pareto = suggestions_per_pareto
         self.seed_with_search_center = seed_with_search_center
         self.resample_frequency = resample_frequency
-        self.max_suggestion_cost = max_suggestion_cost
+        self.max_suggestion_cost = _max_suggestion_cost
         self.expansion_rate = expansion_rate
         self.gp_training_iter = gp_training_iter
         self.gp_learning_rate = gp_learning_rate
         self.optimizer_reset_frequency = optimizer_reset_frequency
-        self.prune_pareto = prune_pareto
+        self.prune_pareto = _prune_pareto

         self.success_observations = []
         self.failure_observations = []
+
         self.suggestion_idx = 0
         self.min_score, self.max_score = math.inf, -math.inf
         self.log_c_min, self.log_c_max = math.inf, -math.inf
@@ -462,11 +476,17 @@ def __init__(self,
         # self.num_random_samples = 3 * points_per_run * self.hyperparameters.num

         self.cost_param_idx = self.hyperparameters.get_flat_idx(cost_param)
-        self.cost_random_suggestion = self.hyperparameters.search_centers[self.cost_param_idx]
+        self.cost_random_suggestion = None
+        if self.cost_param_idx is not None:
+            self.cost_random_suggestion = self.hyperparameters.search_centers[self.cost_param_idx]

         self.gp_max_obs = gp_max_obs # train time bumps after 800?
         self.infer_batch_size = infer_batch_size

+        # Probably useful only when downsample=1 and each run is expensive.
+        self.use_success_prob = sweep_config['downsample'] == 1
+        self.success_classifier = LogisticRegression(class_weight='balanced')
+
         # Use 64 bit for GP regression
         with default_tensor_dtype(torch.float64):
             # Params taken from HEBO: https://arxiv.org/abs/2012.03826
@@ -514,17 +534,29 @@ def _sample_observations(self, max_size=None, recent_ratio=0.5):
         if not self.success_observations:
             return []

+        observations = self.success_observations.copy()
+
         # Update the stats using the full data
-        y = np.array([e['output'] for e in self.success_observations])
+        y = np.array([e['output'] for e in observations])
         self.min_score, self.max_score = y.min(), y.max()

-        c = np.array([e['cost'] for e in self.success_observations])
+        c = np.array([e['cost'] for e in observations])
         log_c = np.log(np.maximum(c, EPSILON))
         self.log_c_min, self.log_c_max = log_c.min(), log_c.max()

-        params = np.array([e['input'] for e in self.success_observations])
+        # When the data is scarce, also use failed observations
+        if len(observations) < 100 and self.failure_observations:
+            # Give the min score for the failed obs, so this value will keep changing.
+            for e in self.failure_observations:
+                e['output'] = self.min_score
+
+            # NOTE: the order of obs matters since recent obs are always fed into gp training
+            # So, putting the failure obs first.
+            observations = self.failure_observations + observations
+
+        params = np.array([np.append(e['input'], [e['output'], e['cost']]) for e in observations])
         dedup_indices = self._filter_near_duplicates(params)
-        observations = [self.success_observations[i] for i in dedup_indices]
+        observations = [observations[i] for i in dedup_indices]

         if max_size is None:
             max_size = self.gp_max_obs
@@ -574,16 +606,19 @@ def _train_gp_models(self):
     def suggest(self, fill):
         info = {}
         self.suggestion_idx += 1
-        if len(self.success_observations) == 0 and self.seed_with_search_center:
-            suggestion = self.hyperparameters.search_centers
-            return self.hyperparameters.to_dict(suggestion, fill), info
+
+        # NOTE: Changed pufferl to use the train args, NOT the sweep hyperparam search center
+        # if len(self.success_observations) == 0 and self.seed_with_search_center:
+        #     suggestion = self.hyperparameters.search_centers
+        #     return self.hyperparameters.to_dict(suggestion, fill), info

-        elif len(self.success_observations) < self.num_random_samples:
+        if self.suggestion_idx <= self.num_random_samples:
             # Suggest the next point in the Sobol sequence
             zero_one = self.sobol.random(1)[0]
             suggestion = 2*zero_one - 1 # Scale from [0, 1) to [-1, 1)
-            cost_suggestion = self.cost_random_suggestion + 0.1 * np.random.randn()
-            suggestion[self.cost_param_idx] = np.clip(cost_suggestion, -1, 1) # limit the cost
+            if self.cost_param_idx is not None:
+                cost_suggestion = self.cost_random_suggestion + 0.1 * np.random.randn()
+                suggestion[self.cost_param_idx] = np.clip(cost_suggestion, -1, 1) # limit the cost
             return self.hyperparameters.to_dict(suggestion, fill), info

         elif self.resample_frequency and self.suggestion_idx % self.resample_frequency == 0:
@@ -601,14 +636,13 @@ def suggest(self, fill):
             self.cost_opt = torch.optim.Adam(self.gp_cost.parameters(), lr=self.gp_learning_rate, amsgrad=True)

         candidates, pareto_idxs = pareto_points(self.success_observations)
-
         if self.prune_pareto:
             candidates = prune_pareto_front(candidates)

         ### Sample suggestions
         search_centers = np.stack([e['input'] for e in candidates])
-        suggestions = self.hyperparameters.sample(
-            len(candidates)*self.suggestions_per_pareto, mu=search_centers)
+        num_sample = len(candidates) * self.suggestions_per_pareto
+        suggestions = self.hyperparameters.sample(num_sample, mu=search_centers)

         dedup_indices = self._filter_near_duplicates(suggestions)
         suggestions = suggestions[dedup_indices]
@@ -655,16 +689,31 @@ def suggest(self, fill):
         gp_log_c = gp_log_c_norm*(self.log_c_max - self.log_c_min) + self.log_c_min
         gp_c = np.exp(gp_log_c)

-        max_c_mask = gp_c < self.max_suggestion_cost
+        # Maximize score. (Tried upper confidence bounds, but it did more harm because gp was noisy)
+        suggestion_scores = self.hyperparameters.optimize_direction * gp_y_norm

+        # Then, decide the budget for this session and favor closer suggestions
+        max_c_mask = gp_c < self.max_suggestion_cost
         target = (1 + self.expansion_rate)*np.random.rand()
         weight = 1 - abs(target - gp_log_c_norm)
-
-        # NOTE: Tried upper confidence bounds, but it did more harm because gp was noisy
-        score = gp_y_norm
-
-        suggestion_scores = self.hyperparameters.optimize_direction * max_c_mask * (
-            score * weight)
+        suggestion_scores *= max_c_mask * weight
+
+        # Then, consider the prob of training success, only when downsample = 1
+        # NOTE: Useful only in limited scenarios, where each data point is expensive. So turn it off by default.
+        if self.use_success_prob and len(self.success_observations) > 9 and len(self.failure_observations) > 9:
+            success_params = np.array([e['input'] for e in self.success_observations])
+            failure_params = np.array([e['input'] for e in self.failure_observations])
+            X_train = np.vstack([success_params, failure_params])
+            y_train = np.concatenate([
+                np.ones(len(success_params)),
+                np.zeros(len(failure_params))
+            ])
+            if len(np.unique(y_train)) > 1:
+                self.success_classifier.fit(X_train, y_train)
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore", UserWarning)
+                    p_success = self.success_classifier.predict_proba(suggestions)[:, 1]
+                suggestion_scores *= p_success

         best_idx = np.argmax(suggestion_scores)
         info = dict(
@@ -708,7 +757,7 @@ def observe(self, hypers, score, cost, is_failure=False):
             return

         # Ignore obs that are below the minimum cost
-        if params[self.cost_param_idx] <= -1:
+        if self.cost_param_idx is not None and params[self.cost_param_idx] <= -1:
            return

         self.success_observations.append(new_observation)
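The largest addition is the success-probability reweighting in suggest(): when downsample = 1 and there are at least ten successful and ten failed runs, a balanced logistic regression is fit on the observed hyperparameter vectors, and its predicted success probability scales each candidate's score. A self-contained sketch of that idea, with synthetic data in place of the sweep's real observations (the array shapes and values below are invented for illustration):

import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)

# Synthetic stand-ins for observed hyperparameter vectors in [-1, 1]^d.
success_params = rng.uniform(-1, 1, size=(30, 4))
failure_params = rng.uniform(-1, 1, size=(15, 4))

X_train = np.vstack([success_params, failure_params])
y_train = np.concatenate([np.ones(len(success_params)), np.zeros(len(failure_params))])

# class_weight='balanced' keeps the minority class (usually failures) from being drowned out.
classifier = LogisticRegression(class_weight="balanced")
classifier.fit(X_train, y_train)

# Candidate suggestions and their cost-weighted GP scores, also synthetic here.
suggestions = rng.uniform(-1, 1, size=(256, 4))
suggestion_scores = rng.uniform(0, 1, size=256)

# Downweight candidates that look likely to fail, as in the new suggest() logic.
p_success = classifier.predict_proba(suggestions)[:, 1]
suggestion_scores = suggestion_scores * p_success
best_idx = int(np.argmax(suggestion_scores))
print(best_idx, float(suggestion_scores[best_idx]))

The balanced class weighting matters because failed runs are typically the minority class; without it the classifier would tend to predict success everywhere and the reweighting would do little.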

setup.py
Lines changed: 1 addition & 0 deletions

@@ -288,6 +288,7 @@ def run(self):
         'rich_argparse',
         'imageio',
         'gpytorch',
+        'scikit-learn',
         'heavyball>=2.2.0', # contains relevant fixes compared to 1.7.2 and 2.1.1
         'neptune',
         'wandb',
