Skip to content

Commit 91e372a

Browse files
committed
Merge branch 'impswp' into g2048-65k
2 parents 2b50129 + be7254d commit 91e372a

File tree

2 files changed

+13
-37
lines changed

2 files changed

+13
-37
lines changed

pufferlib/config/default.ini

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,85 +73,73 @@ prune_pareto = True
7373
#distribution = uniform_pow2
7474
#min = 1
7575
#max = 16
76-
#mean = 8
7776
#scale = auto
7877

7978
# TODO: Eliminate from base
8079
[sweep.train.total_timesteps]
8180
distribution = log_normal
8281
min = 3e7
8382
max = 1e10
84-
mean = 2e8
8583
scale = time
8684

8785
[sweep.train.bptt_horizon]
8886
distribution = uniform_pow2
8987
min = 16
9088
max = 64
91-
mean = 64
9289
scale = auto
9390

9491
[sweep.train.minibatch_size]
9592
distribution = uniform_pow2
9693
min = 8192
9794
max = 65536
98-
mean = 32768
9995
scale = auto
10096

10197
[sweep.train.learning_rate]
10298
distribution = log_normal
10399
min = 0.00001
104-
mean = 0.01
105100
max = 0.1
106101
scale = 0.5
107102

108103
[sweep.train.ent_coef]
109104
distribution = log_normal
110105
min = 0.00001
111-
mean = 0.01
112106
max = 0.2
113107
scale = auto
114108

115109
[sweep.train.gamma]
116110
distribution = logit_normal
117111
min = 0.8
118-
mean = 0.98
119112
max = 0.9999
120113
scale = auto
121114

122115
[sweep.train.gae_lambda]
123116
distribution = logit_normal
124117
min = 0.6
125-
mean = 0.95
126118
max = 0.995
127119
scale = auto
128120

129121
[sweep.train.vtrace_rho_clip]
130122
distribution = uniform
131123
min = 0.1
132124
max = 5.0
133-
mean = 1.0
134125
scale = auto
135126

136127
[sweep.train.vtrace_c_clip]
137128
distribution = uniform
138129
min = 0.1
139130
max = 5.0
140-
mean = 1.0
141131
scale = auto
142132

143133
#[sweep.train.update_epochs]
144134
#distribution = int_uniform
145135
#min = 1
146136
#max = 8
147-
#mean = 1
148137
#scale = 2.0
149138

150139
[sweep.train.clip_coef]
151140
distribution = uniform
152141
min = 0.01
153142
max = 1.0
154-
mean = 0.2
155143
scale = auto
156144

157145
# Optimal vf clip can be lower than 0.1,
@@ -160,54 +148,46 @@ scale = auto
160148
distribution = uniform
161149
min = 0.1
162150
max = 5.0
163-
mean = 0.2
164151
scale = auto
165152

166153
[sweep.train.vf_coef]
167154
distribution = uniform
168155
min = 0.1
169156
max = 5.0
170-
mean = 2.0
171157
scale = auto
172158

173159
[sweep.train.max_grad_norm]
174160
distribution = uniform
175161
min = 0.1
176-
mean = 1.0
177162
max = 5.0
178163
scale = auto
179164

180165
[sweep.train.adam_beta1]
181166
distribution = logit_normal
182167
min = 0.5
183-
mean = 0.9
184168
max = 0.999
185169
scale = auto
186170

187171
[sweep.train.adam_beta2]
188172
distribution = logit_normal
189173
min = 0.9
190-
mean = 0.999
191174
max = 0.99999
192175
scale = auto
193176

194177
[sweep.train.adam_eps]
195178
distribution = log_normal
196179
min = 1e-14
197-
mean = 1e-8
198180
max = 1e-4
199181
scale = auto
200182

201183
[sweep.train.prio_alpha]
202184
distribution = logit_normal
203185
min = 0.1
204-
mean = 0.85
205186
max = 0.99
206187
scale = auto
207188

208189
[sweep.train.prio_beta0]
209190
distribution = logit_normal
210191
min = 0.1
211-
mean = 0.85
212192
max = 0.99
213193
scale = auto

pufferlib/sweep.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,22 @@ def default_tensor_dtype(dtype):
3131
torch.set_default_dtype(old_dtype)
3232

3333
class Space:
34-
def __init__(self, min, max, scale, mean, is_integer=False):
34+
def __init__(self, min, max, scale, is_integer=False):
3535
self.min = min
3636
self.max = max
3737
self.scale = scale
38-
self.mean = mean # TODO: awkward to have just this normalized
3938
self.norm_min = self.normalize(min)
4039
self.norm_max = self.normalize(max)
41-
self.norm_mean = self.normalize(mean)
40+
# min and max are normalized to -1 and 1, so just use the midpoint 0 as the normalized mean
41+
self.norm_mean = 0
4242
self.is_integer = is_integer
4343

4444
class Linear(Space):
45-
def __init__(self, min, max, scale, mean, is_integer=False):
45+
def __init__(self, min, max, scale, is_integer=False):
4646
if scale == 'auto':
4747
scale = 0.5
4848

49-
super().__init__(min, max, scale, mean, is_integer)
49+
super().__init__(min, max, scale, is_integer)
5050

5151
def normalize(self, value):
5252
#assert isinstance(value, (int, float))
@@ -61,12 +61,12 @@ def unnormalize(self, value):
6161
return value
6262

6363
class Pow2(Space):
64-
def __init__(self, min, max, scale, mean, is_integer=False):
64+
def __init__(self, min, max, scale, is_integer=False):
6565
if scale == 'auto':
6666
scale = 0.5
6767
#scale = 2 / (np.log2(max) - np.log2(min))
6868

69-
super().__init__(min, max, scale, mean, is_integer)
69+
super().__init__(min, max, scale, is_integer)
7070

7171
def normalize(self, value):
7272
#assert isinstance(value, (int, float))
@@ -83,14 +83,14 @@ def unnormalize(self, value):
8383
class Log(Space):
8484
base: int = 10
8585

86-
def __init__(self, min, max, scale, mean, is_integer=False):
86+
def __init__(self, min, max, scale, is_integer=False):
8787
if scale == 'time':
8888
# TODO: Set the scale parameter more intuitively, based on the number of jumps from min to max
8989
scale = 1 / (np.log2(max) - np.log2(min))
9090
elif scale == 'auto':
9191
scale = 0.5
9292

93-
super().__init__(min, max, scale, mean, is_integer)
93+
super().__init__(min, max, scale, is_integer)
9494

9595
def normalize(self, value):
9696
#assert isinstance(value, (int, float))
@@ -109,11 +109,11 @@ def unnormalize(self, value):
109109
class Logit(Space):
110110
base: int = 10
111111

112-
def __init__(self, min, max, scale, mean, is_integer=False):
112+
def __init__(self, min, max, scale, is_integer=False):
113113
if scale == 'auto':
114114
scale = 0.5
115115

116-
super().__init__(min, max, scale, mean, is_integer)
116+
super().__init__(min, max, scale, is_integer)
117117

118118
def normalize(self, value):
119119
#assert isinstance(value, (int, float))
@@ -147,12 +147,10 @@ def _params_from_puffer_sweep(sweep_config, only_include=None):
147147

148148
assert 'distribution' in param
149149
distribution = param['distribution']
150-
search_center = param['mean']
151150
kwargs = dict(
152151
min=param['min'],
153152
max=param['max'],
154153
scale=param['scale'],
155-
mean=search_center,
156154
)
157155
if distribution == 'uniform':
158156
space = Linear(**kwargs)
@@ -432,7 +430,6 @@ def __init__(self,
432430
num_random_samples = 10,
433431
global_search_scale = 1,
434432
suggestions_per_pareto = 256,
435-
seed_with_search_center = True,
436433
expansion_rate = 0.25,
437434
gp_training_iter = 50,
438435
gp_learning_rate = 0.001,
@@ -452,7 +449,6 @@ def __init__(self,
452449
self.hyperparameters = Hyperparameters(sweep_config)
453450
self.global_search_scale = global_search_scale
454451
self.suggestions_per_pareto = suggestions_per_pareto
455-
self.seed_with_search_center = seed_with_search_center
456452
self.resample_frequency = resample_frequency
457453
self.max_suggestion_cost = _max_suggestion_cost
458454
self.expansion_rate = expansion_rate
@@ -641,8 +637,8 @@ def suggest(self, fill):
641637

642638
### Sample suggestions
643639
search_centers = np.stack([e['input'] for e in candidates])
644-
num_sample = len(candidates) * self.suggestions_per_pareto
645-
suggestions = self.hyperparameters.sample(num_sample, mu=search_centers)
640+
suggestions = self.hyperparameters.sample(
641+
len(candidates)*self.suggestions_per_pareto, mu=search_centers)
646642

647643
dedup_indices = self._filter_near_duplicates(suggestions)
648644
suggestions = suggestions[dedup_indices]

0 commit comments

Comments
 (0)