Skip to content

Commit 17a0a1b

Browse files
authored
Merge pull request #416 from kywch/impswp
Sweep improvements
2 parents a9c02db + af3b19c commit 17a0a1b

File tree

4 files changed

+104
-72
lines changed

4 files changed

+104
-72
lines changed

pufferlib/config/default.ini

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package = None
33
env_name = None
44
policy_name = Policy
55
rnn_name = None
6-
max_suggestion_cost = 3600
76

87
[vec]
98
backend = Multiprocessing
@@ -26,10 +25,11 @@ torch_deterministic = True
2625
cpu_offload = False
2726
device = cuda
2827
optimizer = muon
29-
anneal_lr = True
3028
precision = float32
3129
total_timesteps = 10_000_000
3230
learning_rate = 0.015
31+
anneal_lr = True
32+
min_learning_rate = 0.0
3333
gamma = 0.995
3434
gae_lambda = 0.90
3535
update_epochs = 1
@@ -64,6 +64,7 @@ prio_beta0 = 0.2
6464
method = Protein
6565
metric = score
6666
goal = maximize
67+
max_suggestion_cost = 3600
6768
downsample = 5
6869
use_gpu = True
6970
prune_pareto = True
@@ -72,85 +73,73 @@ prune_pareto = True
7273
#distribution = uniform_pow2
7374
#min = 1
7475
#max = 16
75-
#mean = 8
7676
#scale = auto
7777

7878
# TODO: Elim from base
7979
[sweep.train.total_timesteps]
8080
distribution = log_normal
8181
min = 3e7
8282
max = 1e10
83-
mean = 2e8
8483
scale = time
8584

8685
[sweep.train.bptt_horizon]
8786
distribution = uniform_pow2
8887
min = 16
8988
max = 64
90-
mean = 64
9189
scale = auto
9290

9391
[sweep.train.minibatch_size]
9492
distribution = uniform_pow2
9593
min = 8192
9694
max = 65536
97-
mean = 32768
9895
scale = auto
9996

10097
[sweep.train.learning_rate]
10198
distribution = log_normal
10299
min = 0.00001
103-
mean = 0.01
104100
max = 0.1
105101
scale = 0.5
106102

107103
[sweep.train.ent_coef]
108104
distribution = log_normal
109105
min = 0.00001
110-
mean = 0.01
111106
max = 0.2
112107
scale = auto
113108

114109
[sweep.train.gamma]
115110
distribution = logit_normal
116111
min = 0.8
117-
mean = 0.98
118112
max = 0.9999
119113
scale = auto
120114

121115
[sweep.train.gae_lambda]
122116
distribution = logit_normal
123117
min = 0.6
124-
mean = 0.95
125118
max = 0.995
126119
scale = auto
127120

128121
[sweep.train.vtrace_rho_clip]
129122
distribution = uniform
130-
min = 0.0
123+
min = 0.1
131124
max = 5.0
132-
mean = 1.0
133125
scale = auto
134126

135127
[sweep.train.vtrace_c_clip]
136128
distribution = uniform
137-
min = 0.0
129+
min = 0.1
138130
max = 5.0
139-
mean = 1.0
140131
scale = auto
141132

142133
#[sweep.train.update_epochs]
143134
#distribution = int_uniform
144135
#min = 1
145136
#max = 8
146-
#mean = 1
147137
#scale = 2.0
148138

149139
[sweep.train.clip_coef]
150140
distribution = uniform
151141
min = 0.01
152142
max = 1.0
153-
mean = 0.2
154143
scale = auto
155144

156145
# Optimal vf clip can be lower than 0.1,
@@ -159,54 +148,46 @@ scale = auto
159148
distribution = uniform
160149
min = 0.1
161150
max = 5.0
162-
mean = 0.2
163151
scale = auto
164152

165153
[sweep.train.vf_coef]
166154
distribution = uniform
167-
min = 0.0
155+
min = 0.1
168156
max = 5.0
169-
mean = 2.0
170157
scale = auto
171158

172159
[sweep.train.max_grad_norm]
173160
distribution = uniform
174-
min = 0.0
175-
mean = 1.0
161+
min = 0.1
176162
max = 5.0
177163
scale = auto
178164

179165
[sweep.train.adam_beta1]
180166
distribution = logit_normal
181167
min = 0.5
182-
mean = 0.9
183168
max = 0.999
184169
scale = auto
185170

186171
[sweep.train.adam_beta2]
187172
distribution = logit_normal
188173
min = 0.9
189-
mean = 0.999
190174
max = 0.99999
191175
scale = auto
192176

193177
[sweep.train.adam_eps]
194178
distribution = log_normal
195179
min = 1e-14
196-
mean = 1e-8
197180
max = 1e-4
198181
scale = auto
199182

200183
[sweep.train.prio_alpha]
201184
distribution = logit_normal
202185
min = 0.1
203-
mean = 0.85
204186
max = 0.99
205187
scale = auto
206188

207189
[sweep.train.prio_beta0]
208190
distribution = logit_normal
209191
min = 0.1
210-
mean = 0.85
211192
max = 0.99
212193
scale = auto

pufferlib/pufferl.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ def __init__(self, config, vecenv, policy, logger=None):
184184

185185
# Learning rate scheduler
186186
epochs = config['total_timesteps'] // config['batch_size']
187-
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
187+
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
188+
optimizer, T_max=epochs, eta_min=config['min_learning_rate'])
188189
self.total_epochs = epochs
189190

190191
# Automatic mixed precision
@@ -1039,6 +1040,7 @@ def sweep(args=None, env_name=None):
10391040
args = args or load_config(env_name)
10401041
if not args['wandb'] and not args['neptune']:
10411042
raise pufferlib.APIUsageError('Sweeps require either wandb or neptune')
1043+
args['no_model_upload'] = True # Uploading the trained model during a sweep crashed wandb
10421044

10431045
method = args['sweep'].pop('method')
10441046
try:
@@ -1055,7 +1057,10 @@ def sweep(args=None, env_name=None):
10551057
np.random.seed(seed)
10561058
torch.manual_seed(seed)
10571059

1058-
sweep.suggest(args)
1060+
# On the first run, skip the sweep suggestion and use the train args specified in the config
1061+
if i > 0:
1062+
sweep.suggest(args)
1063+
10591064
all_logs = train(env_name, args=args, should_stop_early=stop_if_loss_nan)
10601065
all_logs = [e for e in all_logs if target_key in e]
10611066

0 commit comments

Comments (0)