@ physigym: update rl folder; I figured out that TQC works while SAC does not

Dante-Berth · Dante-Berth · commit f9fc3450d78e · 2025-02-24T17:30:09.000+01:00
diff --git a/rl/sb/launch_sb_hyperopt.sh b/rl/sb/launch_sb_hyperopt.sh
@@ -2,12 +2,12 @@
 
 SCRIPT_PATH="rl/sb/sb_hyperopt_own.py"
 ALGO="tqc"
-NUM_INSTANCES=2
+NUM_INSTANCES=3
 NAME="${ALGO}_sb_hyperopt_own"
 
 for i in $(seq 1 $NUM_INSTANCES); do
     # Replace 0 by 255 (unclear in your script, so removed it)
-    nohup python "$SCRIPT_PATH" --algo "$ALGO" > "${NAME}_${i}.log" 2>&1 &
+    nohup python "$SCRIPT_PATH" --algo "$ALGO" --seed "$i" > "${NAME}_${i}.log" 2>&1 &
     echo "Instance $i launched with PID $!"
     sleep 10
 done
diff --git a/rl/sb/launch_stable_baselines.sh b/rl/sb/launch_stable_baselines.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+SCRIPT_PATH="rl/sb/stable_baselines.py"
+ALGO_NAME="SAC"
+NUM_INSTANCES=3
+NAME="${ALGO_NAME}_sb"
+
+for i in $(seq 1 $NUM_INSTANCES); do
+    # Launch one background run per seed; stdout and stderr go to "${NAME}_${i}.log"
+    nohup python "$SCRIPT_PATH" --algo_name "$ALGO_NAME" --seed "$i" > "${NAME}_${i}.log" 2>&1 &
+    echo "Instance $i launched with PID $!"
+    sleep 10
+done
diff --git a/rl/sb/sb_hyperopt_own.py b/rl/sb/sb_hyperopt_own.py
@@ -160,12 +160,14 @@ class TunerConfig:
     wandb_entity: str = "corporate-manu-sureli"
     eval_frequency: int = int(2.5e4)
     observation_type: str = "image"
+    seed: int = 1 
 
 
 class RLHyperparamTuner:
     def __init__(self, algo="TQC", env_id="physigym/ModelPhysiCellEnv-v0", n_trials=300, total_timesteps=int(1e6), pruner_type="median", 
                  start_tracking_step=50000, mean_elements=int(1e2), policy="CnnPolicy", 
-                 wandb_project_name="IMAGE_TME_PHYSIGYM", wandb_entity="corporate-manu-sureli", eval_frequency=int(2.5e4), observation_type="image"):
+                 wandb_project_name="IMAGE_TME_PHYSIGYM", wandb_entity="corporate-manu-sureli", eval_frequency=int(2.5e4), observation_type="image",
+                 seed = 1):
         """
         Class to tune hyperparameters for RL algorithms using Optuna.
 
@@ -195,6 +197,7 @@ def __init__(self, algo="TQC", env_id="physigym/ModelPhysiCellEnv-v0", n_trials=
         os.makedirs(self.log_dir, exist_ok=True)
         self.storage_study = self.log_dir +"/"+self.study_name
         os.makedirs(self.storage_study, exist_ok=True)
+        self.seed = seed
         # Validate algorithm
         if self.algo not in HYPERPARAMS_SAMPLER:
             raise ValueError(f"Algorithm {self.algo} not supported. Choose from {list(HYPERPARAMS_SAMPLER.keys())}.")
@@ -249,8 +252,8 @@ def objective(self, trial: optuna.Trial):
             save_code=True,
         )
         os.makedirs(dir, exist_ok=True)
-        obs, info = env.reset(seed=1)
-        model = algorithm(self.policy, env, verbose=0, tensorboard_log=dir, **hyperparams, seed=1)
+        obs, info = env.reset(seed=self.seed)
+        model = algorithm(self.policy, env, verbose=0, tensorboard_log=dir, **hyperparams, seed=self.seed)
         new_logger = configure(dir, ["tensorboard"])
         model.set_logger(new_logger)
         pruning_callback = TrackingCallback(trial=trial, start_tracking_step=self.start_tracking_step, mean_elements=self.mean_elements, eval_frequency=self.eval_frequency)
diff --git a/rl/sb/stable_baselines.py b/rl/sb/stable_baselines.py
@@ -17,15 +17,17 @@
 
 @dataclass
 class Args:
-    algo_name: str = "TQC"
+    algo_name: str = "SAC"
     """the name of the algo"""
     wandb_project_name: str = "IMAGE_TME_PHYSIGYM"
     """the wandb's project name"""
     wandb_entity: str = "corporate-manu-sureli"
-    # Algorithm specific arguments
+    # Algorithm specific arguments
     env_id: str = "physigym/ModelPhysiCellEnv-v0"
     """the id of the environment"""
     observation_type: str = "image"
+    seed: int = 1
+    """the random seed passed to the environment reset and to the algorithm"""
 # ----------------------
 # 🏆 Initialize WandB
 # ----------------------
@@ -50,7 +52,7 @@ class Args:
 wandb.init(
     project=args.wandb_project_name,
     entity=args.wandb_entity,
-    name=f"{args.algo_name}: {args.observation_type}",
+    name=f"{args.algo_name}: observation {args.observation_type}, seed {args.seed}",
     sync_tensorboard=True,  # Sync TensorBoard logs
     config=config,
     monitor_gym=True,  # Monitor Gym environment
@@ -176,7 +178,7 @@ def step(self, action: np.ndarray):
 env = gym.wrappers.RescaleAction(env, min_action=-1, max_action=1)
 env = gym.wrappers.GrayscaleObservation(env)
 env = gym.wrappers.FrameStackObservation(env, stack_size=1)
-obs, info = env.reset()
+obs, info = env.reset(seed=args.seed)
 
 # ----------------------
 # 📂 Logging Setup
@@ -187,7 +189,7 @@ def step(self, action: np.ndarray):
 # ----------------------
 # 🏃 Train the Model (with WandB Callback)
 # ----------------------
-model = TQC("CnnPolicy", env, verbose=1, tensorboard_log=log_dir)
+model = algorithm("CnnPolicy", env, verbose=1, tensorboard_log=log_dir, seed=args.seed)
 new_logger = configure(log_dir, ["tensorboard"])
 model.set_logger(new_logger)
 model.learn(total_timesteps=int(2e6), log_interval=1, progress_bar=False, callback=TensorboardCallback())