
Commit c6834de

alessandroassirelli98 and alessandro.assirelli authored
Adds gradient cap for teacher student distillation (#91)
This PR adds a gradient cap to the teacher-student distillation setup. The goal is to prevent excessively large gradients from destabilizing training.

📌 Changes
- Introduced a clipping mechanism that caps the gradient norm during backpropagation in the distillation update.
- Helps improve training stability, especially in early iterations.

Co-authored-by: alessandro.assirelli <[email protected]>
1 parent d38a378 commit c6834de
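The change follows the standard PyTorch pattern for gradient-norm clipping: bound the global norm of the gradients between loss.backward() and optimizer.step(). The sketch below illustrates that pattern in isolation; the student and teacher modules, the optimizer, and the max_grad_norm value are placeholders for illustration, not the repository's actual classes.

# Minimal sketch of gradient-norm clipping in a distillation-style update (illustrative, not rsl_rl code).
import torch
import torch.nn as nn

student = nn.Linear(8, 2)   # stand-in for the student policy
teacher = nn.Linear(8, 2)   # stand-in for the frozen teacher policy
optimizer = torch.optim.Adam(student.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()
max_grad_norm = 1.0         # set to None to disable clipping

obs = torch.randn(32, 8)
with torch.no_grad():
    target_actions = teacher(obs)   # teacher supplies the supervision signal

loss = loss_fn(student(obs), target_actions)
optimizer.zero_grad()
loss.backward()
if max_grad_norm:
    # Rescale all student gradients so their global L2 norm is at most max_grad_norm.
    nn.utils.clip_grad_norm_(student.parameters(), max_grad_norm)
optimizer.step()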

File tree

2 files changed: +24 -9 lines changed


rsl_rl/algorithms/distillation.py

Lines changed: 4 additions & 0 deletions
@@ -25,6 +25,7 @@ def __init__(
         num_learning_epochs=1,
         gradient_length=15,
         learning_rate=1e-3,
+        max_grad_norm=None,
         loss_type="mse",
         device="cpu",
         # Distributed training parameters
@@ -55,6 +56,7 @@ def __init__(
         self.num_learning_epochs = num_learning_epochs
         self.gradient_length = gradient_length
         self.learning_rate = learning_rate
+        self.max_grad_norm = max_grad_norm
 
         # initialize the loss function
         if loss_type == "mse":
@@ -127,6 +129,8 @@ def update(self):
                 loss.backward()
                 if self.is_multi_gpu:
                     self.reduce_parameters()
+                if self.max_grad_norm:
+                    nn.utils.clip_grad_norm_(self.policy.student.parameters(), self.max_grad_norm)
                 self.optimizer.step()
                 self.policy.detach_hidden_states()
                 loss = 0
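With this change, clipping is opt-in: max_grad_norm defaults to None, and any truthy value triggers nn.utils.clip_grad_norm_ on the student parameters after the optional multi-GPU gradient reduction and before the optimizer step. A rough sketch of enabling it through the algorithm configuration follows; the surrounding keys mirror the __init__ parameters shown above, but the exact config layout used by a given training script may differ.

# Illustrative config sketch; "max_grad_norm" is the only key added by this commit.
algorithm_cfg = {
    "class_name": "Distillation",
    "num_learning_epochs": 1,
    "gradient_length": 15,
    "learning_rate": 1e-3,
    "loss_type": "mse",
    "max_grad_norm": 1.0,   # cap on the student's global gradient norm (None disables clipping)
}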

rsl_rl/runners/on_policy_runner.py

Lines changed: 20 additions & 9 deletions
@@ -93,7 +93,9 @@ def __init__(self, env: VecEnv, train_cfg: dict, log_dir: str | None = None, dev
 
         # initialize algorithm
         alg_class = eval(self.alg_cfg.pop("class_name"))
-        self.alg: PPO | Distillation = alg_class(policy, device=self.device, **self.alg_cfg, multi_gpu_cfg=self.multi_gpu_cfg)
+        self.alg: PPO | Distillation = alg_class(
+            policy, device=self.device, **self.alg_cfg, multi_gpu_cfg=self.multi_gpu_cfg
+        )
 
         # store training configuration
         self.num_steps_per_env = self.cfg["num_steps_per_env"]
@@ -387,8 +389,13 @@ def log(self, locs: dict, width: int = 80, pad: int = 35):
             f"""{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"""
             f"""{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"""
             f"""{'Time elapsed:':>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time))}\n"""
-            f"""{'ETA:':>{pad}} {time.strftime("%H:%M:%S", time.gmtime(self.tot_time / (locs['it'] - locs['start_iter'] + 1) * (
-                locs['start_iter'] + locs['num_learning_iterations'] - locs['it'])))}\n"""
+            f"""{'ETA:':>{pad}} {time.strftime(
+                "%H:%M:%S",
+                time.gmtime(
+                    self.tot_time / (locs['it'] - locs['start_iter'] + 1)
+                    * (locs['start_iter'] + locs['num_learning_iterations'] - locs['it'])
+                )
+            )}\n"""
         )
         print(log_string)
 
@@ -513,16 +520,20 @@ def _configure_multi_gpu(self):
 
         # check if user has device specified for local rank
         if self.device != f"cuda:{self.gpu_local_rank}":
-            raise ValueError(f"Device '{self.device}' does not match expected device for local rank '{self.gpu_local_rank}'.")
+            raise ValueError(
+                f"Device '{self.device}' does not match expected device for local rank '{self.gpu_local_rank}'."
+            )
         # validate multi-gpu configuration
         if self.gpu_local_rank >= self.gpu_world_size:
-            raise ValueError(f"Local rank '{self.gpu_local_rank}' is greater than or equal to world size '{self.gpu_world_size}'.")
+            raise ValueError(
+                f"Local rank '{self.gpu_local_rank}' is greater than or equal to world size '{self.gpu_world_size}'."
+            )
         if self.gpu_global_rank >= self.gpu_world_size:
-            raise ValueError(f"Global rank '{self.gpu_global_rank}' is greater than or equal to world size '{self.gpu_world_size}'.")
+            raise ValueError(
+                f"Global rank '{self.gpu_global_rank}' is greater than or equal to world size '{self.gpu_world_size}'."
+            )
 
         # initialize torch distributed
-        torch.distributed.init_process_group(
-            backend="nccl", rank=self.gpu_global_rank, world_size=self.gpu_world_size
-        )
+        torch.distributed.init_process_group(backend="nccl", rank=self.gpu_global_rank, world_size=self.gpu_world_size)
         # set device to the local rank
         torch.cuda.set_device(self.gpu_local_rank)
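The runner changes above are formatting-only; behavior is unchanged. For context, the multi-GPU validation assumes a torchrun-style launcher in which each process reads its ranks from the standard torch.distributed environment variables and must pin itself to the GPU matching its local rank. A rough, self-contained sketch of that convention (not code from this repository):

# Sketch of the rank/device convention that _configure_multi_gpu validates.
# Launch with, e.g.:  torchrun --nproc_per_node=2 train.py
import os
import torch

local_rank = int(os.environ.get("LOCAL_RANK", 0))   # rank within this machine
global_rank = int(os.environ.get("RANK", 0))        # rank across all machines
world_size = int(os.environ.get("WORLD_SIZE", 1))   # total number of processes

device = f"cuda:{local_rank}"                       # must match the local rank
assert local_rank < world_size and global_rank < world_size

if world_size > 1:
    torch.distributed.init_process_group(backend="nccl", rank=global_rank, world_size=world_size)
    torch.cuda.set_device(local_rank)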
