Fix link in docstring of RLOOTrainer (#4180)

albertvillanova · web-flow · commit 7a78320f589f · 2025-09-30T16:54:55.000+02:00
diff --git a/trl/trainer/rloo_trainer.py b/trl/trainer/rloo_trainer.py
@@ -97,8 +97,8 @@
 class RLOOTrainer(BaseTrainer):
     """
     Trainer for the Reinforce Leave One Out (RLOO) method. This algorithm was initially proposed in the paper [Back to
-    Basics: Revisiting REINFORCE Style Optimization for Learning from Human Feedback in LLMs]
-    (https://huggingface.co/papers/2402.14740).
+    Basics: Revisiting REINFORCE Style Optimization for Learning from Human Feedback in
+    LLMs](https://huggingface.co/papers/2402.14740).
 
     Example: