We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a0df357 · commit 67e83ae · Copy full SHA for 67e83ae
trl/trainer/nash_md_trainer.py
@@ -56,8 +56,10 @@
56
57
58
class NashMDTrainer(OnlineDPOTrainer):
59
- r"""
60
- Initialize NashMDTrainer as a subclass of [`OnlineDPOConfig`].
+ """
+ Trainer for the Nash-MD method.
61
+
62
+ It is implemented as a subclass of [`OnlineDPOTrainer`].
63
64
Args:
65
model (`transformers.PreTrainedModel`):
trl/trainer/xpo_trainer.py
class XPOTrainer(OnlineDPOTrainer):
- Initialize XPOTrainer as a subclass of [`OnlineDPOConfig`].
+ Trainer for Exploratory Preference Optimization (XPO).
0 commit comments