We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 615aa98 commit 43f7eae (Copy full SHA for 43f7eae)
nemo_rl/algorithms/loss_functions.py
@@ -624,7 +624,7 @@ def _dpo_loss(
624
)
625
626
# TODO a cleaner typing fix would be required (probably that DPOLossFn should not inherit from PreferenceLoss)
627
- def __call__( # type: ignore
+ def __call__( # type: ignore
628
self,
629
next_token_logits: Tensor,
630
data: BatchedDataDict[DPOLossDataDict],
0 commit comments