Commit 78bf77a

🅰️ Remove apex (#4139)
1 parent 3b9ac65 commit 78bf77a

File tree

trl/trainer/nash_md_trainer.py
trl/trainer/online_dpo_trainer.py
trl/trainer/xpo_trainer.py

3 files changed: +4 additions, -29 deletions

trl/trainer/nash_md_trainer.py

Lines changed: 2 additions & 10 deletions
@@ -30,7 +30,7 @@
 )
 from transformers.trainer_utils import EvalPrediction
 from transformers.training_args import OptimizerNames
-from transformers.utils import is_apex_available, is_peft_available
+from transformers.utils import is_peft_available
 
 from ..data_utils import is_conversational, maybe_apply_chat_template
 from ..models.modeling_base import GeometricMixtureWrapper
@@ -47,10 +47,6 @@
 )
 
 
-if is_apex_available():
-    from apex import amp
-
-
 if is_peft_available():
     from peft import PeftModel
 
@@ -501,10 +497,6 @@ def training_step(
         if self.args.n_gpu > 1:
             loss = loss.mean()  # mean() to average on multi-gpu parallel training
 
-        if self.use_apex:
-            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
-                scaled_loss.backward()
-        else:
-            self.accelerator.backward(loss, **kwargs)
+        self.accelerator.backward(loss, **kwargs)
 
         return loss.detach() / self.args.gradient_accumulation_steps

trl/trainer/online_dpo_trainer.py

Lines changed: 1 addition & 9 deletions
@@ -43,7 +43,6 @@
     ProcessorMixin,
     Trainer,
     TrainerCallback,
-    is_apex_available,
 )
 from transformers.models.auto.modeling_auto import MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
 from transformers.trainer_utils import EvalPrediction, seed_worker
@@ -78,9 +77,6 @@
 if is_peft_available():
     from peft import PeftConfig, PeftModel
 
-if is_apex_available():
-    from apex import amp
-
 
 if is_sagemaker_mp_enabled():
     from smdistributed.modelparallel import __version__ as SMP_VERSION
@@ -1457,11 +1453,7 @@ def training_step(
         if self.args.n_gpu > 1:
             loss = loss.mean()  # mean() to average on multi-gpu parallel training
 
-        if self.use_apex:
-            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
-                scaled_loss.backward()
-        else:
-            self.accelerator.backward(loss, **kwargs)
+        self.accelerator.backward(loss, **kwargs)
 
         return loss.detach() / self.args.gradient_accumulation_steps
 

trl/trainer/xpo_trainer.py

Lines changed: 1 addition & 10 deletions
@@ -27,7 +27,6 @@
     PreTrainedTokenizerBase,
     ProcessorMixin,
     TrainerCallback,
-    is_apex_available,
 )
 from transformers.trainer_utils import EvalPrediction
 from transformers.training_args import OptimizerNames
@@ -47,10 +46,6 @@
 from .xpo_config import XPOConfig
 
 
-if is_apex_available():
-    from apex import amp
-
-
 if is_peft_available():
     from peft import PeftModel
 
@@ -545,10 +540,6 @@ def training_step(
         if self.args.n_gpu > 1:
             loss = loss.mean()  # mean() to average on multi-gpu parallel training
 
-        if self.use_apex:
-            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
-                scaled_loss.backward()
-        else:
-            self.accelerator.backward(loss, **kwargs)
+        self.accelerator.backward(loss, **kwargs)
 
         return loss.detach() / self.args.gradient_accumulation_steps
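
In all three trainers the change is the same: the apex-specific branch that scaled the loss through amp.scale_loss is removed, and the backward pass always goes through Accelerate, which performs its own loss scaling when mixed precision is enabled. A minimal, self-contained sketch of that pattern outside the trainers (the model, optimizer, and tensors below are illustrative placeholders, not code from this commit):

# Sketch: backward pass through Accelerate instead of apex.amp.
# The model, optimizer, and data here are illustrative placeholders.
import torch
from accelerate import Accelerator

# Pass mixed_precision="fp16" (on a suitable GPU) to have Accelerate scale the
# loss internally, the role apex's amp.scale_loss used to play.
accelerator = Accelerator()

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
model, optimizer = accelerator.prepare(model, optimizer)

inputs = torch.randn(4, 8, device=accelerator.device)
targets = torch.randn(4, 1, device=accelerator.device)

loss = torch.nn.functional.mse_loss(model(inputs), targets)
accelerator.backward(loss)  # replaces the removed amp.scale_loss(...).backward() branch
optimizer.step()
optimizer.zero_grad()

The practical effect, as the diffs show, is that gradient scaling is handled in one place (the Accelerator) rather than branching on self.use_apex.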
