commit ca79444 (1 parent: eafa066)
llm/llama/auto_parallel/run_pretrain_auto.py
@@ -27,6 +27,7 @@
 import paddle
 import paddle.distributed as dist
 import paddle.distributed.auto_parallel as auto
+from paddle.base.data_feeder import convert_uint16_to_float
 from paddle.profiler.utils import job_schedule_profiler_range
 
 from paddlenlp.ops import Topology
@@ -668,7 +669,10 @@ def loss_func(loss, outputs):
 
 outs = engine.run(micro_batch, mode="train")
 
 if "loss" in outs:
-    tr_loss_step = np.sum(outs["loss"])
+    if outs["loss"].dtype == np.uint16:
+        tr_loss_step = np.sum(convert_uint16_to_float(outs["loss"]))
+    else:
+        tr_loss_step = np.sum(outs["loss"])
 else:
     tr_loss_step = float(0)
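
Why the change: when training runs in bf16, the loss fetched from engine.run comes back to NumPy as a uint16 array holding raw bfloat16 bit patterns, so np.sum over it would add bit patterns rather than loss values. The commit therefore decodes such arrays with convert_uint16_to_float before reducing. A minimal sketch of the same guard, standalone for illustration (the names sum_loss and loss_array are hypothetical and not part of the file):

import numpy as np
from paddle.base.data_feeder import convert_uint16_to_float


def sum_loss(loss_array: np.ndarray) -> float:
    # bf16 losses surface as uint16 bit patterns; decode to float32
    # before summing, mirroring the guard added in this commit.
    if loss_array.dtype == np.uint16:
        loss_array = convert_uint16_to_float(loss_array)
    return float(np.sum(loss_array))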