Skip to content

Commit 2413688

Browse files
authored
upload the deepspeed json to wandb (axolotl-ai-cloud#2593) [skip ci]
1 parent 5bb1f3d commit 2413688

File tree

1 file changed

+37
-3
lines changed

1 file changed

+37
-3
lines changed

src/axolotl/utils/callbacks/__init__.py

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import gc
6+
import json
67
import logging
78
import os
89
import traceback
@@ -808,11 +809,44 @@ def on_train_begin(
808809
artifact.add_file(temp_file.name)
809810
wandb.log_artifact(artifact)
810811
wandb.save(temp_file.name)
811-
LOG.info(
812-
"The Axolotl config has been saved to the WandB run under files."
813-
)
812+
LOG.info(
813+
"The Axolotl config has been saved to the WandB run under files."
814+
)
814815
except (FileNotFoundError, ConnectionError) as err:
815816
LOG.warning(f"Error while saving Axolotl config to WandB: {err}")
817+
818+
if args.deepspeed:
819+
try:
820+
# Sync the config to the top level of the run. The file cannot be deleted right away because wandb only schedules it for syncing, even with policy='now', so let the OS delete it later.
821+
with NamedTemporaryFile(
822+
mode="w",
823+
delete=False,
824+
suffix=".json",
825+
prefix="deepspeed_config_",
826+
) as temp_file:
827+
skip_upload = False
828+
if isinstance(args.deepspeed, dict):
829+
json.dump(args.deepspeed, temp_file, indent=4)
830+
elif isinstance(args.deepspeed, str) and os.path.exists(
831+
args.deepspeed
832+
):
833+
copyfile(args.deepspeed, temp_file.name)
834+
else:
835+
skip_upload = True
836+
if not skip_upload:
837+
artifact = wandb.Artifact(
838+
f"deepspeed-config-{wandb.run.id}",
839+
type="deepspeed-config",
840+
)
841+
artifact.add_file(temp_file.name)
842+
wandb.log_artifact(artifact)
843+
wandb.save(temp_file.name)
844+
LOG.info(
845+
"The DeepSpeed config has been saved to the WandB run under files."
846+
)
847+
except (FileNotFoundError, ConnectionError) as err:
848+
LOG.warning(f"Error while saving DeepSpeed config to WandB: {err}")
849+
816850
return control
817851

818852

0 commit comments

Comments
 (0)