Skip to content

Commit 6bfa612

Browse files
hyenal and Sebastien Ehrhardt authored
Fix bug in W&B sweep handling. (#57)
Co-authored-by: Sebastien Ehrhardt <sebastien.ehrhardt@onfido.com>
1 parent 9e5556a commit 6bfa612

File tree

1 file changed

+11
-15
lines changed

1 file changed

+11
-15
lines changed

tfimm/train/train.py

Lines changed: 11 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -75,21 +75,17 @@ def run(cfg: Union[ExperimentConfig, dict], parse_args: bool = True):
7575
name=cfg.experiment_name,
7676
resume=False,
7777
)
78-
79-
# When using sweeps, wandb sets the env variable WANDB_RUNQUEUE_ITEM_ID.
80-
# The presence/absence of this variable allows us to detect if this run is
81-
# part of a sweep.
82-
sweep = bool(os.environ.get("WANDB_RUNQUEUE_ITEM_ID", False))
83-
if sweep:
84-
# If this run is part of a W&B hyperparameter sweep, we need to add
85-
# suffixes to the run names and checkpoint directories, because otherwise
86-
# all runs in the sweep will have the same name and the checkpoints will
87-
# overwrite each other.
88-
ckpt_dir = getattr(cfg.trainer, "ckpt_dir", "")
89-
if ckpt_dir:
90-
setattr(cfg.trainer, "ckpt_dir", os.path.join(ckpt_dir, wandb.run.id))
91-
wandb.run.name = wandb.run.name + f"{wandb.run.id}"
92-
wandb.run.save()
78+
if wandb.run.sweep_id:
79+
# If this run is part of a W&B hyperparameter sweep, we need to add
80+
# suffixes to the run names and checkpoint directories, because otherwise
81+
# all runs in the sweep will have the same name and the checkpoints will
82+
# overwrite each other.
83+
logging.info(f"Job running as part of sweep {wandb.run.sweep_id}.")
84+
ckpt_dir = getattr(cfg.trainer, "ckpt_dir", "")
85+
if ckpt_dir:
86+
setattr(cfg.trainer, "ckpt_dir", os.path.join(ckpt_dir, wandb.run.id))
87+
wandb.run.name = wandb.run.name + f"{wandb.run.id}"
88+
wandb.run.save()
9389

9490
# Construct constituent objects
9591
train_ds = get_class(cfg.train_dataset_class)(cfg=cfg.train_dataset)

0 commit comments

Comments
 (0)