Skip to content

Commit 5b3ccc8

Browse files
Minor fix (suryadheeshjith#5)
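Fixes the README setup and training instructions (activate the conda environment; launch training with `torchrun`) and formats the logged loss values to three decimal places when saving checkpoints in `src/train.py`.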
1 parent e67b034 commit 5b3ccc8

File tree

2 files changed: 6 additions and 5 deletions.

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ cd Samudra
 Using conda:
 ```bash
 conda env create -f environment.yml
+conda activate samudra
 ```
 
 Using [`uv`](https://docs.astral.sh/uv/):
@@ -47,7 +48,7 @@ A default training configuration is provided in the file configs/train_samudra_o
 > Note: Ensure your environment is activated before training.
 ```bash
 # Train a new model
-python src/train.py --config path/to/train_config.yaml
+torchrun src/train.py --config path/to/train_config.yaml
 ```
 
 ### Rollout

src/train.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,8 @@ def save_all_checkpoints(self, epoch, v_loss, inf_loss):
         save_best_checkpoint = False
         if v_loss <= self.best_val_loss:
             logging.info(
-                f"Epoch validation loss ({v_loss}) is lower than "
-                f"previous best validation loss ({self.best_val_loss})."
+                f"Epoch validation loss ({v_loss:.3f}) is lower than "
+                f"previous best validation loss ({self.best_val_loss:.3f})."
             )
             logging.info(
                 "Saving lowest validation loss checkpoint to "
@@ -544,8 +544,8 @@ def save_all_checkpoints(self, epoch, v_loss, inf_loss):
             save_best_checkpoint = True  # wait until inference error is updated
         if inf_loss is not None and (inf_loss <= self.best_inf_loss):
             logging.info(
-                f"Epoch inference error ({inf_loss}) is lower than "
-                f"previous best inference error ({self.best_inf_loss})."
+                f"Epoch inference error ({inf_loss:.3f}) is lower than "
+                f"previous best inference error ({self.best_inf_loss:.3f})."
             )
             logging.info(
                 "Saving lowest inference error checkpoint to "

0 commit comments

Comments
 (0)