
Commit f521c93

Fix numpy seed in finetuning.py
Set the NumPy seed in finetuning.py so that it is fixed during finetuning (including in custom_dataset.py) and applied in functions such as Dataset.train_test_split. This avoids different train/test splits across ranks, which can cause NCCL collective operation timeout errors.
1 parent d6ae203 · commit f521c93

File tree

1 file changed: +2 −0 lines changed

src/llama_recipes/finetuning.py

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,7 @@
 import random
 import torch
 import torch.optim as optim
+import numpy as np
 from peft import get_peft_model, PeftModel
 from torch.distributed.fsdp import (
     FullyShardedDataParallel as FSDP,
@@ -82,6 +83,7 @@ def main(**kwargs):
     torch.xpu.manual_seed(train_config.seed)
     torch.manual_seed(train_config.seed)
     random.seed(train_config.seed)
+    np.random.seed(train_config.seed)
 
     if train_config.enable_fsdp:
         setup()
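
To illustrate the reasoning behind the change, here is a minimal standalone sketch (not part of the commit): if a splitting routine draws from NumPy's global RNG, seeding that RNG identically on every rank yields the same split everywhere, so ranks never disagree about dataset boundaries and stall in collective operations. The toy_split helper and the seed value below are hypothetical stand-ins; in llama-recipes the actual split is done by the datasets library.

# Minimal sketch: why an identical NumPy seed on every rank matters.
# `toy_split` is a hypothetical stand-in for any splitting routine that
# draws from NumPy's global RNG.
import numpy as np

def toy_split(num_examples, test_fraction=0.2):
    # The permutation comes from the global RNG, like code that uses
    # np.random without an explicit Generator.
    perm = np.random.permutation(num_examples)
    cut = int(num_examples * (1 - test_fraction))
    return perm[:cut], perm[cut:]

seed = 42  # stands in for train_config.seed

np.random.seed(seed)             # rank 0
train_rank0, _ = toy_split(100)

np.random.seed(seed)             # rank 1 (simulated in the same process)
train_rank1, _ = toy_split(100)

# With the same seed, every rank sees the same split; without it, ranks can
# end up with different train/test partitions and hang in NCCL collectives.
assert (train_rank0 == train_rank1).all()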
