diff --git a/lm_engine/finetune.py b/lm_engine/finetune.py
index 2139915d6..a266fd66b 100644
--- a/lm_engine/finetune.py
+++ b/lm_engine/finetune.py
@@ -6,7 +6,6 @@
 import torch
 from torch.distributed.tensor.parallel import loss_parallel
-from transformers import set_seed
 
 from .arguments import TrainingArgs, get_args
 from .checkpointing import ensure_last_checkpoint_is_saved, load_checkpoint_for_training, save_checkpoint
@@ -29,6 +28,7 @@
     StepTracker,
     TorchProfiler,
     init_distributed,
+    set_seed,
     setup_tf32,
 )
diff --git a/lm_engine/pretrain.py b/lm_engine/pretrain.py
index dbd44cbb3..23c5634a4 100644
--- a/lm_engine/pretrain.py
+++ b/lm_engine/pretrain.py
@@ -12,7 +12,6 @@
 from torch.distributed.pipelining.schedules import _PipelineSchedule
 from torch.distributed.tensor.parallel import loss_parallel
 from torch.utils.data import DataLoader
-from transformers import set_seed
 
 from .arguments import DistillationArgs, TrainingArgs, get_args
 from .checkpointing import ensure_last_checkpoint_is_saved, load_checkpoint_for_training, save_checkpoint
@@ -39,6 +38,7 @@
     is_torchao_available,
     log_environment,
     log_rank_0,
+    set_seed,
     setup_tf32,
 )
diff --git a/lm_engine/utils/__init__.py b/lm_engine/utils/__init__.py
index 9d8f7a504..99db5a27b 100644
--- a/lm_engine/utils/__init__.py
+++ b/lm_engine/utils/__init__.py
@@ -34,6 +34,7 @@
 from .parallel import ProcessGroupManager, get_pipeline_stage_ids_on_current_rank, run_rank_n
 from .profiler import TorchProfiler
 from .pydantic import BaseArgs
+from .random import set_seed
 from .safetensors import SafeTensorsWeightsManager
 from .step_tracker import StepTracker
 from .tracking import ExperimentsTracker, ProgressBar
diff --git a/lm_engine/utils/random.py b/lm_engine/utils/random.py
new file mode 100644
index 000000000..c9c0ae60e
--- /dev/null
+++ b/lm_engine/utils/random.py
@@ -0,0 +1,15 @@
+# **************************************************
+# Copyright (c) 2025, Mayank Mishra
+# **************************************************
+
+import random
+
+import numpy as np
+import torch
+
+
+def set_seed(seed: int) -> None:
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
diff --git a/tests/hf_models/multi_gpu/tensor_parallel/tensor_parallel_forward.py b/tests/hf_models/multi_gpu/tensor_parallel/tensor_parallel_forward.py
index fce40795e..431ebba7d 100644
--- a/tests/hf_models/multi_gpu/tensor_parallel/tensor_parallel_forward.py
+++ b/tests/hf_models/multi_gpu/tensor_parallel/tensor_parallel_forward.py
@@ -7,12 +7,17 @@
 import torch
 import torch.distributed
-from transformers import set_seed
 
 from lm_engine.enums import Kernel
 from lm_engine.hf_models import GPTBaseConfig, get_model_parallel_class
 from lm_engine.kernels import enable_kernels
-from lm_engine.utils import Communication, ProcessGroupManager, SafeTensorsWeightsManager, string_to_torch_dtype
+from lm_engine.utils import (
+    Communication,
+    ProcessGroupManager,
+    SafeTensorsWeightsManager,
+    set_seed,
+    string_to_torch_dtype,
+)
 
 from ...test_common import TestCommons
diff --git a/tests/hf_models/single_gpu/gpt_base_test.py b/tests/hf_models/single_gpu/gpt_base_test.py
index 579437443..9477c212c 100644
--- a/tests/hf_models/single_gpu/gpt_base_test.py
+++ b/tests/hf_models/single_gpu/gpt_base_test.py
@@ -6,10 +6,10 @@
 import torch
 from parameterized import parameterized
-from transformers import set_seed
 
 from lm_engine.enums import Kernel
 from lm_engine.kernels import enable_kernels
+from lm_engine.utils import set_seed
 
 from ..test_common import TestCommons
diff --git a/tests/hf_models/single_gpu/multihead_latent_attention_test.py b/tests/hf_models/single_gpu/multihead_latent_attention_test.py
index 4accd572b..f20ad9281 100644
--- a/tests/hf_models/single_gpu/multihead_latent_attention_test.py
+++ b/tests/hf_models/single_gpu/multihead_latent_attention_test.py
@@ -4,11 +4,11 @@
 import torch
 from parameterized import parameterized
-from transformers import set_seed
 
 from lm_engine.enums import Kernel
 from lm_engine.hf_models import GPTBaseConfig
 from lm_engine.kernels import enable_kernels
+from lm_engine.utils import set_seed
 
 from ..test_common import TestCommons
diff --git a/tests/hf_models/single_gpu/scattermoe_test.py b/tests/hf_models/single_gpu/scattermoe_test.py
index 0bfedaf79..b0234966e 100644
--- a/tests/hf_models/single_gpu/scattermoe_test.py
+++ b/tests/hf_models/single_gpu/scattermoe_test.py
@@ -4,10 +4,10 @@
 import torch
 from parameterized import parameterized
-from transformers import set_seed
 
 from lm_engine.enums import Kernel
 from lm_engine.kernels import enable_kernels
+from lm_engine.utils import set_seed
 
 from ..test_common import TestCommons
diff --git a/tests/training/params_group/efficient_init_test.py b/tests/training/params_group/efficient_init_test.py
index 183205e6a..31dfac81c 100644
--- a/tests/training/params_group/efficient_init_test.py
+++ b/tests/training/params_group/efficient_init_test.py
@@ -5,13 +5,12 @@
 import os
 import torch
-from transformers import set_seed
 
 from lm_engine.arguments import UnshardingArgs
 from lm_engine.checkpointing import load_checkpoint_and_unshard, save_checkpoint
 from lm_engine.distributed import wrap_model_container_for_distributed_training
 from lm_engine.model_wrapper import get_model_container
-from lm_engine.utils import ProcessGroupManager, load_yaml
+from lm_engine.utils import ProcessGroupManager, load_yaml, set_seed
 
 from ..test_commons import TestCommons