From e8b7cbff5854cc061747a8028e9c2af9db5d8e1a Mon Sep 17 00:00:00 2001 From: tsuiusi Date: Tue, 4 Nov 2025 18:07:40 +0100 Subject: [PATCH 1/2] fix: spelling mistake --- megatron/arguments.py | 8 ++++---- megatron/initialize.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index fbd0a590532..2dfaca2ab09 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1487,8 +1487,8 @@ def _add_zero_args(parser): group.add_argument("--zero-stage", type=int, default=1.0) group.add_argument('--zero-reduce-scatter', action='store_true', help='Use reduce scatter if specified') - group.add_argument('--zero-contigious-gradients', action='store_true', - help='Use contigious memory optimizaiton if specified') + group.add_argument('--zero-contiguous-gradients', action='store_true', + help='Use contiguous memory optimizaiton if specified') group.add_argument("--zero-reduce-bucket-size", type=int, default=0.0) group.add_argument("--zero-allgather-bucket-size", type=int, default=0.0) group.add_argument('--remote-device', type=str, default='none', choices=['none', 'cpu', 'nvme'], @@ -1525,8 +1525,8 @@ def _add_activation_checkpoint_args(parser): help='uses activation checkpointing from deepspeed') group.add_argument('--partition-activations', action='store_true', help='partition Activations across GPUs before checkpointing.') - group.add_argument('--contigious-checkpointing', action='store_true', - help='Contigious memory checkpointing for activatoins.') + group.add_argument('--contiguous-checkpointing', action='store_true', + help='contiguous memory checkpointing for activatoins.') group.add_argument('--checkpoint-in-cpu', action='store_true', help='Move the activation checkpoints to CPU.') group.add_argument('--synchronize-each-layer', action='store_true', diff --git a/megatron/initialize.py b/megatron/initialize.py index 6ed5ba9e4b6..606b284c270 100644 --- a/megatron/initialize.py +++ b/megatron/initialize.py @@ -187,7 +187,7 @@ def setup_deepspeed_random_and_activation_checkpointing(args): mpu, deepspeed_config=args.deepspeed_config, partition_activations=args.partition_activations, - contiguous_checkpointing=args.contigious_checkpointing, + contiguous_checkpointing=args.contiguous_checkpointing, num_checkpoints=num_layers, checkpoint_in_cpu=args.checkpoint_in_cpu, synchronize=args.synchronize_each_layer, From ba8cfb482fb478908dbfda1d9873a7422915a556 Mon Sep 17 00:00:00 2001 From: tsuiusi Date: Tue, 4 Nov 2025 18:11:31 +0100 Subject: [PATCH 2/2] style: another spelling issue --- megatron/arguments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/arguments.py b/megatron/arguments.py index 2dfaca2ab09..327c5ff7d04 100644 --- a/megatron/arguments.py +++ b/megatron/arguments.py @@ -1526,7 +1526,7 @@ def _add_activation_checkpoint_args(parser): group.add_argument('--partition-activations', action='store_true', help='partition Activations across GPUs before checkpointing.') group.add_argument('--contiguous-checkpointing', action='store_true', - help='contiguous memory checkpointing for activatoins.') + help='contiguous memory checkpointing for activations.') group.add_argument('--checkpoint-in-cpu', action='store_true', help='Move the activation checkpoints to CPU.') group.add_argument('--synchronize-each-layer', action='store_true',