We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 28c7221 · commit ba456fd (Copy full SHA for ba456fd)
megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
@@ -3177,7 +3177,7 @@ def _bucket_group_gradient_reduce(
3177
# Scale gradients.
3178
scaling_factor = gbuf.gradient_scaling_factor
3179
reduce_op = gradient_reduce_preprocessing(
3180
- gbuf.data, scaling_factor, gbuf.ddp_config
+ bucket.data, scaling_factor, gbuf.ddp_config
3181
)
3182
if not gbuf.is_data_distributed:
3183
# All-reduce the gradients on every rank. No scattering
0 commit comments