We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7919369 commit da191cd (Copy full SHA for da191cd)
swift/megatron/utils/utils.py
@@ -179,6 +179,12 @@ def prepare_adapter(model):
179
for n, p in model.named_parameters():
180
if '.ref_adapter.' in n:
181
p.requires_grad = False
182
+ # setting average_gradients_across_tp_domain
183
+ if args.is_multimodal:
184
+ visual_model = model.visual
185
+ for n, p in visual_model.named_parameters():
186
+ if p.requires_grad:
187
+ p.average_gradients_across_tp_domain = True
188
return model
189
190
0 commit comments