We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent de32d30, commit 086a9f9 (Copy full SHA for 086a9f9)
swift/megatron/model/gpt_model.py
@@ -79,7 +79,7 @@ def __init__(
79
use_cpu_initialization=config.use_cpu_initialization,
80
)
81
# save memory
82
- for i in range(config.num_layers):
+ for i in range(len(self.decoder.layers)):
83
if hasattr(self.decoder.layers[i].self_attention, 'rotary_pos_emb'):
84
del self.decoder.layers[i].self_attention.rotary_pos_emb
85
self.attention_scaling = 1.
0 commit comments