We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2a1ccfa, commit f275189 (Copy full SHA for f275189)
megatron/core/models/gpt/fine_grained_callables.py
@@ -431,7 +431,9 @@ def submodule_combine_forward(
431
"""
432
residual = node.layer_state.residual
433
434
- output = layer.mlp.combine(output, shared_expert_output)
+ output = layer.mlp.combine(output)
435
+ output = layer.mlp.postprocess(output, shared_expert_output)
436
+
437
mlp_output_with_bias = (output, None)
438
439
with layer.bias_dropout_add_exec_handler():
0 commit comments