We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c82a5a1 · commit ebea9f2 — Copy full SHA for ebea9f2
megatron/core/tensor_parallel/layers.py
@@ -279,8 +279,7 @@ def backward(ctx, grad_output):
279
# https://github.com/pytorch/pytorch/blob/c47cf9bc7f9e02f649ab4ed53fe4d35732c92ab6/torch/_refs/__init__.py#L2761
280
grad_output = grad_output.contiguous()
281
# Convert the tensor shapes to 2D for execution compatibility
282
- # TODO: Is the reshape preventing us from getting a speedup here?
283
- grad_output = grad_output.reshape(grad_output.shape[0] * grad_output.shape[1],
+ grad_output = grad_output.view(grad_output.shape[0] * grad_output.shape[1],
284
grad_output.shape[2])
285
total_input = total_input.view(total_input.shape[0] * total_input.shape[1],
286
total_input.shape[2])
0 commit comments