diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py
index c6c327f8511..b5918dc09a1 100644
--- a/beginner_source/basics/optimization_tutorial.py
+++ b/beginner_source/basics/optimization_tutorial.py
@@ -134,9 +134,9 @@ def forward(self, x):
 #####################################
 # Inside the training loop, optimization happens in three steps:
-# * Call ``optimizer.zero_grad()`` to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.
 # * Backpropagate the prediction loss with a call to ``loss.backward()``. PyTorch deposits the gradients of the loss w.r.t. each parameter.
 # * Once we have our gradients, we call ``optimizer.step()`` to adjust the parameters by the gradients collected in the backward pass.
+# * Call ``optimizer.zero_grad()`` to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.
 ########################################
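
For context, the reordered bullets appear to describe the order of the calls inside the tutorial's training loop: backpropagate, step, then zero the gradients for the next iteration. Below is a minimal sketch of such a loop, not taken from the file itself; the names `train_loop`, `dataloader`, `loss_fn`, and `optimizer` are assumptions.

```python
def train_loop(dataloader, model, loss_fn, optimizer):
    # Illustrative sketch only; names and structure are assumed, not quoted from the tutorial.
    model.train()
    for X, y in dataloader:
        pred = model(X)          # forward pass
        loss = loss_fn(pred, y)  # compute the prediction loss

        loss.backward()          # deposit gradients of the loss w.r.t. each parameter
        optimizer.step()         # adjust parameters using the collected gradients
        optimizer.zero_grad()    # reset gradients so they don't accumulate across iterations
```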