diff --git a/intermediate_source/scaled_dot_product_attention_tutorial.py b/intermediate_source/scaled_dot_product_attention_tutorial.py
index 666d240ece1..35b1ba7be4e 100644
--- a/intermediate_source/scaled_dot_product_attention_tutorial.py
+++ b/intermediate_source/scaled_dot_product_attention_tutorial.py
@@ -244,7 +244,7 @@ def generate_rand_batch(
 
 ######################################################################
 # Using SDPA with ``torch.compile``
-# =================================
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # With the release of PyTorch 2.0, a new feature called
 # ``torch.compile()`` has been introduced, which can provide
@@ -324,9 +324,9 @@ def generate_rand_batch(
 #
 
 ######################################################################
-# Using SDPA with attn_bias subclasses`
-# ==========================================
-#
+# Using SDPA with attn_bias subclasses
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 # As of PyTorch 2.3, we have added a new submodule that contains tensor subclasses.
 # Designed to be used with ``torch.nn.functional.scaled_dot_product_attention``.
 # The module is named ``torch.nn.attention.bias`` and contains the following two
@@ -394,7 +394,7 @@ def generate_rand_batch(
 
 ######################################################################
 # Conclusion
-# ==========
+# ~~~~~~~~~~~
 #
 # In this tutorial, we have demonstrated the basic usage of
 # ``torch.nn.functional.scaled_dot_product_attention``. We have shown how
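
A minimal sketch of the usage the patched sections describe, assuming PyTorch 2.3 or later (where ``torch.nn.attention.bias`` is available): calling ``torch.nn.functional.scaled_dot_product_attention`` with a causal-bias tensor subclass and composing it with ``torch.compile``. The shapes and the choice of the ``causal_lower_right`` helper are illustrative only and are not part of the patch.

```python
# Minimal sketch, assuming PyTorch >= 2.3 (torch.nn.attention.bias available).
import torch
import torch.nn.functional as F
from torch.nn.attention.bias import causal_lower_right

device = "cuda" if torch.cuda.is_available() else "cpu"

# Illustrative shapes: (batch, num_heads, seq_len, head_dim).
batch, num_heads, q_len, kv_len, head_dim = 2, 8, 4, 12, 64
query = torch.rand(batch, num_heads, q_len, head_dim, device=device)
key = torch.rand(batch, num_heads, kv_len, head_dim, device=device)
value = torch.rand(batch, num_heads, kv_len, head_dim, device=device)

# A tensor-subclass causal bias from torch.nn.attention.bias, aligned to the
# lower-right corner so the mask stays causal when q_len != kv_len.
attn_bias = causal_lower_right(q_len, kv_len)
out = F.scaled_dot_product_attention(query, key, value, attn_mask=attn_bias)

# SDPA also composes with torch.compile.
compiled_sdpa = torch.compile(F.scaled_dot_product_attention)
out_compiled = compiled_sdpa(query, key, value, attn_mask=attn_bias)
```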