diff --git a/intermediate_source/scaled_dot_product_attention_tutorial.py b/intermediate_source/scaled_dot_product_attention_tutorial.py
index 666d240ece1..35b1ba7be4e 100644
--- a/intermediate_source/scaled_dot_product_attention_tutorial.py
+++ b/intermediate_source/scaled_dot_product_attention_tutorial.py
@@ -244,7 +244,7 @@ def generate_rand_batch(
 
 ######################################################################
 # Using SDPA with ``torch.compile``
-# =================================
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # With the release of PyTorch 2.0, a new feature called
 # ``torch.compile()`` has been introduced, which can provide
@@ -324,9 +324,9 @@ def generate_rand_batch(
 #
 
 ######################################################################
-# Using SDPA with attn_bias subclasses`
-# ==========================================
-#
+# Using SDPA with attn_bias subclasses
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
 # As of PyTorch 2.3, we have added a new submodule that contains tensor subclasses.
 # Designed to be used with ``torch.nn.functional.scaled_dot_product_attention``.
 # The module is named ``torch.nn.attention.bias`` and contains the following two
@@ -394,7 +394,7 @@ def generate_rand_batch(
 
 ######################################################################
 # Conclusion
-# ==========
+# ~~~~~~~~~~~
 #
 # In this tutorial, we have demonstrated the basic usage of
 # ``torch.nn.functional.scaled_dot_product_attention``. We have shown how
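
A minimal sketch of the usage the patched sections describe, assuming PyTorch 2.3 or later (where ``torch.nn.attention.bias`` is available): calling ``torch.nn.functional.scaled_dot_product_attention`` with a causal-bias tensor subclass and composing it with ``torch.compile``. The shapes and the choice of the ``causal_lower_right`` helper are illustrative only and are not part of the patch.

```python
# Minimal sketch, assuming PyTorch >= 2.3 (torch.nn.attention.bias available).
import torch
import torch.nn.functional as F
from torch.nn.attention.bias import causal_lower_right

device = "cuda" if torch.cuda.is_available() else "cpu"

# Illustrative shapes: (batch, num_heads, seq_len, head_dim).
batch, num_heads, q_len, kv_len, head_dim = 2, 8, 4, 12, 64
query = torch.rand(batch, num_heads, q_len, head_dim, device=device)
key = torch.rand(batch, num_heads, kv_len, head_dim, device=device)
value = torch.rand(batch, num_heads, kv_len, head_dim, device=device)

# A tensor-subclass causal bias from torch.nn.attention.bias, aligned to the
# lower-right corner so the mask stays causal when q_len != kv_len.
attn_bias = causal_lower_right(q_len, kv_len)
out = F.scaled_dot_product_attention(query, key, value, attn_mask=attn_bias)

# SDPA also composes with torch.compile.
compiled_sdpa = torch.compile(F.scaled_dot_product_attention)
out_compiled = compiled_sdpa(query, key, value, attn_mask=attn_bias)
```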