Commit 370b06c

Add warning about inactive NJT development to related docs
1 parent 50c91dd commit 370b06c

2 files changed: 6 additions & 2 deletions

intermediate_source/transformer_building_blocks.py

Lines changed: 4 additions & 0 deletions
@@ -79,6 +79,10 @@
 # sequence lengths. They eliminate the need for the bug-prone practices of explicit
 # padding and masking (think ``key_padding_mask`` in ``nn.MultiHeadAttention``).
 #
+# ```{warning}
+# Nested tensors are not currently under active development. Use at your own risk.
+# ```
+#
 # * `scaled_dot_product_attention <https://pytorch.org/tutorials/intermediate/scaled_dot_product_attention_tutorial.html>`_
 #
 # ``scaled_dot_product_attention`` is a primitive for

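The comment block above points to ``scaled_dot_product_attention`` as the primitive that lets nested tensors replace explicit padding and ``key_padding_mask``. A minimal sketch of that usage (not part of this commit; it assumes a recent PyTorch build where the ``torch.jagged`` layout and nested-tensor inputs to SDPA are available):

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
num_heads, head_dim = 2, 8
seq_lens = [3, 5, 2]  # ragged sequence lengths, no padding required

# One (seq_len, num_heads, head_dim) tensor per sequence in the batch.
components = [torch.randn(L, num_heads, head_dim) for L in seq_lens]

# Jagged-layout nested tensor: logical shape (batch, ragged, num_heads, head_dim).
njt = torch.nested.nested_tensor(components, layout=torch.jagged)

# SDPA expects (batch, num_heads, seq_len, head_dim), so swap the ragged and
# head dimensions. Each sequence keeps its true length inside the nested
# tensor, so no explicit padding or key_padding_mask is needed.
q = k = v = njt.transpose(1, 2)
out = F.scaled_dot_product_attention(q, k, v)
print(out.is_nested)  # True; logical shape (batch, num_heads, ragged, head_dim)
```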
unstable_source/nestedtensor.py

Lines changed: 2 additions & 2 deletions
@@ -3,6 +3,8 @@
 Getting Started with Nested Tensors
 ===============================================================
 
+**Warning: Nested tensors are not currently under active development. Use at your own risk.**
+
 Nested tensors generalize the shape of regular dense tensors, allowing for representation
 of ragged-sized data.
 
@@ -21,8 +23,6 @@
 they are invaluable for building transformers that can efficiently operate on ragged sequential
 inputs. Below, we present an implementation of multi-head attention using nested tensors that,
 combined with usage of ``torch.compile``, out-performs operating naively on tensors with padding.
-
-Nested tensors are currently a prototype feature and are subject to change.
 """
 
 import numpy as np

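As a quick illustration of the ragged-data point made in the docstring above, a minimal sketch using the strided-layout constructors ``torch.nested.nested_tensor`` and ``torch.nested.to_padded_tensor`` (the example values are hypothetical, not taken from the tutorial):

```python
import torch

# Two "sequences" of different lengths, each with 4 features per element.
a = torch.randn(3, 4)
b = torch.randn(5, 4)

# A nested tensor holds both without padding them to a common length.
nt = torch.nested.nested_tensor([a, b])
print(nt.is_nested)       # True
print(len(nt.unbind()))   # 2 ragged components

# For ops that still require a dense batch, materialize an explicitly
# padded tensor; the ragged structure replaces what padding used to fake.
padded = torch.nested.to_padded_tensor(nt, 0.0)
print(padded.shape)       # torch.Size([2, 5, 4])
```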