Replace automatic assert removal with explicit # nodoc marker

tarekziade · tarekziade · commit 68bef39ad08d · 2026-03-02T09:35:45.000+01:00
Assert statements are no longer stripped implicitly. Instead, a line must
be explicitly marked with a # nodoc comment to be hidden from rendered
documentation. The marker handles single lines, multi-line statements
(parentheses/brackets), and block openers (for/if/while/with); for a block
opener, the entire indented body is removed along with the opener.
diff --git a/src/doc_builder/convert_md_to_mdx.py b/src/doc_builder/convert_md_to_mdx.py
@@ -273,9 +273,7 @@ def _process_link(match):
 
 
 def _should_hide_line(stripped):
-    """Check if a line is an assert or is marked with ``# nodoc``."""
-    if stripped.startswith(("assert ", "assert(")):
-        return True
+    """Check if a line is marked with ``# nodoc``."""
     if stripped.endswith("# nodoc") or "# nodoc " in stripped:
         return True
     return False
@@ -291,17 +289,26 @@ def _clean_code_for_doc(code):
     """
     Remove lines that should not appear in rendered documentation:
 
-    * ``assert`` statements (including multi-line ones).
     * Any line (or multi-line statement) annotated with a ``# nodoc`` comment.
-    * Block openers (``for``/``if``/``while``/``with``) whose body was
-      entirely removed by the rules above.
+    * When ``# nodoc`` appears on a block opener (``for``/``if``/etc.),
+      the entire indented body is removed as well.
     """
     lines = code.split("\n")
     result = []
     paren_depth = 0
     skipping = False
+    # When a block opener is marked # nodoc, skip all lines indented deeper.
+    skip_block_indent = -1
     for line in lines:
         stripped = line.lstrip()
+        indent = len(line) - len(stripped)
+
+        # Skip body of a # nodoc block opener
+        if skip_block_indent >= 0:
+            if stripped == "" or indent > skip_block_indent:
+                continue
+            # Back to same or lesser indent — stop skipping
+            skip_block_indent = -1
 
         if skipping:
             # Track parentheses / brackets to find end of multi-line statement
@@ -313,12 +320,14 @@ def _clean_code_for_doc(code):
             continue
 
         if _should_hide_line(stripped):
-            indent = len(line) - len(stripped)
             if _is_multiline(stripped):
                 paren_depth = (
                     stripped.count("(") - stripped.count(")") + stripped.count("[") - stripped.count("]")
                 )
                 skipping = True
+            elif _re_block_opener.match(stripped):
+                # Block opener with # nodoc — skip the entire indented body
+                skip_block_indent = indent
             _remove_empty_block_opener(result, indent)
             continue
 
diff --git a/tests/test_convert_md_to_mdx.py b/tests/test_convert_md_to_mdx.py
@@ -346,29 +346,22 @@ def test_strip_md_extension_from_internal_links(self):
         expected = "See [Local](./local) and [External](https://example.com/page.md)"
         self.assertEqual(strip_md_extension_from_internal_links(text), expected)
 
-    def test_clean_runnable_blocks_basic(self):
-        text = """```py runnable:test_basic
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-assert tokenizer is not None
-output = tokenizer("Hello world")
-assert "input_ids" in output
-print(output)
+    def test_clean_runnable_blocks_strips_annotation(self):
+        text = """```py runnable:test_clean
+from transformers import pipeline
+pipe = pipeline("sentiment-analysis")
+print(pipe("I love this!"))
 ```"""
         expected = """```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
-output = tokenizer("Hello world")
-print(output)
+from transformers import pipeline
+pipe = pipeline("sentiment-analysis")
+print(pipe("I love this!"))
 ```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
     def test_clean_runnable_blocks_python_fence(self):
         text = """```python runnable:test_python
 x = 1
-assert x == 1
 print(x)
 ```"""
         expected = """```python
@@ -377,24 +370,36 @@ def test_clean_runnable_blocks_python_fence(self):
 ```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
-    def test_clean_runnable_blocks_multiline_assert(self):
-        text = """```py runnable:test_multi
-result = do_something()
-assert (
-    result.shape == (1, 10)
-)
-print(result)
+    def test_clean_runnable_blocks_leaves_normal_blocks(self):
+        text = """```py
+x = 1  # nodoc
+print(x)
 ```"""
-        expected = """```py
-result = do_something()
-print(result)
+        # Normal blocks without runnable: should be untouched
+        self.assertEqual(clean_runnable_blocks(text), text)
+
+    def test_clean_runnable_blocks_backticks_in_string(self):
+        """Triple backticks inside a string literal should not close the block early."""
+        text = '''```py runnable:test_backticks
+x = """```
+not a fence
 ```"""
+print(x)
+```'''
+        expected = '''```py
+x = """```
+not a fence
+```"""
+print(x)
+```'''
         self.assertEqual(clean_runnable_blocks(text), expected)
 
-    def test_clean_runnable_blocks_no_asserts(self):
-        text = """```py runnable:test_clean
+    def test_clean_runnable_blocks_nodoc_single_line(self):
+        """A line marked with # nodoc is removed."""
+        text = """```py runnable:test_nodoc
 from transformers import pipeline
 pipe = pipeline("sentiment-analysis")
+result = pipe("test")  # nodoc
 print(pipe("I love this!"))
 ```"""
         expected = """```py
@@ -404,60 +409,76 @@ def test_clean_runnable_blocks_no_asserts(self):
 ```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
-    def test_clean_runnable_blocks_leaves_normal_blocks(self):
-        text = """```py
-assert x == 1
-print(x)
-```"""
-        # Normal blocks without runnable: should be untouched
-        self.assertEqual(clean_runnable_blocks(text), text)
-
-    def test_clean_runnable_blocks_collapses_blank_lines(self):
-        text = """```py runnable:test_blanks
-x = 1
+    def test_clean_runnable_blocks_nodoc_multiline_parens(self):
+        """A multi-line statement marked with # nodoc is fully removed."""
+        text = """```py runnable:test_nodoc_multi
+result = compute()
 
-assert x == 1
+EXPECTED_OUTPUT = (  # nodoc
+    "first value"
+    + "second value"
+)
 
-y = 2
+print(result)
 ```"""
         expected = """```py
-x = 1
+result = compute()
 
-y = 2
+print(result)
 ```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
-    def test_clean_runnable_blocks_assert_with_parens(self):
-        text = """```py runnable:test_parens
-x = compute()
-assert(x > 0)
+    def test_clean_runnable_blocks_nodoc_multiline_brackets(self):
+        """Multi-line list with # nodoc tracked via bracket depth."""
+        text = """```py runnable:test_nodoc_brackets
+x = do_work()
+expected = [  # nodoc
+    1,
+    2,
+    3,
+]
 print(x)
 ```"""
         expected = """```py
-x = compute()
+x = do_work()
 print(x)
 ```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
-    def test_clean_runnable_blocks_for_loop_with_assert_only(self):
-        """A for-loop whose body is only an assert should be removed entirely."""
-        text = """```py runnable:test_for_assert
+    def test_clean_runnable_blocks_nodoc_for_loop(self):
+        """A for-loop marked with # nodoc is removed with its body."""
+        text = """```py runnable:test_nodoc_for
 inputs = prepare()
 
-for key in inputs:
-    assert torch.equal(inputs[key], inputs_transcription[key])
+for key in inputs:  # nodoc
+    do_something(inputs[key])
 
 outputs = model.generate(**inputs)
 ```"""
         expected = """```py
 inputs = prepare()
 
 outputs = model.generate(**inputs)
+```"""
+        self.assertEqual(clean_runnable_blocks(text), expected)
+
+    def test_clean_runnable_blocks_nodoc_collapses_blank_lines(self):
+        text = """```py runnable:test_blanks
+x = 1
+
+y = 2  # nodoc
+
+z = 3
+```"""
+        expected = """```py
+x = 1
+
+z = 3
 ```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
     def test_clean_runnable_blocks_glmasr_batched(self):
-        """Real-world test from huggingface/transformers PR #44277 — test_batched block."""
+        """Real-world test from huggingface/transformers PR #44277 — test_batched block with # nodoc."""
         text = '''```py runnable:test_batched
 import torch
 from transformers import AutoProcessor, GlmAsrForConditionalGeneration
@@ -498,14 +519,14 @@ def test_clean_runnable_blocks_glmasr_batched(self):
     conversation, tokenize=True, add_generation_prompt=True, return_dict=True
 ).to(model.device, dtype=model.dtype)
 
-inputs_transcription = processor.apply_transcription_request(
+inputs_transcription = processor.apply_transcription_request(  # nodoc
     [
         "https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/bcn_weather.mp3",
         "https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/obama2.mp3",
     ],
 ).to(model.device, dtype=model.dtype)
 
-for key in inputs:
+for key in inputs:  # nodoc
     assert torch.equal(inputs[key], inputs_transcription[key])
 
 outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500)
@@ -514,11 +535,11 @@ def test_clean_runnable_blocks_glmasr_batched(self):
     outputs[:, inputs.input_ids.shape[1] :], skip_special_tokens=True
 )
 
-EXPECTED_OUTPUT = [
+EXPECTED_OUTPUT = [  # nodoc
     "Yesterday it was thirty five degrees in Barcelona, but today the temperature will go down to minus twenty degrees.",
     "This week, I traveled to Chicago to deliver my final farewell address to the nation.",
 ]
-assert decoded_outputs == EXPECTED_OUTPUT
+assert decoded_outputs == EXPECTED_OUTPUT  # nodoc
 ```'''
         expected = '''```py
 import torch
@@ -560,110 +581,12 @@ def test_clean_runnable_blocks_glmasr_batched(self):
     conversation, tokenize=True, add_generation_prompt=True, return_dict=True
 ).to(model.device, dtype=model.dtype)
 
-inputs_transcription = processor.apply_transcription_request(
-    [
-        "https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/bcn_weather.mp3",
-        "https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/obama2.mp3",
-    ],
-).to(model.device, dtype=model.dtype)
-
 outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500)
 
 decoded_outputs = processor.batch_decode(
     outputs[:, inputs.input_ids.shape[1] :], skip_special_tokens=True
 )
-
-EXPECTED_OUTPUT = [
-    "Yesterday it was thirty five degrees in Barcelona, but today the temperature will go down to minus twenty degrees.",
-    "This week, I traveled to Chicago to deliver my final farewell address to the nation.",
-]
-```'''
-        self.assertEqual(clean_runnable_blocks(text), expected)
-
-    def test_clean_runnable_blocks_backticks_in_string(self):
-        """Triple backticks inside a string literal should not close the block early."""
-        text = '''```py runnable:test_backticks
-x = """```
-not a fence
-```"""
-assert x is not None
-print(x)
 ```'''
-        expected = '''```py
-x = """```
-not a fence
-```"""
-print(x)
-```'''
-        self.assertEqual(clean_runnable_blocks(text), expected)
-
-    def test_clean_runnable_blocks_nodoc_single_line(self):
-        """A line marked with # nodoc is removed."""
-        text = """```py runnable:test_nodoc
-from transformers import pipeline
-pipe = pipeline("sentiment-analysis")
-result = pipe("test")  # nodoc
-print(pipe("I love this!"))
-```"""
-        expected = """```py
-from transformers import pipeline
-pipe = pipeline("sentiment-analysis")
-print(pipe("I love this!"))
-```"""
-        self.assertEqual(clean_runnable_blocks(text), expected)
-
-    def test_clean_runnable_blocks_nodoc_multiline_parens(self):
-        """A multi-line statement marked with # nodoc is fully removed."""
-        text = """```py runnable:test_nodoc_multi
-result = compute()
-
-EXPECTED_OUTPUT = [  # nodoc
-    "first value",
-    "second value",
-]
-assert result == EXPECTED_OUTPUT
-
-print(result)
-```"""
-        expected = """```py
-result = compute()
-
-print(result)
-```"""
-        self.assertEqual(clean_runnable_blocks(text), expected)
-
-    def test_clean_runnable_blocks_nodoc_for_loop(self):
-        """A for-loop marked with # nodoc is removed with its body."""
-        text = """```py runnable:test_nodoc_for
-inputs = prepare()
-
-for key in inputs:  # nodoc
-    assert torch.equal(inputs[key], other[key])
-
-outputs = model.generate(**inputs)
-```"""
-        expected = """```py
-inputs = prepare()
-
-outputs = model.generate(**inputs)
-```"""
-        self.assertEqual(clean_runnable_blocks(text), expected)
-
-    def test_clean_runnable_blocks_nodoc_multiline_brackets(self):
-        """Multi-line list with # nodoc tracked via bracket depth."""
-        text = """```py runnable:test_nodoc_brackets
-x = do_work()
-expected = [  # nodoc
-    1,
-    2,
-    3,
-]
-print(x)
-```"""
-        expected = """```py
-x = do_work()
-print(x)
-```"""
         self.assertEqual(clean_runnable_blocks(text), expected)
 
     def test_clean_runnable_blocks_glmasr_basic(self):