Skip to content

Commit 68bef39

Browse files
committed
Replace automatic assert removal with explicit # nodoc marker
Instead of implicitly stripping all assert statements, the converter now hides a line from the rendered documentation only when it is explicitly marked with a # nodoc comment. The marker is handled on single lines, on multi-line statements (tracked through parentheses/brackets), and on block openers (for/if/while/with), whose entire indented body is removed along with the opener.
1 parent 7b6b46a commit 68bef39

File tree

2 files changed

+94
-162
lines changed

2 files changed

+94
-162
lines changed

src/doc_builder/convert_md_to_mdx.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -273,9 +273,7 @@ def _process_link(match):
273273

274274

275275
def _should_hide_line(stripped):
276-
"""Check if a line is an assert or is marked with ``# nodoc``."""
277-
if stripped.startswith(("assert ", "assert(")):
278-
return True
276+
"""Check if a line is marked with ``# nodoc``."""
279277
if stripped.endswith("# nodoc") or "# nodoc " in stripped:
280278
return True
281279
return False
@@ -291,17 +289,26 @@ def _clean_code_for_doc(code):
291289
"""
292290
Remove lines that should not appear in rendered documentation:
293291
294-
* ``assert`` statements (including multi-line ones).
295292
* Any line (or multi-line statement) annotated with a ``# nodoc`` comment.
296-
* Block openers (``for``/``if``/``while``/``with``) whose body was
297-
entirely removed by the rules above.
293+
* When ``# nodoc`` appears on a block opener (``for``/``if``/etc.),
294+
the entire indented body is removed as well.
298295
"""
299296
lines = code.split("\n")
300297
result = []
301298
paren_depth = 0
302299
skipping = False
300+
# When a block opener is marked # nodoc, skip all lines indented deeper.
301+
skip_block_indent = -1
303302
for line in lines:
304303
stripped = line.lstrip()
304+
indent = len(line) - len(stripped)
305+
306+
# Skip body of a # nodoc block opener
307+
if skip_block_indent >= 0:
308+
if stripped == "" or indent > skip_block_indent:
309+
continue
310+
# Back to same or lesser indent — stop skipping
311+
skip_block_indent = -1
305312

306313
if skipping:
307314
# Track parentheses / brackets to find end of multi-line statement
@@ -313,12 +320,14 @@ def _clean_code_for_doc(code):
313320
continue
314321

315322
if _should_hide_line(stripped):
316-
indent = len(line) - len(stripped)
317323
if _is_multiline(stripped):
318324
paren_depth = (
319325
stripped.count("(") - stripped.count(")") + stripped.count("[") - stripped.count("]")
320326
)
321327
skipping = True
328+
elif _re_block_opener.match(stripped):
329+
# Block opener with # nodoc — skip the entire indented body
330+
skip_block_indent = indent
322331
_remove_empty_block_opener(result, indent)
323332
continue
324333

tests/test_convert_md_to_mdx.py

Lines changed: 78 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -346,29 +346,22 @@ def test_strip_md_extension_from_internal_links(self):
346346
expected = "See [Local](./local) and [External](https://example.com/page.md)"
347347
self.assertEqual(strip_md_extension_from_internal_links(text), expected)
348348

349-
def test_clean_runnable_blocks_basic(self):
350-
text = """```py runnable:test_basic
351-
from transformers import AutoTokenizer
352-
353-
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
354-
assert tokenizer is not None
355-
output = tokenizer("Hello world")
356-
assert "input_ids" in output
357-
print(output)
349+
def test_clean_runnable_blocks_strips_annotation(self):
350+
text = """```py runnable:test_clean
351+
from transformers import pipeline
352+
pipe = pipeline("sentiment-analysis")
353+
print(pipe("I love this!"))
358354
```"""
359355
expected = """```py
360-
from transformers import AutoTokenizer
361-
362-
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
363-
output = tokenizer("Hello world")
364-
print(output)
356+
from transformers import pipeline
357+
pipe = pipeline("sentiment-analysis")
358+
print(pipe("I love this!"))
365359
```"""
366360
self.assertEqual(clean_runnable_blocks(text), expected)
367361

368362
def test_clean_runnable_blocks_python_fence(self):
369363
text = """```python runnable:test_python
370364
x = 1
371-
assert x == 1
372365
print(x)
373366
```"""
374367
expected = """```python
@@ -377,24 +370,36 @@ def test_clean_runnable_blocks_python_fence(self):
377370
```"""
378371
self.assertEqual(clean_runnable_blocks(text), expected)
379372

380-
def test_clean_runnable_blocks_multiline_assert(self):
381-
text = """```py runnable:test_multi
382-
result = do_something()
383-
assert (
384-
result.shape == (1, 10)
385-
)
386-
print(result)
373+
def test_clean_runnable_blocks_leaves_normal_blocks(self):
374+
text = """```py
375+
x = 1 # nodoc
376+
print(x)
387377
```"""
388-
expected = """```py
389-
result = do_something()
390-
print(result)
378+
# Normal blocks without runnable: should be untouched
379+
self.assertEqual(clean_runnable_blocks(text), text)
380+
381+
def test_clean_runnable_blocks_backticks_in_string(self):
382+
"""Triple backticks inside a string literal should not close the block early."""
383+
text = '''```py runnable:test_backticks
384+
x = """```
385+
not a fence
391386
```"""
387+
print(x)
388+
```'''
389+
expected = '''```py
390+
x = """```
391+
not a fence
392+
```"""
393+
print(x)
394+
```'''
392395
self.assertEqual(clean_runnable_blocks(text), expected)
393396

394-
def test_clean_runnable_blocks_no_asserts(self):
395-
text = """```py runnable:test_clean
397+
def test_clean_runnable_blocks_nodoc_single_line(self):
398+
"""A line marked with # nodoc is removed."""
399+
text = """```py runnable:test_nodoc
396400
from transformers import pipeline
397401
pipe = pipeline("sentiment-analysis")
402+
result = pipe("test") # nodoc
398403
print(pipe("I love this!"))
399404
```"""
400405
expected = """```py
@@ -404,60 +409,76 @@ def test_clean_runnable_blocks_no_asserts(self):
404409
```"""
405410
self.assertEqual(clean_runnable_blocks(text), expected)
406411

407-
def test_clean_runnable_blocks_leaves_normal_blocks(self):
408-
text = """```py
409-
assert x == 1
410-
print(x)
411-
```"""
412-
# Normal blocks without runnable: should be untouched
413-
self.assertEqual(clean_runnable_blocks(text), text)
414-
415-
def test_clean_runnable_blocks_collapses_blank_lines(self):
416-
text = """```py runnable:test_blanks
417-
x = 1
412+
def test_clean_runnable_blocks_nodoc_multiline_parens(self):
413+
"""A multi-line statement marked with # nodoc is fully removed."""
414+
text = """```py runnable:test_nodoc_multi
415+
result = compute()
418416
419-
assert x == 1
417+
EXPECTED_OUTPUT = ( # nodoc
418+
"first value"
419+
+ "second value"
420+
)
420421
421-
y = 2
422+
print(result)
422423
```"""
423424
expected = """```py
424-
x = 1
425+
result = compute()
425426
426-
y = 2
427+
print(result)
427428
```"""
428429
self.assertEqual(clean_runnable_blocks(text), expected)
429430

430-
def test_clean_runnable_blocks_assert_with_parens(self):
431-
text = """```py runnable:test_parens
432-
x = compute()
433-
assert(x > 0)
431+
def test_clean_runnable_blocks_nodoc_multiline_brackets(self):
432+
"""Multi-line list with # nodoc tracked via bracket depth."""
433+
text = """```py runnable:test_nodoc_brackets
434+
x = do_work()
435+
expected = [ # nodoc
436+
1,
437+
2,
438+
3,
439+
]
434440
print(x)
435441
```"""
436442
expected = """```py
437-
x = compute()
443+
x = do_work()
438444
print(x)
439445
```"""
440446
self.assertEqual(clean_runnable_blocks(text), expected)
441447

442-
def test_clean_runnable_blocks_for_loop_with_assert_only(self):
443-
"""A for-loop whose body is only an assert should be removed entirely."""
444-
text = """```py runnable:test_for_assert
448+
def test_clean_runnable_blocks_nodoc_for_loop(self):
449+
"""A for-loop marked with # nodoc is removed with its body."""
450+
text = """```py runnable:test_nodoc_for
445451
inputs = prepare()
446452
447-
for key in inputs:
448-
assert torch.equal(inputs[key], inputs_transcription[key])
453+
for key in inputs: # nodoc
454+
do_something(inputs[key])
449455
450456
outputs = model.generate(**inputs)
451457
```"""
452458
expected = """```py
453459
inputs = prepare()
454460
455461
outputs = model.generate(**inputs)
462+
```"""
463+
self.assertEqual(clean_runnable_blocks(text), expected)
464+
465+
def test_clean_runnable_blocks_nodoc_collapses_blank_lines(self):
466+
text = """```py runnable:test_blanks
467+
x = 1
468+
469+
y = 2 # nodoc
470+
471+
z = 3
472+
```"""
473+
expected = """```py
474+
x = 1
475+
476+
z = 3
456477
```"""
457478
self.assertEqual(clean_runnable_blocks(text), expected)
458479

459480
def test_clean_runnable_blocks_glmasr_batched(self):
460-
"""Real-world test from huggingface/transformers PR #44277 — test_batched block."""
481+
"""Real-world test from huggingface/transformers PR #44277 — test_batched block with # nodoc."""
461482
text = '''```py runnable:test_batched
462483
import torch
463484
from transformers import AutoProcessor, GlmAsrForConditionalGeneration
@@ -498,14 +519,14 @@ def test_clean_runnable_blocks_glmasr_batched(self):
498519
conversation, tokenize=True, add_generation_prompt=True, return_dict=True
499520
).to(model.device, dtype=model.dtype)
500521
501-
inputs_transcription = processor.apply_transcription_request(
522+
inputs_transcription = processor.apply_transcription_request( # nodoc
502523
[
503524
"https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/bcn_weather.mp3",
504525
"https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/obama2.mp3",
505526
],
506527
).to(model.device, dtype=model.dtype)
507528
508-
for key in inputs:
529+
for key in inputs: # nodoc
509530
assert torch.equal(inputs[key], inputs_transcription[key])
510531
511532
outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500)
@@ -514,11 +535,11 @@ def test_clean_runnable_blocks_glmasr_batched(self):
514535
outputs[:, inputs.input_ids.shape[1] :], skip_special_tokens=True
515536
)
516537
517-
EXPECTED_OUTPUT = [
538+
EXPECTED_OUTPUT = [ # nodoc
518539
"Yesterday it was thirty five degrees in Barcelona, but today the temperature will go down to minus twenty degrees.",
519540
"This week, I traveled to Chicago to deliver my final farewell address to the nation.",
520541
]
521-
assert decoded_outputs == EXPECTED_OUTPUT
542+
assert decoded_outputs == EXPECTED_OUTPUT # nodoc
522543
```'''
523544
expected = '''```py
524545
import torch
@@ -560,110 +581,12 @@ def test_clean_runnable_blocks_glmasr_batched(self):
560581
conversation, tokenize=True, add_generation_prompt=True, return_dict=True
561582
).to(model.device, dtype=model.dtype)
562583
563-
inputs_transcription = processor.apply_transcription_request(
564-
[
565-
"https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/bcn_weather.mp3",
566-
"https://huggingface.co/datasets/eustlb/audio-samples/resolve/main/obama2.mp3",
567-
],
568-
).to(model.device, dtype=model.dtype)
569-
570584
outputs = model.generate(**inputs, do_sample=False, max_new_tokens=500)
571585
572586
decoded_outputs = processor.batch_decode(
573587
outputs[:, inputs.input_ids.shape[1] :], skip_special_tokens=True
574588
)
575-
576-
EXPECTED_OUTPUT = [
577-
"Yesterday it was thirty five degrees in Barcelona, but today the temperature will go down to minus twenty degrees.",
578-
"This week, I traveled to Chicago to deliver my final farewell address to the nation.",
579-
]
580-
```'''
581-
self.assertEqual(clean_runnable_blocks(text), expected)
582-
583-
def test_clean_runnable_blocks_backticks_in_string(self):
584-
"""Triple backticks inside a string literal should not close the block early."""
585-
text = '''```py runnable:test_backticks
586-
x = """```
587-
not a fence
588-
```"""
589-
assert x is not None
590-
print(x)
591589
```'''
592-
expected = '''```py
593-
x = """```
594-
not a fence
595-
```"""
596-
print(x)
597-
```'''
598-
self.assertEqual(clean_runnable_blocks(text), expected)
599-
600-
def test_clean_runnable_blocks_nodoc_single_line(self):
601-
"""A line marked with # nodoc is removed."""
602-
text = """```py runnable:test_nodoc
603-
from transformers import pipeline
604-
pipe = pipeline("sentiment-analysis")
605-
result = pipe("test") # nodoc
606-
print(pipe("I love this!"))
607-
```"""
608-
expected = """```py
609-
from transformers import pipeline
610-
pipe = pipeline("sentiment-analysis")
611-
print(pipe("I love this!"))
612-
```"""
613-
self.assertEqual(clean_runnable_blocks(text), expected)
614-
615-
def test_clean_runnable_blocks_nodoc_multiline_parens(self):
616-
"""A multi-line statement marked with # nodoc is fully removed."""
617-
text = """```py runnable:test_nodoc_multi
618-
result = compute()
619-
620-
EXPECTED_OUTPUT = [ # nodoc
621-
"first value",
622-
"second value",
623-
]
624-
assert result == EXPECTED_OUTPUT
625-
626-
print(result)
627-
```"""
628-
expected = """```py
629-
result = compute()
630-
631-
print(result)
632-
```"""
633-
self.assertEqual(clean_runnable_blocks(text), expected)
634-
635-
def test_clean_runnable_blocks_nodoc_for_loop(self):
636-
"""A for-loop marked with # nodoc is removed with its body."""
637-
text = """```py runnable:test_nodoc_for
638-
inputs = prepare()
639-
640-
for key in inputs: # nodoc
641-
assert torch.equal(inputs[key], other[key])
642-
643-
outputs = model.generate(**inputs)
644-
```"""
645-
expected = """```py
646-
inputs = prepare()
647-
648-
outputs = model.generate(**inputs)
649-
```"""
650-
self.assertEqual(clean_runnable_blocks(text), expected)
651-
652-
def test_clean_runnable_blocks_nodoc_multiline_brackets(self):
653-
"""Multi-line list with # nodoc tracked via bracket depth."""
654-
text = """```py runnable:test_nodoc_brackets
655-
x = do_work()
656-
expected = [ # nodoc
657-
1,
658-
2,
659-
3,
660-
]
661-
print(x)
662-
```"""
663-
expected = """```py
664-
x = do_work()
665-
print(x)
666-
```"""
667590
self.assertEqual(clean_runnable_blocks(text), expected)
668591

669592
def test_clean_runnable_blocks_glmasr_basic(self):

0 commit comments

Comments
 (0)