Skip to content

Commit fba08d0

Browse files
committed
feat: add backtick standardization to clean up markdown-code-runner tags in output files and protect them when doing an in-place replacement.
Needed because tools such as mkdocs and pandoc cannot parse fenced code blocks whose opening fence carries extra arguments beyond the language to be highlighted.
1 parent ab5e928 commit fba08d0

File tree

8 files changed

+457
-51
lines changed

8 files changed

+457
-51
lines changed

.gitignore

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Python
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
*.so
6+
.Python
7+
build/
8+
develop-eggs/
9+
dist/
10+
downloads/
11+
eggs/
12+
.eggs/
13+
lib/
14+
lib64/
15+
parts/
16+
sdist/
17+
var/
18+
wheels/
19+
*.egg-info/
20+
.installed.cfg
21+
*.egg
22+
23+
# Virtual Environment
24+
venv/
25+
venv-*
26+
env/
27+
ENV/
28+
29+
# IDE
30+
.idea/
31+
.vscode/
32+
*.swp
33+
*.swo
34+
35+
# Misc
36+
.DS_Store
37+
.env
38+
.coverage
39+
htmlcov/

markdown_code_runner.py

Lines changed: 104 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
```bash markdown-code-runner
3535
echo "Hello, world!"
3636
```
37-
Which will similarly print the output of the code block between next to the output markers.
37+
Which will similarly print the output of the code block between the next output markers.
3838
3939
"""
4040

@@ -45,6 +45,7 @@
4545
import io
4646
import os
4747
import re
48+
import sys
4849
import subprocess
4950
from dataclasses import dataclass, field
5051
from importlib.metadata import PackageNotFoundError, version
@@ -78,6 +79,12 @@ def md_comment(text: str) -> str:
7879
"code:backticks:end": "```",
7980
}
8081

82+
# Names of every marker in MARKERS whose key ends in ":start",
# collected once so callers can loop over them directly.
START_MARKERS = list(filter(lambda name: name.endswith(":start"), MARKERS))
87+
8188

8289
def markers_to_patterns() -> dict[str, re.Pattern]:
8390
"""Convert the markers to regular expressions."""
@@ -165,21 +172,30 @@ def _extract_backtick_options(line: str) -> dict[str, str]:
165172
"""Extract extra information from a line."""
166173
if "```" not in line:
167174
return {}
175+
176+
# First try to match with markdown-code-runner
168177
language_pattern = r"```(?P<language>\w+) markdown-code-runner"
169-
extra_pattern = r"(?P<key>\w+)=(?P<value>\S+)"
170-
171178
language_match = re.search(language_pattern, line)
172-
assert language_match is not None
179+
180+
# If no match, try to match just the language
181+
if language_match is None:
182+
language_pattern = r"```(?P<language>\w+)"
183+
language_match = re.search(language_pattern, line)
184+
if language_match is None:
185+
return {}
186+
173187
language = language_match.group("language")
174188
result = {"language": language}
175-
176-
extra_str = line[language_match.end() :]
177-
extra_matches = re.finditer(extra_pattern, extra_str)
178-
179-
for match in extra_matches:
180-
key, value = match.group("key"), match.group("value")
181-
result[key] = value
182-
189+
190+
# Only look for extra options if markdown-code-runner is present
191+
if "markdown-code-runner" in line:
192+
extra_pattern = r"(?P<key>\w+)=(?P<value>\S+)"
193+
extra_str = line[language_match.end() :]
194+
extra_matches = re.finditer(extra_pattern, extra_str)
195+
for match in extra_matches:
196+
key, value = match.group("key"), match.group("value")
197+
result[key] = value
198+
183199
return result
184200

185201

@@ -203,6 +219,7 @@ class ProcessingState:
203219
output: list[str] | None = None
204220
new_lines: list[str] = field(default_factory=list)
205221
backtick_options: dict[str, Any] = field(default_factory=dict)
222+
backtick_standardize: bool = True
206223

207224
def process_line(self, line: str, *, verbose: bool = False) -> None:
208225
"""Process a line of the Markdown file."""
@@ -219,19 +236,46 @@ def process_line(self, line: str, *, verbose: bool = False) -> None:
219236
elif self.section == "output":
220237
self.original_output.append(line)
221238
else:
222-
self._process_start_markers(line)
239+
processed_line = self._process_start_markers(line, verbose=verbose)
240+
if processed_line:
241+
line = processed_line
223242

224243
if self.section != "output":
225244
self.new_lines.append(line)
226245

227-
def _process_start_markers(self, line: str, verbose: bool = False) -> str | None:
    """Detect a section start marker on ``line`` and update the parser state.

    Args:
        line: The Markdown line to inspect.
        verbose: If True, print a note when a backtick start marker is found.

    Returns:
        The line to emit when a start marker matched (rewritten by
        ``_process_backticks_start`` for backtick fences, otherwise the
        line itself), or ``None`` when no start marker matched.
    """
    for marker in START_MARKERS:
        if not is_marker(line, marker):
            continue

        # reset output in case previous output wasn't displayed
        self.output = None
        self.backtick_options = _extract_backtick_options(line)
        self.section, _ = marker.rsplit(":", 1)  # type: ignore[assignment]

        # Only backtick fences are rewritten; comment markers pass through.
        if marker != "code:backticks:start":
            return line
        if verbose:
            print(f"Found marker {marker} in line {line}")
        return self._process_backticks_start(line)

    # No start marker on this line: the caller keeps the original line.
    return None
259+
260+
def _process_backticks_start(self, line: str) -> str:
261+
"""Process backticks start marker and standardize if needed.
262+
263+
Args:
264+
line: The line containing backticks start marker
265+
266+
Returns:
267+
Processed line with markdown-code-runner removed if standardization is enabled
268+
"""
269+
language_match = re.search(r"```(?P<language>\w+)", line)
270+
if not (language_match and self.backtick_standardize):
271+
return line
272+
273+
if "markdown-code-runner" not in line:
274+
return line
275+
276+
# Remove markdown-code-runner and any text after it from the line
277+
processed_line = re.sub(r'\smarkdown-code-runner.*(?=```|$)', '', line)
278+
return processed_line
235279

236280
def _process_output_start(self, line: str) -> None:
237281
self.section = "output"
@@ -292,7 +336,7 @@ def _process_backtick_code(self, line: str, *, verbose: bool) -> None:
292336
self._process_code(line, "code:backticks:end", language, verbose=verbose)
293337

294338

295-
def process_markdown(content: list[str], *, verbose: bool = False) -> list[str]:
339+
def process_markdown(content: list[str], *, verbose: bool = False, backtick_standardize: bool = True) -> list[str]:
296340
"""Executes code blocks in a list of Markdown-formatted strings and returns the modified list.
297341
298342
Parameters
@@ -301,6 +345,8 @@ def process_markdown(content: list[str], *, verbose: bool = False) -> list[str]:
301345
A list of Markdown-formatted strings.
302346
verbose
303347
If True, print every line that is processed.
348+
backtick_standardize
349+
If True, clean up markdown-code-runner string from backtick code blocks.
304350
305351
Returns
306352
-------
@@ -309,14 +355,13 @@ def process_markdown(content: list[str], *, verbose: bool = False) -> list[str]:
309355
310356
"""
311357
assert isinstance(content, list), "Input must be a list"
312-
state = ProcessingState()
358+
state = ProcessingState(backtick_standardize=backtick_standardize)
313359

314360
for i, line in enumerate(content):
315361
if verbose:
316362
nr = _bold(f"line {i:4d}")
317363
print(f"{nr}: {line}")
318-
state.process_line(line, verbose=verbose)
319-
364+
line = state.process_line(line, verbose=verbose)
320365
return state.new_lines
321366

322367

@@ -325,15 +370,28 @@ def update_markdown_file(
325370
output_filepath: Path | str | None = None,
326371
*,
327372
verbose: bool = False,
373+
backtick_standardize: bool = True,
328374
) -> None:
329-
"""Rewrite a Markdown file by executing and updating code blocks."""
375+
"""Rewrite a Markdown file by executing and updating code blocks.
376+
377+
Parameters
378+
----------
379+
input_filepath : Path | str
380+
Path to the input Markdown file.
381+
output_filepath : Path | str | None
382+
Path to the output Markdown file. If None, overwrites input file.
383+
verbose : bool
384+
If True, print every line that is processed.
385+
backtick_standardize : bool
386+
If True, clean up markdown-code-runner string from backtick code blocks.
387+
"""
330388
if isinstance(input_filepath, str): # pragma: no cover
331389
input_filepath = Path(input_filepath)
332390
with input_filepath.open() as f:
333391
original_lines = [line.rstrip("\n") for line in f.readlines()]
334392
if verbose:
335393
print(f"Processing input file: {input_filepath}")
336-
new_lines = process_markdown(original_lines, verbose=verbose)
394+
new_lines = process_markdown(original_lines, verbose=verbose, backtick_standardize=backtick_standardize)
337395
updated_content = "\n".join(new_lines).rstrip() + "\n"
338396
if verbose:
339397
print(f"Writing output to: {output_filepath}")
@@ -375,12 +433,34 @@ def main() -> None:
375433
action="version",
376434
version=f"%(prog)s {__version__}",
377435
)
436+
parser.add_argument(
437+
"--backtick-standardize",
438+
action="store_true",
439+
help="Clean up markdown-code-runner string from backtick code blocks (default: True when output file is specified)",
440+
default=None,
441+
)
442+
parser.add_argument(
443+
"--force-overwrite",
444+
action="store_true",
445+
help="Required when using backtick-standardize option with input file overwrite",
446+
default=False,
447+
)
378448

379449
args = parser.parse_args()
380450

381451
input_filepath = Path(args.input)
382452
output_filepath = Path(args.output) if args.output is not None else input_filepath
383-
update_markdown_file(input_filepath, output_filepath, verbose=args.verbose)
453+
454+
# Determine backtick standardization
455+
if args.output is None: # Overwriting input file
456+
if args.backtick_standardize and not args.force_overwrite:
457+
print("Error: This will overwrite your file. Please use the --force-overwrite option in conjunction to set backtick-standardize option to true", file=sys.stderr)
458+
sys.exit(1)
459+
backtick_standardize = args.backtick_standardize and args.force_overwrite
460+
else: # Writing to different output file
461+
backtick_standardize = args.backtick_standardize if args.backtick_standardize is not None else True
462+
463+
update_markdown_file(input_filepath, output_filepath, verbose=args.verbose, backtick_standardize=backtick_standardize)
384464

385465

386466
if __name__ == "__main__":

tests/test_backtick_stdz.md

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Backtick Standardization Test
2+
3+
This file tests various backtick code block scenarios.
4+
5+
## Basic Code Block
6+
A simple Python code block with markdown-code-runner:
7+
Currently, no options are supported for backtick code blocks; they may be used in the future.
8+
9+
```python markdown-code-runner filename=test1.py
10+
print("Basic test")
11+
```
12+
13+
## Code Block with Multiple Options
14+
Testing multiple options in the backtick header:
15+
16+
```javascript markdown-code-runner filename=test2.js debug=true skip=false
17+
console.log("Multiple options test")
18+
```
19+
20+
## Language-only Block
21+
This block should remain unchanged during standardization:
22+
23+
```rust
24+
fn main() {
25+
println!("No markdown-code-runner");
26+
}
27+
```
28+
29+
## Complex Options Block
30+
Testing complex options and spacing:
31+
32+
```python markdown-code-runner filename=test3.py debug=true skip=false
33+
print("Testing spaces in options")
34+
```
35+
36+
## Empty Language Block
37+
Testing block with no language:
38+
39+
```markdown-code-runner filename=test4.txt
40+
Just some plain text
41+
```
42+
43+
## Mixed Content Block
44+
Testing with additional content after options:
45+
46+
```python markdown-code-runner filename=test5.py some random text here
47+
print("Mixed content test")
48+
```

0 commit comments

Comments
 (0)