diff --git a/docs/advanced-options.md b/docs/advanced-options.md index c4514a85..cd7b7bf4 100644 --- a/docs/advanced-options.md +++ b/docs/advanced-options.md @@ -33,6 +33,33 @@ Finally, note that you can _hide_ the notebook metadata in an HTML comment in `. In the `percent` and `light` script formats, magic commands (Jupyter commands prefixed by `%` or `%%`) are commented out in scripts. You can change this by using the `comment_magics` option, either in the `jupytext.toml` file or at the command line with `jupytext --opt`. +## Custom language magics + +In Markdown and R Markdown notebooks, code blocks in languages with built-in Jupyter magic support (e.g. `javascript`, `bash`, `sql`) are automatically converted to code cells using the corresponding `%%language` magic. However, code blocks in languages not covered by built-in magics (e.g. `jsx`) remain as Markdown cells by default. + +If you have written custom Jupyter magics for additional languages, you can tell Jupytext about them using the `custom_language_magics` option. For example, if you have a custom `%%jsx` magic, add the following to your [`jupytext.toml`](config.md) file: + +```toml +custom_language_magics = ["jsx"] +``` + +With this option set, Markdown code blocks like: + +````markdown +```jsx +const Hello = () => (Hello); +``` +```` + +will be converted to code cells with the `%%jsx` magic: + +``` +%%jsx +const Hello = () => (Hello); +``` + +When converting a notebook (`.ipynb`) to Markdown, Jupytext automatically detects cells using non-standard language magics and records them in the `custom_language_magics` notebook metadata, ensuring that the round-trip conversion (`.ipynb` → `.md` → `.ipynb`) preserves such cells correctly. + ## Active and inactive cells You might want to make some cell active only when the notebook is run in Jupyter, or active only when the `.py` file is interpreted by Python. 
To do so, add an `active-ipynb` tag to the cells that should only be executed in the `.ipynb` file, and an `active-py` tag to the cells that should be executed only in the Python script. diff --git a/src/jupytext/cell_reader.py b/src/jupytext/cell_reader.py index dc60af21..26226572 100644 --- a/src/jupytext/cell_reader.py +++ b/src/jupytext/cell_reader.py @@ -320,10 +320,20 @@ def __init__(self, fmt=None, default_language=None): self.split_at_heading = (fmt or {}).get("split_at_heading", False) self.in_region = False self.in_raw = False + custom_language_magics = (fmt or {}).get("custom_language_magics", []) + if isinstance(custom_language_magics, str): + custom_language_magics = [m for m in custom_language_magics.split(",") if m] + # Store the set of custom language magics (including upper-case variants) for matching + self.custom_language_magics = set(custom_language_magics) | {lang.upper() for lang in custom_language_magics} if self.format_version in ["1.0", "1.1"] and self.ext != ".Rmd": # Restore the pattern used in Markdown <= 1.1 self.start_code_re = re.compile(r"^```(.*)") self.non_jupyter_code_re = re.compile(r"^```\{") + elif self.custom_language_magics: + all_languages = _JUPYTER_LANGUAGES_LOWER_AND_UPPER | self.custom_language_magics + self.start_code_re = re.compile( + r"^```(`*)(\s*)({})($|\s.*$)".format("|".join(all_languages).replace("+", "\\+")) + ) def metadata_and_language_from_option_line(self, line): match_region = self.start_region_re.match(line) @@ -352,7 +362,13 @@ def metadata_and_language_from_option_line(self, line): def options_to_metadata(self, options): if isinstance(options, tuple): - self.end_code_re = re.compile("```" + options[0]) + # The start_code_re groups are: (extra_backticks, space, language, rest) + extra_backticks, _space, language, rest = options + self.end_code_re = re.compile("```" + extra_backticks) + if language in self.custom_language_magics: + self.cell_metadata_json = self.cell_metadata_json or 
is_json_metadata(rest) + _title, meta = text_to_metadata(rest) + return language, meta options = " ".join(options[1:]) else: self.end_code_re = re.compile(r"^```\s*$") @@ -398,7 +414,10 @@ def find_cell_end(self, lines): language, metadata = self.options_to_metadata(self.start_code_re.findall(line)[0]) # Cells with a .noeval attribute are markdown cells #347 # R Markdown notebooks can have bibliography and index blocks, cf #1161 and #1429 - if language not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER or metadata.get(".noeval", "") is None: + if ( + language not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER + and language not in self.custom_language_magics + ) or metadata.get(".noeval", "") is None: in_explicit_code_block = True prev_blank = 0 continue diff --git a/src/jupytext/cell_to_text.py b/src/jupytext/cell_to_text.py index 5972850f..b16069d5 100644 --- a/src/jupytext/cell_to_text.py +++ b/src/jupytext/cell_to_text.py @@ -65,7 +65,10 @@ def __init__(self, cell, default_language, fmt=None, unsupported_keys=None): ) if self.parse_cell_language: custom_cell_magics = self.fmt.get("custom_cell_magics", "").split(",") - self.language, magic_args = cell_language(self.source, default_language, custom_cell_magics) + custom_language_magics = self.fmt.get("custom_language_magics", []) + if isinstance(custom_language_magics, str): + custom_language_magics = [m for m in custom_language_magics.split(",") if m] + self.language, magic_args = cell_language(self.source, default_language, custom_cell_magics + list(custom_language_magics)) if magic_args: self.metadata["magic_args"] = magic_args diff --git a/src/jupytext/config.py b/src/jupytext/config.py index 331040ca..29864bde 100644 --- a/src/jupytext/config.py +++ b/src/jupytext/config.py @@ -173,6 +173,15 @@ class JupytextConfiguration(Configurable): config=True, ) + custom_language_magics = List( + Unicode(), + help="A list of additional language magics. Use e.g. 
" + 'custom_language_magics = ["jsx", "tsx"] if you have custom magics for those languages ' + "and want Markdown code blocks in those languages to be converted to code cells " + "with the appropriate cell magic.", + config=True, + ) + def set_default_format_options(self, format_options, read=False): """Set default format option""" if self.default_notebook_metadata_filter: @@ -216,6 +225,8 @@ def set_default_format_options(self, format_options, read=False): format_options.setdefault("rst2md", self.sphinx_convert_rst2md) if self.custom_cell_magics: format_options.setdefault("custom_cell_magics", self.custom_cell_magics) + if self.custom_language_magics: + format_options.setdefault("custom_language_magics", self.custom_language_magics) def default_formats(self, path): """Return the default formats, if they apply to the current path #157""" diff --git a/src/jupytext/formats.py b/src/jupytext/formats.py index a5234248..3a164eaf 100644 --- a/src/jupytext/formats.py +++ b/src/jupytext/formats.py @@ -706,6 +706,7 @@ def short_form_multiple_formats(jupytext_formats: list[dict[str, str]]) -> str: "cell_metadata_filter", "cell_markers", "custom_cell_magics", + "custom_language_magics", ] _VALID_FORMAT_NAMES = {fmt.format_name for fmt in JUPYTEXT_FORMATS} diff --git a/src/jupytext/jupytext.py b/src/jupytext/jupytext.py index f714d5e8..21657abe 100644 --- a/src/jupytext/jupytext.py +++ b/src/jupytext/jupytext.py @@ -34,6 +34,7 @@ metadata_to_metadata_and_cell, ) from .languages import ( + _JUPYTER_LANGUAGES_LOWER_AND_UPPER, _SCRIPT_EXTENSIONS, default_language_from_metadata_and_ext, set_main_and_cell_language, @@ -146,7 +147,10 @@ def reads(self, s, **_): lines = lines[pos:] custom_cell_magics = self.fmt.get("custom_cell_magics", "").split(",") - set_main_and_cell_language(metadata, cells, self.implementation.extension, custom_cell_magics) + custom_language_magics = self.fmt.get("custom_language_magics", []) + if isinstance(custom_language_magics, str): + 
custom_language_magics = [m for m in custom_language_magics.split(",") if m] + set_main_and_cell_language(metadata, cells, self.implementation.extension, custom_cell_magics + list(custom_language_magics)) cell_metadata = set() for cell in cells: cell_metadata.update(cell.metadata.keys()) @@ -242,6 +246,24 @@ def writes(self, nb, metadata=None, **kwargs): self.fmt["use_runtools"] = True break + # Auto-detect non-standard language magics and record them in custom_language_magics + if self.ext in [".md", ".markdown", ".Rmd"]: + custom_cell_magics_list = self.fmt.get("custom_cell_magics", "").split(",") + existing_custom_language_magics = self.fmt.get("custom_language_magics", []) + if isinstance(existing_custom_language_magics, str): + existing_custom_language_magics = [m for m in existing_custom_language_magics.split(",") if m] + detected_magics = set(existing_custom_language_magics) + for cell in nb.cells: + if cell.cell_type == "code" and cell.source: + first_line = cell.source.split("\n")[0] + if first_line.startswith("%%"): + lang = first_line[2:].split(" ")[0].strip() + if lang and lang not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER and lang not in custom_cell_magics_list: + detected_magics.add(lang) + if detected_magics != set(existing_custom_language_magics): + self.fmt["custom_language_magics"] = sorted(detected_magics) + metadata.setdefault("jupytext", {})["custom_language_magics"] = self.fmt["custom_language_magics"] + header = encoding_and_executable(nb, metadata, self.ext) unsupported_keys = set() header_content, header_lines_to_next_cell = metadata_and_cell_to_header( diff --git a/tests/functional/others/test_custom_language_magics.py b/tests/functional/others/test_custom_language_magics.py new file mode 100644 index 00000000..6c4e0644 --- /dev/null +++ b/tests/functional/others/test_custom_language_magics.py @@ -0,0 +1,111 @@ +import nbformat +import pytest +from nbformat.v4.nbbase import new_code_cell, new_markdown_cell, new_notebook + +import jupytext 
+from jupytext.cli import jupytext as jupytext_cli
+from jupytext.compare import compare_notebooks  # NOTE(review): not referenced by any test in this file
+from jupytext.languages import _JUPYTER_LANGUAGES_LOWER_AND_UPPER
+
+
+def test_custom_language_magics_md_to_ipynb(tmpdir):  # NOTE(review): tmpdir is requested but not used
+    """A ```jsx block is read as a ``%%jsx`` code cell when "jsx" is listed in the custom_language_magics format option"""
+    assert "jsx" not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER  # guard: the test is only meaningful while jsx is absent from this set
+
+    # The option is passed directly in the fmt dict; no configuration file is involved
+    md = """```python
+print("hello")
+```
+
+```jsx
+const Hello = () => (Hello);
+```
+"""
+    nb = jupytext.reads(md, fmt={"extension": ".md", "custom_language_magics": ["jsx"]})
+    assert len(nb.cells) == 2
+    assert nb.cells[0].cell_type == "code"
+    assert nb.cells[0].source == 'print("hello")'
+    assert nb.cells[1].cell_type == "code"
+    assert nb.cells[1].source == "%%jsx\nconst Hello = () => (Hello);"
+
+
+def test_custom_language_magics_ipynb_to_md(tmpdir):  # NOTE(review): tmpdir is requested but not used
+    """Writing a notebook with a ``%%jsx`` cell to Markdown yields a ```jsx block and a custom_language_magics entry in the text"""
+    assert "jsx" not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER  # guard: the test is only meaningful while jsx is absent from this set
+
+    nb = new_notebook(
+        cells=[
+            new_code_cell('print("hello")'),
+            new_code_cell("%%jsx\nconst Hello = () => (Hello);"),
+        ]
+    )
+
+    md = jupytext.writes(nb, fmt="md")
+    assert "custom_language_magics:" in md
+    assert "jsx" in md  # NOTE(review): implied by the "```jsx" check on the next line
+    assert "```jsx" in md
+
+
+def test_custom_language_magics_roundtrip(tmpdir):  # NOTE(review): tmpdir is requested but not used
+    """Round-trip: ipynb with a ``%%jsx`` cell -> md -> ipynb should preserve the code cell"""
+    assert "jsx" not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER  # guard: the test is only meaningful while jsx is absent from this set
+
+    nb = new_notebook(
+        cells=[
+            new_code_cell('print("hello")'),
+            new_code_cell("%%jsx\nconst Hello = () => (Hello);"),
+        ]
+    )
+
+    # ipynb -> md (no custom_language_magics option is passed here)
+    md = jupytext.writes(nb, fmt="md")
+    assert "```jsx" in md
+
+    # md -> ipynb (again without passing the option; only the second cell is checked)
+    nb2 = jupytext.reads(md, fmt="md")
+    assert len(nb2.cells) == 2
+    assert nb2.cells[1].cell_type == "code"
+    assert nb2.cells[1].source == "%%jsx\nconst Hello = () => (Hello);"
+
+
+def test_custom_language_magics_config_file(tmpdir):
+    """custom_language_magics set in jupytext.toml should be applied when the CLI converts md -> ipynb"""
+    assert "jsx" not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER  # guard: the test is only meaningful while jsx is absent from this set
+
+    cfg_file = tmpdir.join("jupytext.toml")  # same directory as the notebook files below
+    cfg_file.write('custom_language_magics = ["jsx"]')
+
+    md_file = tmpdir.join("notebook.md")
+    nb_file = tmpdir.join("notebook.ipynb")
+
+    md_file.write(
+        """```python
+print("hello")
+```
+
+```jsx
+const Hello = () => (Hello);
+```
+"""
+    )
+
+    jupytext_cli([str(md_file), "--to", "notebook"])  # expected to produce notebook.ipynb, which is read back below
+    nb = nbformat.read(str(nb_file), as_version=4)
+
+    assert len(nb.cells) == 2
+    assert nb.cells[1].cell_type == "code"
+    assert nb.cells[1].source == "%%jsx\nconst Hello = () => (Hello);"
+
+
+def test_custom_language_magics_not_markdown_cell(tmpdir):  # NOTE(review): tmpdir is requested but not used
+    """Without custom_language_magics, a ```jsx block should be read as a single markdown cell"""
+    assert "jsx" not in _JUPYTER_LANGUAGES_LOWER_AND_UPPER  # guard: the test is only meaningful while jsx is absent from this set
+
+    md = """```jsx
+const Hello = () => (Hello);
+```
+"""
+    nb = jupytext.reads(md, fmt="md")
+    # No option is passed, so the whole fenced block is expected to stay in one markdown cell
+    assert len(nb.cells) == 1
+    assert nb.cells[0].cell_type == "markdown"