Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
eefaee9
Draft the singlemarkdown builder
akaihola May 17, 2025
4e5e8c8
Implement SingleFileMarkdownBuilder for consolidated output
akaihola May 17, 2025
1ee2773
Fix singlemarkdown builder registration to prevent duplicate registra…
akaihola May 17, 2025
cf783ab
Fix: Add render_partial method to SingleFileMarkdownBuilder class
akaihola May 17, 2025
e57d242
Add comprehensive type hints to improve static type checking
akaihola May 17, 2025
37106b4
Add type annotations and improve docstrings in translator modules
akaihola May 17, 2025
33af65c
Add comprehensive tests for singlemarkdown builder and improve type a…
akaihola May 17, 2025
32c3774
Add integration tests for singlemarkdown builder
akaihola May 17, 2025
c9e1f5c
Improve type annotations and use Path objects consistently in test_si…
akaihola May 17, 2025
bae63d0
Make type hints Python 3.9 compatible
akaihola Jul 16, 2025
b45f7a7
This PR requires Python 3.9, make the package py39+ dependent
akaihola Jul 16, 2025
e78f773
Ensure tests leave no temporary directories behind
akaihola Oct 31, 2025
ff039c1
Add two failing tests for singlemarkdown
akaihola Oct 31, 2025
bed1bd7
Merge remote-tracking branch 'liran-funaro/main' into singlemarkdown
AdrianAtZyte Mar 24, 2026
fb90c46
Add tip support
AdrianAtZyte Mar 24, 2026
4699559
KISS
AdrianAtZyte Mar 24, 2026
7d93a8e
Add an expected single file to tests
AdrianAtZyte Mar 24, 2026
d809c7f
Include single.md in the built markdown folder for diff checks
AdrianAtZyte Mar 24, 2026
66bf07b
Fix only not affecting singlemarkdown
AdrianAtZyte Mar 24, 2026
e54b9ac
Add singlemarkdown_flavor with support for llm
AdrianAtZyte Mar 24, 2026
71490a7
Adjust header levels
AdrianAtZyte Mar 24, 2026
1ee7ac7
Render admonitions like .. contents
AdrianAtZyte Mar 24, 2026
fc9d850
Fix the document order
AdrianAtZyte Mar 24, 2026
2050ffa
Address linting issues
AdrianAtZyte Mar 24, 2026
54daafe
Fix Sphinx 7 support
AdrianAtZyte Mar 26, 2026
a90b3d8
Support Sphinx 7
AdrianAtZyte Mar 26, 2026
f689370
Improve syntax support
AdrianAtZyte Mar 26, 2026
8b654c2
Additional fixes
AdrianAtZyte Mar 30, 2026
204b6b2
Fix generation
AdrianAtZyte Mar 30, 2026
ac9f8ba
Prevent MagicMock folder generation
AdrianAtZyte Mar 30, 2026
9400a76
Support the llm flavor in the regular builder as well
AdrianAtZyte Apr 9, 2026
3b9144e
Provide a nice rendering for sphinx-design grid items
AdrianAtZyte Apr 10, 2026
ebb72ab
Support (extended) footnote markdown syntax
AdrianAtZyte Apr 10, 2026
ea6b0d0
Do not add <br> for llm flavor on |
AdrianAtZyte Apr 10, 2026
d3c69da
Remove br tags from titles in llm flavor
AdrianAtZyte Apr 10, 2026
4e9ccc6
Remove br tags from table cells in llm flavor
AdrianAtZyte Apr 10, 2026
2927a78
Remove internal links from LLM-flavored single file
AdrianAtZyte Apr 10, 2026
d12c2d1
Update test expectations
AdrianAtZyte Apr 10, 2026
383edca
Address lint issues
AdrianAtZyte Apr 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,31 @@ clean:
# Catch-all target: route all unknown targets to Sphinx using the new "make mode" option.
# $(O) is meant as a shortcut for $(SPHINX_OPTS).
doc-%:
@$(SPHINX_BUILD) -M $* "$(SOURCE_DIR)" "$(BUILD_DIR)" $(SPHINX_OPTS) $(O) -a -t Partners
@$(SPHINX_BUILD) -M $* "$(SOURCE_DIR)" "$(BUILD_DIR)" $(SPHINX_OPTS) $(O) -a -t Partners -j 8


docs: doc-markdown
docs: doc-markdown doc-singlemarkdown


test-diff:
@echo "Building markdown..."
@$(SPHINX_BUILD) -M markdown "$(SOURCE_DIR)" "$(BUILD_DIR)" $(SPHINX_OPTS) $(O) -a -t Partners -j 8
@echo "Building docs..."
@$(MAKE) docs

@echo "Building markdown with configuration overrides..."
@$(SPHINX_BUILD) -M markdown "$(SOURCE_DIR)" "$(BUILD_DIR)/overrides" $(SPHINX_OPTS) $(O) -a \
-D markdown_http_base="https://localhost" -D markdown_uri_doc_suffix=".html" \
-D markdown_docinfo=1 -D markdown_anchor_sections=1 -D markdown_anchor_signatures=1 \
-D autodoc_typehints=signature -D markdown_bullet=- -D markdown_flavor=github

@echo "Building singlemarkdown llms-full output..."
@$(SPHINX_BUILD) -M singlemarkdown "$(SOURCE_DIR)" "$(BUILD_DIR)/llm" $(SPHINX_OPTS) $(O) -a \
-D singlemarkdown_flavor=llm

@# Copy just the files for verification
@cp "$(BUILD_DIR)/overrides/markdown/auto-summery.md" "$(BUILD_DIR)/markdown/overrides-auto-summery.md"
@cp "$(BUILD_DIR)/overrides/markdown/auto-module.md" "$(BUILD_DIR)/markdown/overrides-auto-module.md"
@cp "$(BUILD_DIR)/singlemarkdown/index.md" "$(BUILD_DIR)/markdown/single.md"
@cp "$(BUILD_DIR)/llm/singlemarkdown/index.md" "$(BUILD_DIR)/markdown/llms-full.txt"

@echo "Verifies outputs..."
@diff --recursive --color=always --side-by-side --text --suppress-common-lines \
Expand Down
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ Build markdown files with `sphinx-build` command
sphinx-build -M markdown ./docs ./build
```

Build a single markdown file, containing all your documentation, with:
```sh
sphinx-build -M singlemarkdown ./docs ./build
```

## Configurations

You can add the following configurations to your `conf.py` file:
Expand All @@ -38,7 +43,11 @@ You can add the following configurations to your `conf.py` file:
* `markdown_uri_doc_suffix`: If set, all references will link to documents with this suffix.
* `markdown_file_suffix`: Sets the file extension for generated markdown files (default: `.md`).
* `markdown_bullet`: Sets the bullet marker.
* `markdown_flavor`: If set to `github`, output will suit GitHub's flavor of Markdown.
* `markdown_flavor`:
* If set to `github`, output will suit GitHub's flavor of Markdown.
* If set to `llm`, output will be optimized for LLM consumption.
* `singlemarkdown_flavor`: `markdown_flavor` override for the
single-markdown-file builder.

For example, if your `conf.py` file has the following configuration:

Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ description = "A Sphinx extension to add markdown generation support."
readme = "README.md"
authors = [{ name = "Liran Funaro", email = "liran.funaro@gmail.com" }]
license = "MIT"
license-files = ["LICENCE"]
license-files = ["LICENSE"]
classifiers = [
"Framework :: Sphinx :: Extension",
"Programming Language :: Python",
Expand All @@ -27,9 +27,11 @@ requires-python = ">=3.7"

[tool.poetry.plugins."sphinx.builders"]
"markdown" = "sphinx_markdown_builder"
"singlemarkdown" = "sphinx_markdown_builder.singlemarkdown"

[project.entry-points."sphinx.builders"]
"markdown" = "sphinx_markdown_builder"
"singlemarkdown" = "sphinx_markdown_builder.singlemarkdown"

[project.optional-dependencies]
dev = [
Expand Down
3 changes: 3 additions & 0 deletions sphinx_markdown_builder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
"""

from sphinx_markdown_builder.builder import MarkdownBuilder
from sphinx_markdown_builder.singlemarkdown import SingleFileMarkdownBuilder

__version__ = "0.6.10"
__docformat__ = "reStructuredText"


def setup(app):
app.add_builder(MarkdownBuilder)
app.add_builder(SingleFileMarkdownBuilder)
app.add_config_value("markdown_http_base", "", "html", str)
app.add_config_value("markdown_uri_doc_suffix", ".md", "html", str)
app.add_config_value("markdown_file_suffix", ".md", "html", str)
Expand All @@ -18,6 +20,7 @@ def setup(app):
app.add_config_value("markdown_docinfo", False, "html", bool)
app.add_config_value("markdown_bullet", "*", "html", str)
app.add_config_value("markdown_flavor", "", "html", str)
app.add_config_value("singlemarkdown_flavor", "", "html", str)

return {
"version": __version__,
Expand Down
3 changes: 3 additions & 0 deletions sphinx_markdown_builder/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from sphinx.util import logging
from sphinx.util.osutil import ensuredir, os_path

from sphinx_markdown_builder.llm import prepare_doctree_for_llm
from sphinx_markdown_builder.translator import MarkdownTranslator
from sphinx_markdown_builder.writer import MarkdownWriter

Expand Down Expand Up @@ -89,6 +90,8 @@ def write_doc(self, docname: str, doctree: nodes.document):
self.current_doc_name = docname
self.sec_numbers = self.env.toc_secnumbers.get(docname, {})
destination = StringOutput(encoding="utf-8")
if self.config.markdown_flavor == "llm":
doctree = prepare_doctree_for_llm(doctree)
self.writer.write(doctree, destination)
out_filename = os.path.join(self.outdir, f"{os_path(docname)}{self.out_suffix}")
ensuredir(os.path.dirname(out_filename))
Expand Down
57 changes: 33 additions & 24 deletions sphinx_markdown_builder/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class ContextStatus:
list_marker: Optional[ListMarker] = None # Current list marker
desc_type: Optional[str] = None # Current descriptor type
default_ref_internal: bool = False # Current default for internal reference
code_language: Optional[str] = None # Default language for subsequent code blocks


class SubContext:
Expand Down Expand Up @@ -215,44 +216,45 @@ def make(self):


class TableContext(SubContext):
def __init__(self, params=SubContextParams()):
def __init__(self, params=SubContextParams(), cell_breaker: str = "<br/>"):
super().__init__(params)
self.cell_breaker = cell_breaker
self.body: List[List[List[str]]] = []
self.headers: List[List[List[str]]] = []
self.internal_context = SubContext()

self.is_entry = False
self.is_header = False
self.is_body = False
# Pack boolean state flags into a single mapping to reduce the
# number of instance attributes (pylint R0902).
self._flags = {"entry": False, "header": False, "body": False}

@property
def active_output(self) -> List[List[List[str]]]:
if self.is_header:
if self._flags["header"]:
return self.headers
assert self.is_body
assert self._flags["body"]
return self.body

@property
def content(self):
if self.is_entry:
if self._flags["entry"]:
return self.active_output[-1][-1]
return self.internal_context.content

def enter_head(self):
assert not self.is_header and not self.is_body
self.is_header = True
assert not self._flags["header"] and not self._flags["body"]
self._flags["header"] = True

def exit_head(self):
assert self.is_header and not self.is_body
self.is_header = False
assert self._flags["header"] and not self._flags["body"]
self._flags["header"] = False

def enter_body(self):
assert not self.is_header and not self.is_body
self.is_body = True
assert not self._flags["header"] and not self._flags["body"]
self._flags["body"] = True

def exit_body(self):
assert self.is_body and not self.is_header
self.is_body = False
assert self._flags["body"] and not self._flags["header"]
self._flags["body"] = False

def enter_row(self):
self.active_output.append([])
Expand All @@ -261,17 +263,16 @@ def exit_row(self):
pass

def enter_entry(self):
self.is_entry = True
self._flags["entry"] = True
self.active_output[-1].append([])
self.ensure_eol_count = 0

def exit_entry(self):
assert self.is_entry
self.is_entry = False
assert self._flags["entry"]
self._flags["entry"] = False

@staticmethod
def make_row(row):
return ["".join(entries).replace("\n", "<br/>") for entries in row]
def make_row(self, row):
return ["".join(entries).replace("\n", getattr(self, "cell_breaker", "<br/>")) for entries in row]

def make(self):
ctx = SubContext()
Expand Down Expand Up @@ -327,8 +328,8 @@ def make(self):


class TitleContext(NoLineBreakContext):
def __init__(self, level: int, params=SubContextParams(2, 2)):
super().__init__("<br/>", params)
def __init__(self, level: int, params=SubContextParams(2, 2), breaker: str = "<br/>"):
super().__init__(breaker, params)
self.level = level

@property
Expand Down Expand Up @@ -377,7 +378,15 @@ def depart_label(self):
def make(self):
content = super().make()
label = self.label_body.make() or self.names
return f"* <a id='{self.ids}'>**[{label}]**</a> {content}"
# https://www.markdownguide.org/extended-syntax/#footnotes
lab = label.strip()
if not lab:
# Fallback to using the raw ids if label is empty
ids = self.ids
if isinstance(ids, (list, tuple)):
ids = ",".join(ids)
lab = str(ids)
return f"[^{lab}]: {content}"


_ContextT = TypeVar("_ContextT", bound=SubContext)
Expand Down
56 changes: 56 additions & 0 deletions sphinx_markdown_builder/llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Helpers to prune and normalize a docutils doctree for LLM-friendly output."""

from __future__ import annotations

from typing import cast

from docutils import nodes

_NAV_ARTIFACT_TEXTS = frozenset({"genindex", "modindex", "search"})


def _remove_node(node: nodes.Node) -> None:
    """Detach *node* from its parent.

    A node without a parent (``node.parent is None``) is left untouched.
    """
    parent = node.parent
    if parent is None:
        return
    parent.remove(node)


def _is_nav_artifact_list_item(node: nodes.list_item) -> bool:
    """Tell whether a list item's text is exactly one of the nav artifact names.

    The item's text is whitespace-normalized (runs of whitespace collapsed to
    single spaces, ends trimmed) and lower-cased before being looked up in
    ``_NAV_ARTIFACT_TEXTS``.
    """
    words = node.astext().split()
    # Joining the split words already yields text with no leading/trailing
    # whitespace, so no further trimming is needed.
    normalized = " ".join(words).lower()
    return normalized in _NAV_ARTIFACT_TEXTS


def _remove_nav_artifact_lists(doc: nodes.document) -> None:
    """Remove bullet lists made up solely of navigation artifact items.

    A bullet list is removed when it has at least one ``list_item`` child and
    every such child satisfies ``_is_nav_artifact_list_item``.
    """
    # Snapshot the traversal first: nodes are detached while we loop.
    for candidate in list(doc.findall(nodes.bullet_list)):
        items = [entry for entry in candidate.children if isinstance(entry, nodes.list_item)]
        if not items:
            continue
        if all(map(_is_nav_artifact_list_item, items)):
            _remove_node(candidate)


def _prune_empty_containers(doc: nodes.document) -> None:
    """Repeatedly remove empty bullet lists and title-only sections.

    Each pass first removes bullet lists with no children, then removes
    sections whose children are all ``title`` nodes (including sections with
    no children at all). Passes repeat until one removes nothing, because
    removing a node may leave its ancestor empty in turn.
    """
    while True:
        removed_any = False

        for candidate_list in list(doc.findall(nodes.bullet_list)):
            if not candidate_list.children:
                _remove_node(candidate_list)
                removed_any = True

        # Sections are collected after the list pass so this snapshot reflects
        # the removals made above.
        for candidate_section in list(doc.findall(nodes.section)):
            only_titles = all(isinstance(child, nodes.title) for child in candidate_section.children)
            if only_titles:
                _remove_node(candidate_section)
                removed_any = True

        if not removed_any:
            break


def prepare_doctree_for_llm(doc: nodes.document) -> nodes.document:
    """Return a pruned deep copy of *doc*; the input doctree is not modified.

    On the copy, all ``target``, ``transition`` and ``comment`` nodes are
    removed (in that order), then navigation-artifact bullet lists are
    dropped, and finally containers left empty are pruned.
    """
    pruned = cast(nodes.document, doc.deepcopy())

    for unwanted_type in (nodes.target, nodes.transition, nodes.comment):
        # Materialize the traversal before detaching nodes from the tree.
        for unwanted in list(pruned.findall(unwanted_type)):
            _remove_node(unwanted)

    _remove_nav_artifact_lists(pruned)
    _prune_empty_containers(pruned)
    return pruned
Loading
Loading