diff --git a/docstring_to_markdown/__init__.py b/docstring_to_markdown/__init__.py index a9367f0..a70ed84 100644 --- a/docstring_to_markdown/__init__.py +++ b/docstring_to_markdown/__init__.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from importlib_metadata import EntryPoint -__version__ = "0.16" +__version__ = "0.17" class UnknownFormatError(Exception): diff --git a/docstring_to_markdown/rst.py b/docstring_to_markdown/rst.py index 4149773..040b26d 100644 --- a/docstring_to_markdown/rst.py +++ b/docstring_to_markdown/rst.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from enum import IntEnum, auto +from textwrap import dedent from types import SimpleNamespace from typing import Callable, Match, Union, List, Dict import re @@ -299,8 +300,8 @@ def inline_markdown(self): SECTION_DIRECTIVES: Dict[str, List[Directive]] = { 'Parameters': [ Directive( - pattern=r'^(?P<arg>\*\*kwargs|\*args)$', - replacement=r'- `\g<arg>`' + pattern=r'^(?P<arg>(\w[\w\d_\.]*)|\*\*kwargs|\*args)$', + replacement=r'- `\g<arg>`:' ), Directive( pattern=r'^(?P<arg1>[^:\s]+\d), (?P<arg2>[^:\s]+\d), \.\.\. 
: (?P<type>.+)$', @@ -336,6 +337,7 @@ def _find_directive_pattern(name: str): def looks_like_rst(value: str) -> bool: + value = dedent(value) # check if any of the characteristic sections (and the properly formatted underline) is there for section in _RST_SECTIONS: if (section + '\n' + '-' * len(section) + '\n') in value: @@ -542,10 +544,20 @@ class BlockParser(IParser): follower: Union['IParser', None] = None _buffer: List[str] _block_started: bool + _indent: Union[int, None] + should_measure_indent = True def __init__(self): self._buffer = [] self._block_started = False + self._indent = None + + def measure_indent(self, line: str): + line_indent = len(line) - len(line.lstrip()) + if self._indent is None: + self._indent = line_indent + else: + self._indent = min(line_indent, self._indent) @abstractmethod def can_parse(self, line: str) -> bool: @@ -558,6 +570,8 @@ def _start_block(self, language: str): def consume(self, line: str): if not self._block_started: raise ValueError('Block has not started') # pragma: no cover + if self.should_measure_indent: + self.measure_indent(line) self._buffer.append(line) def finish_consumption(self, final: bool) -> str: @@ -565,17 +579,24 @@ def finish_consumption(self, final: bool) -> str: if self._buffer[len(self._buffer) - 1].strip() == '': self._buffer.pop() self._buffer.append(self.enclosure + '\n') - result = '\n'.join(self._buffer) + indent = " " * (self._indent or 0) + intermediate = '\n'.join(self._buffer) + result = '\n'.join([ + (indent + line) if line else line + for line in intermediate.splitlines() + ]) if indent else intermediate if not final: result += '\n' self._buffer = [] self._block_started = False + self._indent = None return result class IndentedBlockParser(BlockParser, ABC): _is_block_beginning: bool _block_indent_size: Union[int, None] + should_measure_indent = False def __init__(self): super(IndentedBlockParser, self).__init__() @@ -599,6 +620,7 @@ def consume(self, line: str): return if self._block_indent_size 
is None: self._block_indent_size = len(line) - len(line.lstrip()) + self.measure_indent(line) super().consume(line[self._block_indent_size:]) def finish_consumption(self, final: bool) -> str: @@ -684,6 +706,7 @@ def can_parse(self, line: str): return line.strip() in self.directives def initiate_parsing(self, line: str, current_language: str): + self.measure_indent(line) admonition = self.directives[line.strip()] self._start_block(f'\n{admonition.block_markdown}\n') return IBlockBeginning(remainder='') @@ -694,6 +717,7 @@ def can_parse(self, line: str) -> bool: return re.match(CODE_BLOCK_PATTERN, line) is not None def initiate_parsing(self, line: str, current_language: str) -> IBlockBeginning: + self.measure_indent(line) match = re.match(CODE_BLOCK_PATTERN, line) # already checked in can_parse assert match @@ -753,6 +777,8 @@ def rst_to_markdown(text: str, extract_signature: bool = True) -> str: most_recent_section: Union[str, None] = None is_first_line = True + text = dedent(text) + def flush_buffer(): nonlocal lines_buffer lines = '\n'.join(lines_buffer) @@ -766,7 +792,8 @@ def flush_buffer(): lines_buffer = [] return lines - for line in text.split('\n'): + lines = text.split('\n') + for i, line in enumerate(lines): if is_first_line: if extract_signature: signature_match = re.match(r'^(?P<name>\S+)\((?P<params>.*)\)$', line) @@ -809,7 +836,9 @@ def flush_buffer(): else: if most_recent_section in SECTION_DIRECTIVES: for section_directive in SECTION_DIRECTIVES[most_recent_section]: - if re.match(section_directive.pattern, trimmed_line): + next_line = lines[i + 1] if i + 1 < len(lines) else "" + is_next_line_section = set(next_line.strip()) == {"-"} + if re.match(section_directive.pattern, line) and not is_next_line_section: line = re.sub(section_directive.pattern, section_directive.replacement, trimmed_line) break if trimmed_line.rstrip() in RST_SECTIONS: diff --git a/tests/test_rst.py b/tests/test_rst.py index c645def..4bafe26 100644 --- a/tests/test_rst.py +++ 
b/tests/test_rst.py @@ -337,7 +337,7 @@ def func(): pass - `x`: array_like Input array. -- `**kwargs` +- `**kwargs`: For other keyword-only arguments, see the ufunc docs. """ @@ -638,6 +638,119 @@ def func(): pass """ +# this format is often used by polars +PARAMETERS_WITHOUT_TYPE = """ +Parameters +---------- +source + Path(s) to a file or directory + When needing to authenticate for scanning cloud locations, see the + `storage_options` parameter. +columns + Columns to select. Accepts a list of column indices (starting at zero) or a list + of column names. +n_rows + Stop reading from parquet file after reading `n_rows`. + Only valid when `use_pyarrow=False`. + +Returns +------- +DataFrame +""" + +PARAMETERS_WITHOUT_TYPE_MARKDOWN = """ +#### Parameters + +- `source`: + Path(s) to a file or directory + When needing to authenticate for scanning cloud locations, see the + `storage_options` parameter. +- `columns`: + Columns to select. Accepts a list of column indices (starting at zero) or a list + of column names. +- `n_rows`: + Stop reading from parquet file after reading `n_rows`. + Only valid when `use_pyarrow=False`. + +#### Returns + +DataFrame +""" + +INDENTED_DOCSTRING = """ + Parameters + ---------- + glob + Expand path given via globbing rules. +""" + +INDENTED_DOCSTRING_MARKDOWN = """ +#### Parameters + +- `glob`: + Expand path given via globbing rules. +""" + + +WARNINGS_IN_PARAMETERS = """ +Parameters +---------- +glob + Expand path given via globbing rules. +schema + Specify the datatypes of the columns. The datatypes must match the + datatypes in the file(s). If there are extra columns that are not in the + file(s), consider also enabling `allow_missing_columns`. + + .. warning:: + This functionality is considered **unstable**. It may be changed + at any point without it being considered a breaking change. +hive_schema + The column names and data types of the columns by which the data is partitioned. 
+ If set to `None` (default), the schema of the Hive partitions is inferred. + + .. warning:: + This functionality is considered **unstable**. It may be changed + at any point without it being considered a breaking change. +try_parse_hive_dates + Whether to try parsing hive values as date/datetime types. +""" + + +WARNINGS_IN_PARAMETERS_MARKDOWN = """ +#### Parameters + +- `glob`: + Expand path given via globbing rules. +- `schema`: + Specify the datatypes of the columns. The datatypes must match the + datatypes in the file(s). If there are extra columns that are not in the + file(s), consider also enabling `allow_missing_columns`. + + + --- + ⚠️ **Warning** + + This functionality is considered **unstable**. It may be changed + at any point without it being considered a breaking change. + + --- +- `hive_schema`: + The column names and data types of the columns by which the data is partitioned. + If set to `None` (default), the schema of the Hive partitions is inferred. + + + --- + ⚠️ **Warning** + + This functionality is considered **unstable**. It may be changed + at any point without it being considered a breaking change. + + --- +- `try_parse_hive_dates`: + Whether to try parsing hive values as date/datetime types. +""" + NESTED_PARAMETERS = """ Parameters ---------- @@ -887,6 +1000,18 @@ def foo(): 'rst': NESTED_PARAMETERS, 'md': NESTED_PARAMETERS_MARKDOWN }, + 'converts parameter without type': { + 'rst': PARAMETERS_WITHOUT_TYPE, + 'md': PARAMETERS_WITHOUT_TYPE_MARKDOWN + }, + 'converts indented parameters lists': { + 'rst': INDENTED_DOCSTRING, + 'md': INDENTED_DOCSTRING_MARKDOWN + }, + 'converts warnings in parameters lists': { + 'rst': WARNINGS_IN_PARAMETERS, + 'md': WARNINGS_IN_PARAMETERS_MARKDOWN + }, 'converts sphinx signatures': { 'rst': SPHINX_SIGNATURE, 'md': SPHINX_SIGNATURE_MARKDOWN