Skip to content

Commit d2ff31a

Browse files
committed
Rewrites the documentation parsing.
This should make it easier to extend the documentation-parsing functionality for FORD and Doxygen in the future.
1 parent 6aac856 commit d2ff31a

File tree

2 files changed

+114
-51
lines changed

2 files changed

+114
-51
lines changed

fortls/objects.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1859,7 +1859,7 @@ def __init__(self, file_obj=None):
18591859
self.END_SCOPE_REGEX: Pattern = None
18601860
self.enc_scope_name: str = None
18611861
self.last_obj = None
1862-
self.pending_doc = None
1862+
self.pending_doc: str = None
18631863

18641864
def create_none_scope(self):
18651865
"""Create empty scope to hold non-module contained items"""

fortls/parse_fortran.py

Lines changed: 113 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,7 +1238,7 @@ def parse(
12381238

12391239
line_number = 0
12401240
block_id_stack = []
1241-
doc_string: str = None
1241+
docs: list[str] = [] # list used to temporarily store docstrings
12421242
counters = Counter(
12431243
do=0,
12441244
ifs=0,
@@ -1263,17 +1263,13 @@ def parse(
12631263

12641264
if line == "":
12651265
continue # Skip empty lines
1266-
# Parse Documentation comments and skip all other comments
1267-
# this function should also nullify doc_string
1268-
idx = self._parse_docs(line, line_number, file_ast, doc_string)
1266+
1267+
# Parse documentation strings to AST nodes, this implicitly operates
1268+
# on docs, i.e. appends or nullifies it
1269+
idx = self.parse_docs(line, line_number, file_ast, docs)
12691270
if idx:
1270-
line_number = idx[0]
1271-
doc_string = idx[1]
1271+
line_number = idx
12721272
continue
1273-
if doc_string:
1274-
file_ast.add_doc("!! " + doc_string)
1275-
self.parser_debug("Doc", doc_string, line_number)
1276-
doc_string = None
12771273
# Handle preprocessing regions
12781274
do_skip = False
12791275
for pp_reg in pp_skips:
@@ -1298,12 +1294,8 @@ def parse(
12981294
comm_ind = line_stripped.find("!")
12991295
if comm_ind >= 0:
13001296
line_no_comment = line[:comm_ind]
1301-
line_post_comment = line[comm_ind:]
13021297
line_stripped = line_stripped[:comm_ind]
1303-
# Look for trailing doc string
1304-
doc_match = FRegex.FREE_DOC.match(line_post_comment)
1305-
if doc_match:
1306-
doc_string = line_post_comment[doc_match.end(0) :].strip()
1298+
docs = self.get_single_line_docstring(line[comm_ind:])
13071299
else:
13081300
line_no_comment = line
13091301
# Split lines with semicolons, place the multiple lines into a stack
@@ -1729,48 +1721,119 @@ def _parse_contains(self, line: str, ln: int, file_ast: fortran_ast):
17291721
self.parser_debug("CONTAINS", self.line, ln)
17301722
return True
17311723

1732-
def _parse_docs(
1733-
self,
1734-
line: str,
1735-
ln: int,
1736-
file_ast: fortran_ast,
1737-
doc_string: str,
1738-
):
1739-
match = self.COMMENT_LINE_MATCH.match(line)
1740-
if not match:
1724+
def parse_docs(self, line: str, ln: int, file_ast: fortran_ast, docs: list[str]):
1725+
"""Parse documentation strings of style Doxygen or FORD.
1726+
Multiline docstrings are detected if the first comment starts with `!>`
1727+
docstring continuations are detected with either `!>`, `!<` or `!!`
1728+
1729+
Parameters
1730+
----------
1731+
line : str
1732+
Document line
1733+
ln : int
1734+
Line number
1735+
file_ast : fortran_ast
1736+
AST object
1737+
docs : list[str]
1738+
Docstrings that are pending processing e.g. single line docstrings
1739+
"""
1740+
1741+
def format(docs: list[str]) -> str:
1742+
if len(docs) == 1:
1743+
return f"!! {docs[0]}"
1744+
return "!! " + "\n!! ".join(docs)
1745+
1746+
def add_line_comment(file_ast: fortran_ast, docs: list[str]):
1747+
# Handle dangling comments from previous line
1748+
if docs:
1749+
file_ast.add_doc(format(docs))
1750+
log.debug(f"{format(docs)} !!! Doc string({ln})")
1751+
docs[:] = [] # empty the documentation stack
1752+
1753+
# Check for comments in line
1754+
if not self.COMMENT_LINE_MATCH.match(line):
1755+
add_line_comment(file_ast, docs)
17411756
return False
17421757
# Check for documentation
17431758
doc_match = self.DOC_COMMENT_MATCH.match(line)
17441759
if not doc_match:
1745-
return ln, doc_string
1746-
doc_lines = [line[doc_match.end(0) :].strip()]
1747-
if doc_match.group(1) == ">":
1748-
doc_forward = True
1749-
else:
1750-
if doc_string:
1751-
doc_lines = [doc_string] + doc_lines
1752-
doc_string = None
1753-
doc_forward = False
1760+
add_line_comment(file_ast, docs)
1761+
return False
1762+
17541763
_ln = ln
1755-
if ln < self.nLines:
1756-
for i in range(ln, self.nLines):
1757-
# @note this gets the next line, index is 0-based
1758-
next_line = self.get_line(i, pp_content=True)
1759-
match = self.DOC_COMMENT_MATCH.match(next_line)
1760-
if not match:
1761-
ln = i # move the line number at the end of the docstring
1762-
break
1763-
doc_lines.append(next_line[match.end(0) :].strip())
1764+
ln, docs[:], predocmark = self.get_docstring(ln, line, doc_match, docs)
17641765

1765-
# Count the total length of all the stings in doc_lines
1766+
# Count the total length of all the strings in docs
17661767
# most efficient implementation, see: shorturl.at/dfmyV
1767-
if len("".join(doc_lines)) > 0:
1768-
file_ast.add_doc("!! " + "\n!! ".join(doc_lines), forward=doc_forward)
1769-
# if debug:
1770-
for (i, doc_line) in enumerate(doc_lines):
1768+
if len("".join(docs)) > 0:
1769+
file_ast.add_doc(format(docs), forward=predocmark)
1770+
for (i, doc_line) in enumerate(docs):
17711771
log.debug(f"{doc_line} !!! Doc string({_ln + i})")
1772-
# self.parser_debug("Doc", doc_line, line_number + i)
1773-
return ln, doc_string
1772+
docs[:] = []
1773+
return ln
1774+
1775+
def get_docstring(
1776+
self, ln: int, line: str, match: Pattern, docs: list[str]
1777+
) -> tuple[int, list[str], bool]:
1778+
"""Extract entire documentation strings from the current file position
1779+
1780+
Parameters
1781+
----------
1782+
ln : int
1783+
Line number
1784+
line : str
1785+
Document line, not necessarily produced by `get_line()`
1786+
match : Pattern
1787+
Regular expression DOC match
1788+
docs : list[str]
1789+
Docstrings that are pending processing e.g. single line docstrings
1790+
1791+
Returns
1792+
-------
1793+
tuple[int, list[str], bool]
1794+
The new line number at the end of the docstring, the docstring and
1795+
a boolean flag indicating whether the docstring precedes the AST node
1796+
(Doxygen style) or succeeds it (traditional FORD style)
1797+
"""
1798+
docstring: list[str] = docs
1799+
docstring.append(line[match.end(0) :].strip())
1800+
predocmark = True if match.group(1) == ">" else False
1801+
1802+
if ln >= self.nLines:
1803+
return ln, docstring, predocmark
1804+
1805+
# @note line index is 0-based
1806+
# Start from the current line until EOF and check for docs
1807+
for i in range(ln, self.nLines):
1808+
next_line = self.get_line(i, pp_content=True)
1809+
match = self.DOC_COMMENT_MATCH.match(next_line)
1810+
if not match:
1811+
ln = i
1812+
break
1813+
docstring.append(next_line[match.end(0) :].strip())
1814+
return ln, docstring, predocmark
1815+
1816+
def get_single_line_docstring(self, line: str) -> list[str]:
1817+
"""Get a docstring of a single line. This is the same for both Legacy
1818+
and Modern Fortran
1819+
1820+
Parameters
1821+
----------
1822+
line : str
1823+
Line of code
1824+
1825+
Returns
1826+
-------
1827+
list[str]
1828+
A list containing the docstring. List will be empty if there is no
1829+
match or the match is an empty string itself
1830+
"""
1831+
match = FRegex.FREE_DOC.match(line)
1832+
if not match:
1833+
return []
1834+
# if the string is empty return an empty list instead
1835+
doc = line[match.end(0) :].strip()
1836+
return [doc] if doc else []
17741837

17751838
@staticmethod
17761839
def parser_debug(msg: str, line: str, ln: int, scope: bool = False):
@@ -1779,7 +1842,7 @@ def parser_debug(msg: str, line: str, ln: int, scope: bool = False):
17791842
else:
17801843
log.debug(f"{line.strip()} !!! {msg} statement({ln})")
17811844

1782-
def get_comment_regexs(self):
1845+
def get_comment_regexs(self) -> tuple[Pattern, Pattern]:
17831846
if self.fixed:
17841847
return FRegex.FIXED_COMMENT, FRegex.FIXED_DOC
17851848
return FRegex.FREE_COMMENT, FRegex.FREE_DOC

0 commit comments

Comments
 (0)