Skip to content

Commit a3dce8d

Browse files
committed
Refactor the Fortran parser
- Swaps multi-line splits with a stack object for performance
- Removes all the indices that were used for lines and comments
- Simplifies documentation parsing in preparation for a rewrite
1 parent b81446a commit a3dce8d

File tree

1 file changed

+43
-67
lines changed

1 file changed

+43
-67
lines changed

fortls/parse_fortran.py

Lines changed: 43 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,47 +1237,40 @@ def parse(
12371237
pp_skips = []
12381238
pp_defines = []
12391239

1240-
line_ind = 0
1241-
next_line_ind = 0
1242-
line_number = 1
1240+
line_number = 0
12431241
block_id_stack = []
1244-
semi_split = []
12451242
doc_string: str = None
12461243
counters = Counter(
1247-
# line_no=1,
1248-
# line_idx=0,
1249-
# next_line_idx=0,
12501244
do=0,
12511245
ifs=0,
12521246
block=0,
12531247
select=0,
12541248
interface=0,
12551249
)
1250+
multi_lines = deque()
12561251
self.COMMENT_LINE_MATCH, self.DOC_COMMENT_MATCH = self.get_comment_regexs()
1257-
while (next_line_ind < self.nLines) or (len(semi_split) > 0):
1252+
while (line_number < self.nLines) or multi_lines:
12581253
# Get next line
1259-
if len(semi_split) > 0:
1260-
line = semi_split[0]
1261-
semi_split = semi_split[1:]
1262-
get_full = False
1263-
else:
1264-
line_ind = next_line_ind
1265-
line_number = line_ind + 1
1266-
line = self.get_line(line_ind, pp_content=True)
1267-
next_line_ind = line_ind + 1
1254+
# Get a normal line, i.e. the stack is empty
1255+
if not multi_lines:
1256+
# get_line has a 0-based index
1257+
line = self.get_line(line_number, pp_content=True)
1258+
line_number += 1
12681259
get_full = True
1260+
# Line is part of a multi-line construct, i.e. contained ';'
1261+
else:
1262+
line = multi_lines.pop()
1263+
get_full = False
1264+
12691265
if line == "":
12701266
continue # Skip empty lines
12711267
# Parse Documentation comments and skip all other comments
12721268
# this function should also nullify doc_string
1273-
idx = self._parse_documentation(
1274-
line, line_number, file_ast, doc_string, next_line_ind
1275-
)
1269+
idx = self._parse_docs(line, line_number, file_ast, doc_string)
12761270
if idx:
1277-
next_line_ind = idx[0]
1271+
line_number = idx[0]
12781272
doc_string = idx[1]
12791273
continue
1280-
# Handle trailing doc strings
12811274
if doc_string:
12821275
file_ast.add_doc("!! " + doc_string)
12831276
self.parser_debug("Doc", doc_string, line_number)
@@ -1292,14 +1285,14 @@ def parse(
12921285
do_skip = True
12931286
if do_skip:
12941287
continue
1295-
# Get full line
1288+
# Get full line, seek forward for code lines
1289+
# @note line_number-1 refers to the array index for the current line
12961290
if get_full:
12971291
_, line, post_lines = self.get_code_line(
1298-
line_ind, backward=False, pp_content=True
1292+
line_number - 1, backward=False, pp_content=True
12991293
)
1300-
next_line_ind += len(post_lines)
1294+
line_number += len(post_lines)
13011295
line = "".join([line] + post_lines)
1302-
# print(line)
13031296
line, line_label = strip_line_label(line)
13041297
line_stripped = strip_strings(line, maintain_len=True)
13051298
# Find trailing comments
@@ -1308,29 +1301,17 @@ def parse(
13081301
line_no_comment = line[:comm_ind]
13091302
line_post_comment = line[comm_ind:]
13101303
line_stripped = line_stripped[:comm_ind]
1304+
# Look for trailing doc string
1305+
doc_match = FRegex.FREE_DOC.match(line_post_comment)
1306+
if doc_match:
1307+
doc_string = line_post_comment[doc_match.end(0) :].strip()
13111308
else:
13121309
line_no_comment = line
1313-
line_post_comment = None
1314-
# Split lines with semicolons
1315-
semi_colon_ind = line_stripped.find(";")
1316-
if semi_colon_ind > 0:
1317-
semi_inds = []
1318-
tmp_line = line_stripped
1319-
while semi_colon_ind >= 0:
1320-
semi_inds.append(semi_colon_ind)
1321-
tmp_line = tmp_line[semi_colon_ind + 1 :]
1322-
semi_colon_ind = tmp_line.find(";")
1323-
i0 = 0
1324-
for semi_colon_ind in semi_inds:
1325-
semi_split.append(line[i0 : i0 + semi_colon_ind])
1326-
i0 += semi_colon_ind + 1
1327-
if len(semi_split) > 0:
1328-
semi_split.append(line[i0:])
1329-
line = semi_split[0]
1330-
semi_split = semi_split[1:]
1331-
line_stripped = strip_strings(line, maintain_len=True)
1332-
line_no_comment = line
1333-
line_post_comment = None
1310+
# Split lines with semicolons, place the multiple lines into a stack
1311+
if line_stripped.find(";") >= 0:
1312+
multi_lines.extendleft(line_stripped.split(";"))
1313+
line = multi_lines.pop()
1314+
line_stripped = line
13341315
self.line = line
13351316
# Test for scope end
13361317
if file_ast.END_SCOPE_REGEX is not None:
@@ -1355,11 +1336,6 @@ def parse(
13551336
# Mark contains statement
13561337
if self._parse_contains(line_no_comment, line_number, file_ast):
13571338
continue
1358-
# Look for trailing doc string
1359-
if line_post_comment:
1360-
doc_match = FRegex.FREE_DOC.match(line_post_comment)
1361-
if doc_match:
1362-
doc_string = line_post_comment[doc_match.end(0) :].strip()
13631339
# Loop through tests
13641340
obj_read = self.get_fortran_definition(line)
13651341
# Move to next line if nothing in the definition tests matches
@@ -1754,21 +1730,20 @@ def _parse_contains(self, line: str, ln: int, file_ast: fortran_ast):
17541730
self.parser_debug("CONTAINS", self.line, ln)
17551731
return True
17561732

1757-
def _parse_documentation(
1733+
def _parse_docs(
17581734
self,
17591735
line: str,
17601736
ln: int,
17611737
file_ast: fortran_ast,
17621738
doc_string: str,
1763-
next_ln_idx: int,
17641739
):
17651740
match = self.COMMENT_LINE_MATCH.match(line)
17661741
if not match:
17671742
return False
17681743
# Check for documentation
17691744
doc_match = self.DOC_COMMENT_MATCH.match(line)
17701745
if not doc_match:
1771-
return next_ln_idx, doc_string
1746+
return ln, doc_string
17721747
doc_lines = [line[doc_match.end(0) :].strip()]
17731748
if doc_match.group(1) == ">":
17741749
doc_forward = True
@@ -1777,25 +1752,26 @@ def _parse_documentation(
17771752
doc_lines = [doc_string] + doc_lines
17781753
doc_string = None
17791754
doc_forward = False
1780-
if next_ln_idx < self.nLines:
1781-
next_line = self.get_line(next_ln_idx, pp_content=True)
1782-
next_ln_idx += 1
1783-
doc_match = self.DOC_COMMENT_MATCH.match(next_line)
1784-
while doc_match and (next_ln_idx < self.nLines):
1785-
doc_lines.append(next_line[doc_match.end(0) :].strip())
1786-
next_line = self.get_line(next_ln_idx, pp_content=True)
1787-
next_ln_idx += 1
1788-
doc_match = self.DOC_COMMENT_MATCH.match(next_line)
1789-
next_ln_idx -= 1
1755+
_ln = ln
1756+
if ln < self.nLines:
1757+
for i in range(ln, self.nLines):
1758+
# @note this gets the next line, index is 0-based
1759+
next_line = self.get_line(i, pp_content=True)
1760+
match = self.DOC_COMMENT_MATCH.match(next_line)
1761+
if not match:
1762+
ln = i # move the line number at the end of the docstring
1763+
break
1764+
doc_lines.append(next_line[match.end(0) :].strip())
1765+
17901766
# Count the total length of all the strings in doc_lines
17911767
# most efficient implementation, see: shorturl.at/dfmyV
17921768
if len("".join(doc_lines)) > 0:
17931769
file_ast.add_doc("!! " + "\n!! ".join(doc_lines), forward=doc_forward)
17941770
# if debug:
17951771
for (i, doc_line) in enumerate(doc_lines):
1796-
log.debug(f"{doc_line} !!! Doc string({ln + i})")
1772+
log.debug(f"{doc_line} !!! Doc string({_ln + i})")
17971773
# self.parser_debug("Doc", doc_line, line_number + i)
1798-
return next_ln_idx, doc_string
1774+
return ln, doc_string
17991775

18001776
@staticmethod
18011777
def parser_debug(msg: str, line: str, ln: int, scope: bool = False):

0 commit comments

Comments
 (0)