55import functools
66import re
77from html import unescape
8- from typing import List , Tuple
8+ from typing import List , Optional , Tuple
99
1010# Sentence delimiter, split on a period followed by any type of
1111# whitespace (space, new line, tab, etc.)
12- REGEX_SENTENCE_DELIMITER = re .compile (r"\W (?:\s|$)" )
12+ REGEX_SENTENCE_DELIMITER = re .compile (r"\. (?:\s|$)" , flags = re . M )
1313
1414# Matches on pattern __prefix__ at the beginning of a description
1515# or after a comma
16- REGEX_TECHDOCS_PREFIX = re .compile (r"(?:, |\A)__([\w- ]+)__" )
16+ REGEX_TECHDOCS_PREFIX = re .compile (r"(?:, |\A)__([^_ ]+)__" )
1717
1818# Matches on pattern [link title](https://.../)
1919REGEX_MARKDOWN_LINK = re .compile (r"\[(?P<text>.*?)]\((?P<link>.*?)\)" )
@@ -121,23 +121,35 @@ def get_short_description(description: str) -> str:
121121 :rtype: str
122122 """
123123
124- target_lines = description .splitlines ()
125- relevant_lines = None
126-
127- for i , line in enumerate (target_lines ):
124+ def __simplify (sentence : str ) -> Optional [str ]:
128125 # Edge case for descriptions starting with a note
129- if line .lower ().startswith ("__note__" ):
130- continue
126+ if sentence .lower ().startswith ("__note__" ):
127+ return None
128+
129+ sentence = strip_techdocs_prefixes (sentence )
131130
132- relevant_lines = target_lines [i :]
133- break
131+ # Check that the sentence still has content after stripping prefixes
132+ if len (sentence ) < 2 :
133+ return None
134134
135- if relevant_lines is None :
135+ return sentence + "."
136+
137+ # Find the first relevant sentence
138+ result = next (
139+ simplified
140+ for simplified in iter (
141+ __simplify (sentence )
142+ for sentence in REGEX_SENTENCE_DELIMITER .split (description )
143+ )
144+ if simplified is not None
145+ )
146+
147+ if result is None :
136148 raise ValueError (
137149 f"description does not contain any relevant lines: { description } " ,
138150 )
139151
140- return REGEX_SENTENCE_DELIMITER . split ( " \n " . join ( relevant_lines ), 1 )[ 0 ] + "."
152+ return result
141153
142154
143155def strip_techdocs_prefixes (description : str ) -> str :
@@ -150,14 +162,10 @@ def strip_techdocs_prefixes(description: str) -> str:
150162 :returns: The stripped description
151163 :rtype: str
152164 """
153- result_description = REGEX_TECHDOCS_PREFIX .sub (
154- "" , description .lstrip ()
155- ).lstrip ()
156-
157- return result_description
165+ return REGEX_TECHDOCS_PREFIX .sub ("" , description .lstrip ()).lstrip ()
158166
159167
160- def process_arg_description (description : str ) -> Tuple [str , str ]:
168+ def simplify_description (description : str ) -> Tuple [str , str ]:
161169 """
162170 Processes the given raw request argument description into one suitable
163171 for help pages, etc.
@@ -173,12 +181,12 @@ def process_arg_description(description: str) -> Tuple[str, str]:
173181 return "" , ""
174182
175183 result = get_short_description (description )
176- result = strip_techdocs_prefixes (result )
177184 result = result .replace ("\n " , " " ).replace ("\r " , " " )
178185
179- description , links = extract_markdown_links (result )
186+ # NOTE: Links are only split out of the text for the Rich markup output; the plain-text output keeps the original Markdown links inline
187+ result_no_links , links = extract_markdown_links (result )
180188
181189 if len (links ) > 0 :
182- description += f" See: { '; ' .join (links )} "
190+ result_no_links += f" See: { '; ' .join (links )} "
183191
184- return unescape (markdown_to_rich_markup (description )), unescape (description )
192+ return unescape (markdown_to_rich_markup (result_no_links )), unescape (result )
0 commit comments