Improve parsing & formatting of help page descriptions (#680)

lgarber-akamai · web-flow · commit 41aee2471186 · 2024-11-19T13:33:52.000-05:00
diff --git a/linodecli/baked/operation.py b/linodecli/baked/operation.py
@@ -17,6 +17,7 @@
 import openapi3.paths
 from openapi3.paths import Operation, Parameter
 
+from linodecli.baked.parsing import simplify_description
 from linodecli.baked.request import OpenAPIFilteringRequest, OpenAPIRequest
 from linodecli.baked.response import OpenAPIResponse
 from linodecli.exit_codes import ExitCodes
@@ -356,8 +357,12 @@ def __init__(
             self.action_aliases = {}
             self.action = action
 
-        self.summary = operation.summary
-        self.description = operation.description.split(".")[0]
+        # Ensure the summary has punctuation
+        self.summary = operation.summary.rstrip(".") + "."
+
+        self.description_rich, self.description = simplify_description(
+            operation.description or ""
+        )
 
         # The apiVersion attribute should not be specified as a positional argument
         self.params = [
diff --git a/linodecli/baked/parsing.py b/linodecli/baked/parsing.py
@@ -5,15 +5,15 @@
 import functools
 import re
 from html import unescape
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 # Sentence delimiter, split on a period followed by any type of
 # whitespace (space, new line, tab, etc.)
-REGEX_SENTENCE_DELIMITER = re.compile(r"\W(?:\s|$)")
+REGEX_SENTENCE_DELIMITER = re.compile(r"\.(?:\s|$)", flags=re.M)
 
 # Matches on pattern __prefix__ at the beginning of a description
 # or after a comma
-REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([\w-]+)__")
+REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([^_]+)__")
 
 # Matches on pattern [link title](https://.../)
 REGEX_MARKDOWN_LINK = re.compile(r"\[(?P<text>.*?)]\((?P<link>.*?)\)")
@@ -121,23 +121,35 @@ def get_short_description(description: str) -> str:
     :rtype: set
     """
 
-    target_lines = description.splitlines()
-    relevant_lines = None
-
-    for i, line in enumerate(target_lines):
+    def __simplify(sentence: str) -> Optional[str]:
         # Edge case for descriptions starting with a note
-        if line.lower().startswith("__note__"):
-            continue
+        if sentence.lower().startswith("__note__"):
+            return None
+
+        sentence = strip_techdocs_prefixes(sentence)
 
-        relevant_lines = target_lines[i:]
-        break
+        # Check that the sentence still has content after stripping prefixes
+        if len(sentence) < 2:
+            return None
 
-    if relevant_lines is None:
+        return sentence + "."
+
+    # Find the first relevant sentence
+    result = next(
+        simplified
+        for simplified in iter(
+            __simplify(sentence)
+            for sentence in REGEX_SENTENCE_DELIMITER.split(description)
+        )
+        if simplified is not None
+    )
+
+    if result is None:
         raise ValueError(
             f"description does not contain any relevant lines: {description}",
         )
 
-    return REGEX_SENTENCE_DELIMITER.split("\n".join(relevant_lines), 1)[0] + "."
+    return result
 
 
 def strip_techdocs_prefixes(description: str) -> str:
@@ -150,14 +162,10 @@ def strip_techdocs_prefixes(description: str) -> str:
     :returns: The stripped description
     :rtype: str
     """
-    result_description = REGEX_TECHDOCS_PREFIX.sub(
-        "", description.lstrip()
-    ).lstrip()
-
-    return result_description
+    return REGEX_TECHDOCS_PREFIX.sub("", description.lstrip()).lstrip()
 
 
-def process_arg_description(description: str) -> Tuple[str, str]:
+def simplify_description(description: str) -> Tuple[str, str]:
     """
     Processes the given raw request argument description into one suitable
     for help pages, etc.
@@ -173,12 +181,12 @@ def process_arg_description(description: str) -> Tuple[str, str]:
         return "", ""
 
     result = get_short_description(description)
-    result = strip_techdocs_prefixes(result)
     result = result.replace("\n", " ").replace("\r", " ")
 
-    description, links = extract_markdown_links(result)
+    # NOTE: Links should only be separated from Rich Markdown links
+    result_no_links, links = extract_markdown_links(result)
 
     if len(links) > 0:
-        description += f" See: {'; '.join(links)}"
+        result_no_links += f" See: {'; '.join(links)}"
 
-    return unescape(markdown_to_rich_markup(description)), unescape(description)
+    return unescape(markdown_to_rich_markup(result_no_links)), unescape(result)
diff --git a/linodecli/baked/request.py b/linodecli/baked/request.py
@@ -2,7 +2,7 @@
 Request details for a CLI Operation
 """
 
-from linodecli.baked.parsing import process_arg_description
+from linodecli.baked.parsing import simplify_description
 
 
 class OpenAPIRequestArg:
@@ -46,7 +46,7 @@ def __init__(
         #: the larger response model
         self.path = prefix + "." + name if prefix else name
 
-        description_rich, description = process_arg_description(
+        description_rich, description = simplify_description(
             schema.description or ""
         )
 
diff --git a/tests/unit/test_parsing.py b/tests/unit/test_parsing.py
@@ -2,6 +2,7 @@
     extract_markdown_links,
     get_short_description,
     markdown_to_rich_markup,
+    simplify_description,
     strip_techdocs_prefixes,
 )
 
@@ -65,7 +66,7 @@ def test_get_first_sentence(self):
 
         assert (
             get_short_description(
-                "__Note__. This might be a sentence.\nThis is a sentence."
+                "__Note__ This might be a sentence.\nThis is a sentence."
             )
             == "This is a sentence."
         )
@@ -101,3 +102,29 @@ def test_markdown_to_rich_markup(self):
             == "very [i]cool[/] [b]test[/] [i]string[/]*\n[b]wow[/] [i]cool[/]* "
             "[italic deep_pink3 on grey15]code block[/] `"
         )
+
+    def test_simplify_description(self):
+        # This description was not parsed correctly prior to PR #680.
+        assert simplify_description(
+            "The authentication methods that are allowed when connecting to "
+            "[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).\n"
+            "\n"
+            "- `keys_only` is the most secure if you intend to use Lish.\n"
+            "- `disabled` is recommended if you do not intend to use Lish at all.\n"
+            "- If this account's Cloud Manager authentication type is set to a Third-Party Authentication method, "
+            "`password_keys` cannot be used as your Lish authentication method. To view this account's Cloud Manager "
+            "`authentication_type` field, send a request to the "
+            "[Get a profile](https://techdocs.akamai.com/linode-api/reference/get-profile) operation."
+        ) == (
+            "The authentication methods that are allowed when connecting to the Linode Shell (Lish). "
+            "See: https://www.linode.com/docs/guides/lish/",
+            "The authentication methods that are allowed when connecting to "
+            "[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).",
+        )
+
+        assert simplify_description(
+            "A unique, user-defined `string` referring to the Managed Database."
+        ) == (
+            "A unique, user-defined [italic deep_pink3 on grey15]string[/] referring to the Managed Database.",
+            "A unique, user-defined `string` referring to the Managed Database.",
+        )