Skip to content

Commit 41aee24

Browse files
Improve parsing & formatting of help page descriptions (#680)
1 parent 3ea0dd6 commit 41aee24

File tree

4 files changed

+68
-28
lines changed

4 files changed

+68
-28
lines changed

linodecli/baked/operation.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import openapi3.paths
1818
from openapi3.paths import Operation, Parameter
1919

20+
from linodecli.baked.parsing import simplify_description
2021
from linodecli.baked.request import OpenAPIFilteringRequest, OpenAPIRequest
2122
from linodecli.baked.response import OpenAPIResponse
2223
from linodecli.exit_codes import ExitCodes
@@ -356,8 +357,12 @@ def __init__(
356357
self.action_aliases = {}
357358
self.action = action
358359

359-
self.summary = operation.summary
360-
self.description = operation.description.split(".")[0]
360+
# Ensure the summary has punctuation
361+
self.summary = operation.summary.rstrip(".") + "."
362+
363+
self.description_rich, self.description = simplify_description(
364+
operation.description or ""
365+
)
361366

362367
# The apiVersion attribute should not be specified as a positional argument
363368
self.params = [

linodecli/baked/parsing.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
import functools
66
import re
77
from html import unescape
8-
from typing import List, Tuple
8+
from typing import List, Optional, Tuple
99

1010
# Sentence delimiter, split on a period followed by any type of
1111
# whitespace (space, new line, tab, etc.) or by the end of the text
12-
REGEX_SENTENCE_DELIMITER = re.compile(r"\W(?:\s|$)")
12+
REGEX_SENTENCE_DELIMITER = re.compile(r"\.(?:\s|$)", flags=re.M)
1313

1414
# Matches on pattern __prefix__ at the beginning of a description
1515
# or after a comma
16-
REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([\w-]+)__")
16+
REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([^_]+)__")
1717

1818
# Matches on pattern [link title](https://.../)
1919
REGEX_MARKDOWN_LINK = re.compile(r"\[(?P<text>.*?)]\((?P<link>.*?)\)")
@@ -121,23 +121,35 @@ def get_short_description(description: str) -> str:
121121
:rtype: str
122122
"""
123123

124-
target_lines = description.splitlines()
125-
relevant_lines = None
126-
127-
for i, line in enumerate(target_lines):
124+
def __simplify(sentence: str) -> Optional[str]:
128125
# Edge case for descriptions starting with a note
129-
if line.lower().startswith("__note__"):
130-
continue
126+
if sentence.lower().startswith("__note__"):
127+
return None
128+
129+
sentence = strip_techdocs_prefixes(sentence)
131130

132-
relevant_lines = target_lines[i:]
133-
break
131+
# Check that the sentence still has content after stripping prefixes
132+
if len(sentence) < 2:
133+
return None
134134

135-
if relevant_lines is None:
135+
return sentence + "."
136+
137+
# Find the first relevant sentence
138+
result = next(
139+
simplified
140+
for simplified in iter(
141+
__simplify(sentence)
142+
for sentence in REGEX_SENTENCE_DELIMITER.split(description)
143+
)
144+
if simplified is not None
145+
)
146+
147+
if result is None:
136148
raise ValueError(
137149
f"description does not contain any relevant lines: {description}",
138150
)
139151

140-
return REGEX_SENTENCE_DELIMITER.split("\n".join(relevant_lines), 1)[0] + "."
152+
return result
141153

142154

143155
def strip_techdocs_prefixes(description: str) -> str:
@@ -150,14 +162,10 @@ def strip_techdocs_prefixes(description: str) -> str:
150162
:returns: The stripped description
151163
:rtype: str
152164
"""
153-
result_description = REGEX_TECHDOCS_PREFIX.sub(
154-
"", description.lstrip()
155-
).lstrip()
156-
157-
return result_description
165+
return REGEX_TECHDOCS_PREFIX.sub("", description.lstrip()).lstrip()
158166

159167

160-
def process_arg_description(description: str) -> Tuple[str, str]:
168+
def simplify_description(description: str) -> Tuple[str, str]:
161169
"""
162170
Processes the given raw request argument description into one suitable
163171
for help pages, etc.
@@ -173,12 +181,12 @@ def process_arg_description(description: str) -> Tuple[str, str]:
173181
return "", ""
174182

175183
result = get_short_description(description)
176-
result = strip_techdocs_prefixes(result)
177184
result = result.replace("\n", " ").replace("\r", " ")
178185

179-
description, links = extract_markdown_links(result)
186+
# NOTE: Links are only separated out for the Rich markup output; the plain description keeps its Markdown links
187+
result_no_links, links = extract_markdown_links(result)
180188

181189
if len(links) > 0:
182-
description += f" See: {'; '.join(links)}"
190+
result_no_links += f" See: {'; '.join(links)}"
183191

184-
return unescape(markdown_to_rich_markup(description)), unescape(description)
192+
return unescape(markdown_to_rich_markup(result_no_links)), unescape(result)

linodecli/baked/request.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Request details for a CLI Operation
33
"""
44

5-
from linodecli.baked.parsing import process_arg_description
5+
from linodecli.baked.parsing import simplify_description
66

77

88
class OpenAPIRequestArg:
@@ -46,7 +46,7 @@ def __init__(
4646
#: the larger response model
4747
self.path = prefix + "." + name if prefix else name
4848

49-
description_rich, description = process_arg_description(
49+
description_rich, description = simplify_description(
5050
schema.description or ""
5151
)
5252

tests/unit/test_parsing.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
extract_markdown_links,
33
get_short_description,
44
markdown_to_rich_markup,
5+
simplify_description,
56
strip_techdocs_prefixes,
67
)
78

@@ -65,7 +66,7 @@ def test_get_first_sentence(self):
6566

6667
assert (
6768
get_short_description(
68-
"__Note__. This might be a sentence.\nThis is a sentence."
69+
"__Note__ This might be a sentence.\nThis is a sentence."
6970
)
7071
== "This is a sentence."
7172
)
@@ -101,3 +102,29 @@ def test_markdown_to_rich_markup(self):
101102
== "very [i]cool[/] [b]test[/] [i]string[/]*\n[b]wow[/] [i]cool[/]* "
102103
"[italic deep_pink3 on grey15]code block[/] `"
103104
)
105+
106+
def test_simplify_description(self):
107+
# This description was not parsed correctly prior to PR #680.
108+
assert simplify_description(
109+
"The authentication methods that are allowed when connecting to "
110+
"[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).\n"
111+
"\n"
112+
"- `keys_only` is the most secure if you intend to use Lish.\n"
113+
"- `disabled` is recommended if you do not intend to use Lish at all.\n"
114+
"- If this account's Cloud Manager authentication type is set to a Third-Party Authentication method, "
115+
"`password_keys` cannot be used as your Lish authentication method. To view this account's Cloud Manager "
116+
"`authentication_type` field, send a request to the "
117+
"[Get a profile](https://techdocs.akamai.com/linode-api/reference/get-profile) operation."
118+
) == (
119+
"The authentication methods that are allowed when connecting to the Linode Shell (Lish). "
120+
"See: https://www.linode.com/docs/guides/lish/",
121+
"The authentication methods that are allowed when connecting to "
122+
"[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).",
123+
)
124+
125+
assert simplify_description(
126+
"A unique, user-defined `string` referring to the Managed Database."
127+
) == (
128+
"A unique, user-defined [italic deep_pink3 on grey15]string[/] referring to the Managed Database.",
129+
"A unique, user-defined `string` referring to the Managed Database.",
130+
)

0 commit comments

Comments
 (0)