Skip to content

Commit c520b65

Browse files
committed
Anansi: Discover objects.inv also from RTD and PyPI
1 parent b4f077a commit c520b65

File tree

5 files changed

+134
-16
lines changed

5 files changed

+134
-16
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
- Anansi: Provide `anansi list-projects` subcommand, to list curated
1515
projects managed in accompanying `curated.yaml` file.
1616
- Anansi: Accept `--threshold` option, forwarding to `sphobjinv`.
17+
- Anansi: Also discover `objects.inv` from Read the Docs (RTD) and PyPI.

docs/usage.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ linksmith inventory
5959
(anansi)=
6060
## Anansi
6161

62-
Suggest references from curated intersphinx inventories.
62+
Suggest references from intersphinx inventories, derived from curated projects,
63+
Read the Docs (RTD), or PyPI.
6364

6465
:::{rubric} Synopsis
6566
:::
@@ -71,6 +72,12 @@ anansi suggest sarge capture
7172
```shell
7273
anansi suggest matplotlib draw
7374
```
75+
```shell
76+
anansi suggest requests patch
77+
```
78+
```shell
79+
anansi suggest beradio json
80+
```
7481

7582
Display list of curated projects.
7683
```shell

linksmith/sphinx/community/anansi.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
from linksmith.settings import help_config
2727
from linksmith.sphinx.inventory import InventoryManager
28+
from linksmith.sphinx.util import RemoteObjectsInv
2829
from linksmith.util.data import multikeysort
2930

3031
logger = logging.getLogger(__name__)
@@ -82,7 +83,24 @@ def to_list(self):
8283
data.append(item.to_dict())
8384
return data
8485

85-
def suggest(self, project: str, term: str, threshold: int = 50):
86+
def get_project_documentation_url(self, project: str) -> str:
    """
    Given a project name, attempt to resolve it via curated list, RTD, or PyPI.

    The curated items in `self.items` are consulted first, comparing each
    item's `name` with `project`. Only when there is no match, discovery is
    delegated to `RemoteObjectsInv(project).discover()`.

    :param project: Name of the project to resolve.
    :return: The documentation URL of the project. The caller is expected
             to append `/objects.inv` itself.
    :raises KeyError: When the project is neither curated, nor discoverable
                      remotely. The originating `FileNotFoundError` is
                      attached as `__cause__`.
    """
    logger.info(f"Attempting to resolve project from curated list: {project}")
    for item in self.items:
        if item.name == project:
            return item.url

    logger.info(f"Attempting to resolve project from Internet: {project}")
    try:
        return RemoteObjectsInv(project).discover()
    except FileNotFoundError as ex:
        logger.warning(ex)
        # Keep signalling `KeyError` to callers, but chain the root cause,
        # so the reason for the failed discovery is not lost.
        raise KeyError(f"Project not found: {project}") from ex
102+
103+
def suggest(self, project: str, term: str, threshold: int = 50) -> t.List[str]:
86104
"""
87105
Find occurrences for "term" in Sphinx inventory.
88106
A wrapper around sphobjinv's `suggest`.
@@ -95,20 +113,17 @@ def suggest(self, project: str, term: str, threshold: int = 50):
95113
https://sphobjinv.readthedocs.io/en/stable/cli/suggest.html
96114
https://sphobjinv.readthedocs.io/en/stable/api/inventory.html#sphobjinv.inventory.Inventory.suggest
97115
"""
98-
for item in self.items:
99-
if item.name == project:
100-
url = f"{item.url.rstrip('/')}/objects.inv"
101-
inv = InventoryManager(url).soi_factory()
102-
results = inv.suggest(term, thresh=threshold)
103-
if results:
104-
hits = len(results)
105-
logger.info(f"{hits} hits for project/term: {project}/{term}")
106-
return results
107-
else:
108-
logger.warning(f"No hits for project/term: {project}/{term}")
109-
return []
116+
documentation_url = self.get_project_documentation_url(project)
117+
url = f"{documentation_url.rstrip('/')}/objects.inv"
118+
inv = InventoryManager(url).soi_factory()
119+
results = inv.suggest(term, thresh=threshold)
120+
if results:
121+
hits = len(results)
122+
logger.info(f"{hits} hits for project/term: {project}/{term}")
123+
return results
110124
else:
111-
raise KeyError(f"Project not found: {project}")
125+
logger.warning(f"No hits for project/term: {project}/{term}")
126+
return []
112127

113128

114129
@click.group()
@@ -155,7 +170,7 @@ def cli_suggest(ctx: click.Context, project: str, term: str, threshold: int = 50
155170
try:
156171
results = library.suggest(project, term, threshold=threshold)
157172
print("\n".join(results)) # noqa: T201
158-
except Exception as ex:
173+
except (KeyError, FileNotFoundError) as ex:
159174
logger.error(str(ex).strip("'"))
160175
sys.exit(1)
161176

linksmith/sphinx/util.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
import logging
2+
import re
13
from pathlib import Path
24

5+
import requests
6+
7+
logger = logging.getLogger(__name__)
8+
39

410
class LocalObjectsInv:
511
"""
@@ -27,3 +33,72 @@ def discover(cls, project_root: Path) -> Path:
2733
if path.exists():
2834
return path
2935
raise FileNotFoundError("No objects.inv found in working directory")
36+
37+
38+
class RemoteObjectsInv:
    """
    Support discovering the documentation location hosting an `objects.inv`,
    either on Read the Docs (RTD), or through PyPI project metadata.

    The `discover*` methods return the documentation base URL, i.e. the
    location to which `/objects.inv` needs to be appended by the caller.
    """

    # Timeout in seconds, applied to each individual HTTP request.
    HTTP_TIMEOUT = 5

    def __init__(self, project: str):
        """
        :param project: Name of the project to look up on RTD and PyPI.
        """
        self.project = project

    def discover(self) -> str:
        """
        Resolve the documentation base URL, trying RTD first, then PyPI.

        :raises FileNotFoundError: When neither RTD nor PyPI yield a result.
        """
        try:
            return self.discover_rtd()
        except FileNotFoundError:
            return self.discover_pypi()

    @staticmethod
    def _candidate_base_paths(path: str) -> list:
        """
        Derive candidate documentation root paths from a page path.

        A trailing `*.html` / `*.htm` leaf is dropped, then the remaining
        path and each of its parents are returned, deepest first, without
        leading or trailing slashes. The site root is the empty string.

        - `/en/latest/` => `["en/latest", "en", ""]`
        - `/en/stable/examples.html` => `["en/stable", "en", ""]`
        - `/genindex.html` => `[""]`
        """
        segments = [segment for segment in path.split("/") if segment]
        if segments and re.search(r"\.html?$", segments[-1]):
            segments.pop()
        candidates = []
        while True:
            candidates.append("/".join(segments))
            if not segments:
                break
            segments.pop()
        return candidates

    @staticmethod
    def _pypi_candidate_urls(info: dict) -> list:
        """
        Collect candidate documentation URLs from the `info` section of PyPI
        project metadata: `docs_url`, `home_page`, and the `Homepage` and
        `Documentation` entries of `project_urls` (labels compared
        case-insensitively).

        Missing, empty, non-string, and duplicate values are omitted.
        """
        project_urls = info.get("project_urls") or {}
        by_label = {str(label).lower(): url for label, url in project_urls.items()}
        ordered = (
            info.get("docs_url"),
            info.get("home_page"),
            by_label.get("homepage"),
            by_label.get("documentation"),
        )
        candidates = []
        for url in ordered:
            if isinstance(url, str) and url and url not in candidates:
                candidates.append(url)
        return candidates

    def discover_rtd(self) -> str:
        """
        Resolve the documentation base URL through the RTD search API.

        The first search result is used. Its `domain` is used verbatim as
        URL prefix, and its `path` is walked upwards until a location
        responds with HTTP 200 for `objects.inv`.

        :raises FileNotFoundError: When the project is unknown to RTD, the
                                   API can not be queried, or no
                                   `objects.inv` has been found.
        """
        logger.info(f"Attempting to resolve project through RTD: {self.project}")
        try:
            payload = requests.get(
                "https://readthedocs.org/api/v3/search/",
                params={"q": f"project:{self.project} *"},
                timeout=self.HTTP_TIMEOUT,
            ).json()
        except (requests.RequestException, ValueError) as ex:
            # Network failures and non-JSON responses must not bypass the
            # PyPI fallback in `discover`, which only catches `FileNotFoundError`.
            raise FileNotFoundError(f"Unable to query Read the Docs: {self.project}") from ex

        results = payload.get("results") if isinstance(payload, dict) else None
        if not results:
            raise FileNotFoundError(f"Project not found at Read the Docs: {self.project}")
        try:
            domain = results[0]["domain"]
            path = results[0]["path"]
        except (KeyError, TypeError) as ex:
            raise FileNotFoundError(f"Unexpected response from Read the Docs: {self.project}") from ex

        # No way to discover the language slot via API?
        # Thus, probe the path of the search hit, and each of its parents.
        for base_path in self._candidate_base_paths(str(path or "")):
            rtd_url = f"{str(domain).rstrip('/')}/{base_path}".rstrip("/")
            try:
                status = requests.get(
                    f"{rtd_url}/objects.inv",
                    allow_redirects=True,
                    timeout=self.HTTP_TIMEOUT,
                ).status_code
            except (requests.RequestException, ValueError) as ex:
                # All candidates live on the same host, so give up right away.
                raise FileNotFoundError("No objects.inv discovered through Read the Docs") from ex
            if status == 200:
                return rtd_url

        raise FileNotFoundError("No objects.inv discovered through Read the Docs")

    def discover_pypi(self) -> str:
        """
        Resolve the documentation base URL through PyPI project metadata.

        Each candidate URL from the metadata is probed for `objects.inv`.
        The first candidate responding with a status below 400 is returned
        as is.

        :raises FileNotFoundError: When the project is unknown to PyPI, the
                                   API can not be queried, or no candidate
                                   serves an `objects.inv`.
        """
        logger.info(f"Attempting to resolve project through PyPI: {self.project}")
        pypi_url = f"https://pypi.org/pypi/{self.project}/json"
        try:
            metadata = requests.get(pypi_url, timeout=self.HTTP_TIMEOUT).json()
        except (requests.RequestException, ValueError) as ex:
            raise FileNotFoundError(f"Unable to query PyPI: {self.project}") from ex

        info = metadata.get("info") if isinstance(metadata, dict) else None
        if not info:
            raise FileNotFoundError(f"Project not found on PyPI: {self.project}")

        for candidate in self._pypi_candidate_urls(info):
            objects_inv_candidate = f"{candidate.rstrip('/')}/objects.inv"
            try:
                objects_inv_status = requests.get(
                    objects_inv_candidate,
                    allow_redirects=True,
                    timeout=self.HTTP_TIMEOUT,
                ).status_code
            except (requests.RequestException, ValueError) as ex:
                # Best effort: A broken candidate must not prevent probing the others.
                logger.debug(f"Probing failed: {objects_inv_candidate}: {ex}")
                continue
            if objects_inv_status < 400:
                return candidate

        raise FileNotFoundError("No objects.inv discovered through PyPI")

tests/test_anansi.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,23 @@ def test_anansi_suggest_miss(cli_runner, caplog):
6969
)
7070
assert result.exit_code == 0
7171
assert "No hits for project/term: sarge/foo" in caplog.messages
72+
73+
74+
def test_anansi_suggest_via_rtd(cli_runner):
    """
    Invoke `anansi suggest` on `requests-cache`, and verify the `patching`
    label is suggested. As the test name says, it is intended to exercise
    project resolution via Read the Docs.
    """
    outcome = cli_runner.invoke(
        cli,
        args="anansi suggest requests-cache patch --threshold=75",
        catch_exceptions=False,
    )
    exit_code, output = outcome.exit_code, outcome.output
    assert exit_code == 0
    assert ":std:label:`patching`" in output
82+
83+
84+
def test_anansi_suggest_via_pypi(cli_runner):
    """
    Invoke `anansi suggest` on `beradio`, and verify the `json` method of
    `BERadioMessage` is suggested. As the test name says, it is intended to
    exercise project resolution via PyPI.
    """
    outcome = cli_runner.invoke(
        cli,
        args="anansi suggest beradio json",
        catch_exceptions=False,
    )
    exit_code, output = outcome.exit_code, outcome.output
    assert exit_code == 0
    assert ":py:method:`beradio.message.BERadioMessage.json`" in output

0 commit comments

Comments
 (0)