Skip to content

Commit a7db1d1

Browse files
authored
feat!: overhaul CLI (#230)
* implement GenomicMedLab/software-templates#83 * update docs
1 parent f516e85 commit a7db1d1

File tree

11 files changed

+519
-290
lines changed

11 files changed

+519
-290
lines changed

docs/source/conf.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"sphinx_copybutton",
2222
"sphinx.ext.autosummary",
2323
"sphinx_github_changelog",
24+
"sphinx_click",
2425
]
2526

2627
templates_path = ["_templates"]
@@ -77,3 +78,106 @@ def linkcode_resolve(domain, info):
7778
# -- code block style --------------------------------------------------------
7879
pygments_style = "default"
7980
# dark-mode code highlighting style (option name as read by the Furo theme)
pygments_dark_style = "monokai"
81+
82+
83+
# -- sphinx-click ------------------------------------------------------------
84+
# These functions let us write descriptions/docstrings in a way that doesn't look
85+
# weird in the Click CLI, but get additional formatting in the sphinx-click autodocs for
86+
# better readability.
87+
import re
88+
89+
from click.core import Context
90+
from sphinx.application import Sphinx
91+
from sphinx_click.ext import _get_usage, _indent
92+
93+
94+
CMD_PATTERN = r"--[^ ]+"
95+
STR_PATTERN = r"\"[^ ]+\""
96+
SNAKE_PATTERN = r"[A-Z]+_[A-Z_]*[A-Z][., ]"
97+
98+
99+
def _add_formatting_to_string(line: str) -> str:
100+
"""Add fixed-width code formatting to span sections in lines:
101+
102+
* shell options, eg `--update_all`
103+
* double-quoted strings, eg `"DO"`
104+
* all caps SNAKE_CASE env vars, eg `DISEASE_NORM_REMOTE_DB_URL`
105+
"""
106+
for pattern in (CMD_PATTERN, STR_PATTERN, SNAKE_PATTERN):
107+
line = re.sub(pattern, lambda x: f"``{x.group()}``", line)
108+
return line
109+
110+
111+
def process_description(app: Sphinx, ctx: Context, lines: list[str]):
    """Add custom formatting to sphinx-click autodoc descriptions.

    * remove :param: :return: etc
    * add fixed-width (code) font to certain words
    * add code block formatting to example shell commands
    * move primary usage example to the top of the description

    ``lines`` has to be modified in place (nothing is returned), so the list is
    processed in several separate passes.

    :param app: Sphinx application (not used here)
    :param ctx: Click context of the command being documented; used to render
        the usage text
    :param lines: description lines, mutated in place
    """
    if not lines:
        # nothing to format; note that this also skips inserting the usage block
        return

    # chop off params: drop everything from the first ":param" line onward
    param_boundary = next(
        (i for i, line in enumerate(lines) if ":param" in line), None
    )
    if param_boundary is not None:
        del lines[param_boundary:]
        # Blank out the line that preceded the params. If the description
        # started with ":param", nothing is left and there is no line to blank.
        if lines:
            lines[-1] = ""

    # add code formatting to strings, commands, and env vars
    for i, line in enumerate(lines):
        if line.startswith((" ", ">>> ", "|")):
            continue  # skip example code blocks
        # formatting is a no-op on lines without a match, so no pre-check needed
        lines[i] = _add_formatting_to_string(line)

    # add code block formatting to example console commands; walk backwards so
    # that inserting the directive does not shift indices still to be visited
    for i in range(len(lines) - 1, -1, -1):
        if lines[i].startswith((" ", "| ")):
            if lines[i].startswith("| "):
                # NOTE(review): the prefix tested is two characters but three
                # are dropped -- confirm against the line-block indentation
                # that sphinx-click actually emits
                lines[i] = lines[i][3:]
            # only the first line of an example gets the directive header
            if i == 0 or lines[i - 1] == "\b" or lines[i - 1] == "":
                lines.insert(i, "")
                lines.insert(i, ".. code-block:: console")

    # put usage at the top of the description
    usage_block = [".. code-block:: shell", ""]
    usage_block.extend(
        _indent(usage_line) for usage_line in _get_usage(ctx).splitlines()
    )
    usage_block.append("")
    lines[:0] = usage_block
168+
def process_option(app: Sphinx, ctx: Context, lines: list[str]):
    """Wrap double-quoted strings in sphinx-click option text in inline literals.

    :param app: Sphinx application (not used here)
    :param ctx: Click context (not used here)
    :param lines: option description lines, rewritten in place
    """
    quoted = re.compile(STR_PATTERN)
    for idx, text in enumerate(lines):
        # substitution leaves lines without a quoted string untouched
        lines[idx] = quoted.sub(lambda m: f"``{m.group()}``", text)
173+
174+
175+
def setup(app):
    """Register the sphinx-click formatting hooks on the Sphinx application.

    The usage text is moved to the top of each command description, so an extra
    handler empties the built-in usage section to silence it.

    :param app: application object whose ``connect`` method registers handlers
    """

    def _suppress_usage(_app, _ctx, lines):
        lines.clear()

    hooks = (
        ("sphinx-click-process-description", process_description),
        ("sphinx-click-process-options", process_option),
        ("sphinx-click-process-usage", _suppress_usage),
    )
    for event_name, handler in hooks:
        app.connect(event_name, handler)

docs/source/managing_data/loading_and_updating_data.rst

Lines changed: 5 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,55 +3,12 @@
33
Loading and updating data
44
=========================
55

6+
The primary means of managing Disease Normalizer data is via the included command-line interface.
7+
68
.. note::
79

810
See the :ref:`ETL API documentation<etl-api>` for information on programmatic access to the data loader classes.
911

10-
Full load/reload
11-
----------------
12-
13-
Calling the Disease Normalizer update command with the ``--update_all`` and ``--update_merged`` flags will delete all existing data, fetch new source data if available, and then perform a complete reload of the database (including merged records):
14-
15-
.. code-block:: shell
16-
17-
disease_norm_update --update_all --update_merged
18-
19-
20-
Reload individual source
21-
------------------------
22-
23-
To update specific sources, call the ``--sources`` option with one or more source name(s) quoted and separated by spaces. While it is possible to update individual source data without also updating the normalized record data, that may affect the proper function of the normalized query endpoints, so it is recommended to include the ``--update_merged`` flag as well.
24-
25-
.. code-block:: shell
26-
27-
disease_norm_update --sources="HGNC NCBI" --update_merged
28-
29-
30-
Use local data
31-
--------------
32-
33-
The Disease Normalizer will fetch the latest available data from all sources if local data is out-of-date. To suppress this and force usage of local files, use the `--use_existing` flag:
34-
35-
.. code-block:: shell
36-
37-
disease_norm_update --update_all --use_existing
38-
39-
40-
Check DB health
41-
---------------
42-
43-
The shell command ``disease_norm_check_db`` performs a basic check on the database status. It first confirms that the database's schema exists, and then identifies whether metadata is available for each source, and whether disease record and normalized concept tables are non-empty. Check the process's exit code for the result (per the UNIX standard, ``0`` means success, and any other return code means failure).
44-
45-
.. code-block:: console
46-
47-
$ disease_norm_check_db
48-
$ echo $?
49-
1 # indicates failure
50-
51-
This command is equivalent to the combination of the database classes' ``check_schema_initialized`` and ``check_tables_populated`` methods:
52-
53-
.. code-block:: python
54-
55-
from disease.database import create_db
56-
db = create_db()
57-
db_is_healthy = db.check_schema_initialized() and db.check_tables_populated()
12+
.. click:: disease.cli:cli
13+
:prog: disease-normalizer
14+
:nested: full

docs/source/managing_data/postgresql.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,18 @@ Once created, set the environment variable ``DISEASE_NORM_DB_URL`` to a connecti
2424
Load from remote source
2525
--------------------------------
2626

27-
The Disease Normalizer's PostgreSQL class provides the ``disease_norm_update_remote`` shell command to refresh its data directly from a remotely-stored SQL dump, instead of acquiring, transforming, and loading source data. This enables data loading on the order of seconds rather than hours. See the command description at ``disease_norm_update_remote --help`` for more information.
27+
The Disease Normalizer's PostgreSQL class supports a CLI command to refresh its data directly from a remotely-stored SQL dump, instead of acquiring, transforming, and loading source data. This enables data loading on the order of seconds rather than hours. See the command description at ``disease-normalizer update-from-remote --help`` for more information.
2828

2929
By default, this command will fetch the `latest data dump <https://vicc-normalizers.s3.us-east-2.amazonaws.com/disease_normalization/postgresql/disease_norm_latest.sql.tar.gz>`_ provided by the VICC. Alternative URLs can be set with the ``--data_url`` option: ::
3030

31-
disease_norm_update_remote --data_url=https://vicc-normalizers.s3.us-east-2.amazonaws.com/disease_normalization/postgresql/disease_norm_20230322163523.sql.tar.gz
31+
disease-normalizer update-from-remote --data_url=https://vicc-normalizers.s3.us-east-2.amazonaws.com/disease_normalization/postgresql/disease_norm_20230322163523.sql.tar.gz
3232

3333

3434
Create SQL dump from database
3535
-----------------------------
3636

37-
The Disease Normalizer's PostgreSQL class also provides the ``disease_norm_dump`` shell command to create a SQL dump of current data into a file. This command will create a file named ``disease_norm_YYYYMMDDHHmmss.sql`` in the current directory; the ``-o`` option can be used to specify an alternate location, like so: ::
37+
The Disease Normalizer's PostgreSQL class also supports a CLI command to create a SQL dump of current data into a file. This command will create a file named ``disease_norm_YYYYMMDDHHmmss.sql`` in the current directory; the ``-o`` option can be used to specify an alternate location, like so: ::
3838

39-
disease_norm_dump -o ~/.disease_data/
39+
disease-normalizer dump-database -o ~/.disease_data/
4040

41-
See ``disease_norm_dump --help`` for more information.
41+
See ``disease-normalizer dump-database --help`` for more information.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
disease.etl.update
2+
==================
3+
4+
.. automodule:: disease.etl.update
5+
:members:
6+
:undoc-members:
7+
:special-members: __init__
8+
:inherited-members:
9+
:exclude-members: model_fields, model_config

docs/source/reference/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ ETL Modules
3737
:template: module_summary_inh.rst
3838

3939
disease.etl.base
40+
disease.etl.update
4041
disease.etl.do
4142
disease.etl.mondo
4243
disease.etl.ncit

pyproject.toml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ docs = [
5151
"sphinxext-opengraph==0.8.2",
5252
"furo==2023.3.27",
5353
"gravis==0.1.0",
54-
"sphinx-github-changelog==1.2.1"
54+
"sphinx-github-changelog==1.2.1",
55+
"sphinx-click==5.0.1",
5556
]
5657

5758
[project.urls]
@@ -62,10 +63,7 @@ Source = "https://github.com/cancervariants/disease-normalization"
6263
"Bug Tracker" = "https://github.com/cancervariants/disease-normalization/issues"
6364

6465
[project.scripts]
65-
disease_norm_update = "disease.cli:update_db"
66-
disease_norm_update_remote = "disease.cli:update_from_remote"
67-
disease_norm_dump = "disease.cli:dump_database"
68-
disease_norm_check_db = "disease.cli:check_db"
66+
disease-normalizer = "disease.cli:cli"
6967

7068
[build-system]
7169
requires = ["setuptools>=64", "setuptools_scm>=8"]
@@ -88,7 +86,7 @@ branch = true
8886

8987
[tool.ruff]
9088
src = ["src"]
91-
exclude = ["docs/source/conf.py", "analysis/"]
89+
extend-exclude = ["docs/source/conf.py", "analysis/"]
9290

9391
[tool.ruff.lint]
9492
select = [

0 commit comments

Comments
 (0)