Fix the CLI. (#16)

superdosh · web-flow · commit 703e4fb1d020 · 2025-05-30T17:23:04.000-04:00
* Fix the CLI.

* Documentation for CLI.

* CLI tests.

* Centralize CLI commands.

* Also centralize plugin loading.
diff --git a/README.md b/README.md
@@ -38,6 +38,25 @@ given docker-compose.yaml file will start up:
 1. The runs can be monitored in MLFlow wherever you have that set up. If
    local with the default setup, http://localhost:8080.
 
+## CLI
+
+You can also interact with modelplane via the CLI. Run
+`poetry run modelplane --help` for more details.
+
+*Important:* You must set the `MLFLOW_TRACKING_URI` environment variable.
+For example, if you've brought up MLFlow using the docker compose process above,
+you could run:
+```
+MLFLOW_TRACKING_URI=http://localhost:8080 poetry run modelplane get-sut-responses --sut_id {sut_id} --prompts tests/data/prompts.csv --experiment expname
+```
+After running the command, you'll see the `run_id` in the output from MLFlow,
+or you can get the `run_id` via the MLFlow UI.
+
+Then you can run annotations with:
+```
+MLFLOW_TRACKING_URI=http://localhost:8080 poetry run modelplane annotate --annotator_id {annotator_id} --experiment expname --response_run_id {run_id}
+```
+
 ## TODO
 
 - [ ] Scoring against ground truth (measurement runner functionality)
diff --git a/src/modelplane/runways/__init__.py b/src/modelplane/runways/__init__.py
@@ -0,0 +1,4 @@
+from modelgauge.load_plugins import load_plugins
+
+
+load_plugins(disable_progress_bar=True)
diff --git a/src/modelplane/runways/annotator.py b/src/modelplane/runways/annotator.py
@@ -10,7 +10,6 @@
 import tempfile
 from collections import defaultdict
 
-import click
 import jsonlines
 import mlflow
 import mlflow.artifacts
@@ -19,7 +18,6 @@
 
 from modelgauge.annotation_pipeline import ANNOTATOR_CSV_INPUT_COLUMNS
 from modelgauge.annotator_registry import ANNOTATORS
-from modelgauge.load_plugins import load_plugins
 from modelgauge.pipeline_runner import AnnotatorRunner
 
 from modelplane.runways.utils import (
@@ -28,65 +26,6 @@
     is_debug_mode,
     setup_annotator_credentials,
 )
-from modelplane.utils.env import load_from_dotenv
-
-load_plugins(disable_progress_bar=True)
-
-
-@click.command(name="annotate")
-@click.option(
-    "--annotator_id",
-    type=str,
-    required=True,
-    help="The SUT UID to use.",
-)
-@click.option(
-    "--experiment",
-    type=str,
-    required=True,
-    help="The experiment name to use. If the experiment does not exist, it will be created.",
-)
-@click.option(
-    "--response_run_id",
-    type=str,
-    required=True,
-    help="The run ID corresponding to the responses to annotate.",
-)
-@click.option(
-    "--overwrite",
-    is_flag=True,
-    default=False,
-    help="Use the response_run_id to save annotation artifact. Any existing annotation artifact will be overwritten. If not set, a new run will be created.",
-)
-@click.option(
-    "--cache_dir",
-    type=str,
-    default=None,
-    help="The cache directory. Defaults to None. Local directory used to cache LLM responses.",
-)
-@click.option(
-    "--n_jobs",
-    type=int,
-    default=1,
-    help="The number of jobs to run in parallel. Defaults to 1.",
-)
-@load_from_dotenv
-def get_annotations(
-    annotator_id: str,
-    experiment: str,
-    response_run_id: str,
-    overwrite: bool = False,
-    cache_dir: str | None = None,
-    n_jobs: int = 1,
-):
-    return annotate(
-        annotator_id=annotator_id,
-        experiment=experiment,
-        response_run_id=response_run_id,
-        overwrite=overwrite,
-        cache_dir=cache_dir,
-        n_jobs=n_jobs,
-    )
 
 
 def annotate(
diff --git a/src/modelplane/runways/responder.py b/src/modelplane/runways/responder.py
@@ -3,10 +3,8 @@
 import pathlib
 import tempfile
 
-import click
 import mlflow
 
-from modelgauge.load_plugins import load_plugins
 from modelgauge.pipeline_runner import PromptRunner
 from modelgauge.sut_registry import SUTS
 
@@ -16,60 +14,6 @@
     is_debug_mode,
     setup_sut_credentials,
 )
-from modelplane.utils.env import load_from_dotenv
-
-load_plugins(disable_progress_bar=True)
-
-
-@click.command(name="get-responses")
-@click.option(
-    "--sut_id",
-    type=str,
-    required=True,
-    help="The SUT UID to use.",
-)
-@click.option(
-    "--prompts",
-    type=str,
-    required=True,
-    help="The path to the input prompts file.",
-)
-@click.option(
-    "--experiment",
-    type=str,
-    required=True,
-    help="The experiment name to use. If the experiment does not exist, it will be created.",
-)
-@click.option(
-    "--cache_dir",
-    type=str,
-    default=None,
-    help="The cache directory. Defaults to None. Local directory used to cache LLM responses.",
-)
-@click.option(
-    "--n_jobs",
-    type=int,
-    default=1,
-    help="The number of jobs to run in parallel. Defaults to 1.",
-)
-@load_from_dotenv
-def get_sut_responses(
-    sut_id: str,
-    prompts: str,
-    experiment: str,
-    cache_dir: str | None = None,
-    n_jobs: int = 1,
-):
-    """
-    Run the pipeline to get responses from SUTs.
-    """
-    return respond(
-        sut_id=sut_id,
-        prompts=prompts,
-        experiment=experiment,
-        cache_dir=cache_dir,
-        n_jobs=n_jobs,
-    )
 
 
 def respond(
diff --git a/src/modelplane/runways/run.py b/src/modelplane/runways/run.py
@@ -0,0 +1,122 @@
+import click
+
+
+from modelplane.runways.annotator import annotate
+from modelplane.runways.responder import respond
+from modelplane.utils.env import load_from_dotenv
+
+
+@click.group(name="modelplane")
+def cli():
+    # Root command group: it does no work itself; the subcommands defined
+    # below attach to it via @cli.command.
+    pass
+
+
+@cli.command(name="get-sut-responses")
+@click.option(
+    "--sut_id",
+    type=str,
+    required=True,
+    help="The SUT UID to use.",
+)
+@click.option(
+    "--prompts",
+    type=str,
+    required=True,
+    help="The path to the input prompts file.",
+)
+@click.option(
+    "--experiment",
+    type=str,
+    required=True,
+    help="The experiment name to use. If the experiment does not exist, it will be created.",
+)
+@click.option(
+    "--cache_dir",
+    type=str,
+    default=None,
+    help="The cache directory. Defaults to None. Local directory used to cache LLM responses.",
+)
+@click.option(
+    "--n_jobs",
+    type=int,
+    default=1,
+    help="The number of jobs to run in parallel. Defaults to 1.",
+)
+@load_from_dotenv
+def get_sut_responses(
+    sut_id: str,
+    prompts: str,
+    experiment: str,
+    cache_dir: str | None = None,
+    n_jobs: int = 1,
+):
+    """
+    Run the pipeline to get responses from SUTs.
+    """
+    # Thin CLI wrapper: forward the parsed options unchanged to respond().
+    return respond(
+        sut_id=sut_id,
+        prompts=prompts,
+        experiment=experiment,
+        cache_dir=cache_dir,
+        n_jobs=n_jobs,
+    )
+
+
+@cli.command(name="annotate")
+@click.option(
+    "--annotator_id",
+    type=str,
+    required=True,
+    help="The annotator UID to use.",
+)
+@click.option(
+    "--experiment",
+    type=str,
+    required=True,
+    help="The experiment name to use. If the experiment does not exist, it will be created.",
+)
+@click.option(
+    "--response_run_id",
+    type=str,
+    required=True,
+    help="The run ID corresponding to the responses to annotate.",
+)
+@click.option(
+    "--overwrite",
+    is_flag=True,
+    default=False,
+    help="Use the response_run_id to save annotation artifact. Any existing annotation artifact will be overwritten. If not set, a new run will be created.",
+)
+@click.option(
+    "--cache_dir",
+    type=str,
+    default=None,
+    help="The cache directory. Defaults to None. Local directory used to cache LLM responses.",
+)
+@click.option(
+    "--n_jobs",
+    type=int,
+    default=1,
+    help="The number of jobs to run in parallel. Defaults to 1.",
+)
+@load_from_dotenv
+def get_annotations(
+    annotator_id: str,
+    experiment: str,
+    response_run_id: str,
+    overwrite: bool = False,
+    cache_dir: str | None = None,
+    n_jobs: int = 1,
+):
+    """
+    Run the pipeline to annotate the responses from an existing run.
+    """
+    # Thin CLI wrapper: forward the parsed options unchanged to annotate().
+    return annotate(
+        annotator_id=annotator_id,
+        experiment=experiment,
+        response_run_id=response_run_id,
+        overwrite=overwrite,
+        cache_dir=cache_dir,
+        n_jobs=n_jobs,
+    )
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/tests/it/test_cli.py b/tests/it/test_cli.py
@@ -0,0 +1,40 @@
+from click.testing import CliRunner
+
+from modelplane.runways.run import cli
+
+
+def test_main_help():
+    # The top-level --help must exit cleanly and list both subcommands.
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "--help",
+        ],
+    )
+    assert result.exit_code == 0
+    assert "get-sut-responses" in result.output
+    assert "annotate" in result.output
+
+
+def test_get_sut_responses_help():
+    # Smoke test: --help for the get-sut-responses subcommand exits with code 0.
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "get-sut-responses",
+            "--help",
+        ],
+    )
+    assert result.exit_code == 0
+
+
+def test_annotate_help():
+    # Smoke test: --help for the annotate subcommand exits with code 0.
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "annotate",
+            "--help",
+        ],
+    )
+    assert result.exit_code == 0
diff --git a/tests/it/test_health.py b/tests/it/test_health.py
@@ -1,6 +1,5 @@
 # Ensures the mlflow tracking server is live.
 
-import mlflow
 from modelplane.mlflow.health import tracking_server_is_live
 
 

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +from modelgauge.load_plugins import load_plugins
++
++
 +load_plugins(disable_progress_bar=True)
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,5 @@`
`1`	`1`	`# Ensures the mlflow tracking server is live.`
`2`	`2`
`3`		`-import mlflow`
`4`	`3`	`from modelplane.mlflow.health import tracking_server_is_live`
`5`	`4`
`6`	`5`