feat: migrate CLI from experimental to main package (#2183)

NirantK · web-flow · commit a97d11329a19 · 2025-08-17T17:36:30.000+05:30
## Key Changes
- Migrated CLI from `ragas.experimental.cli` to `ragas.cli` with updated
imports
- Added main `ragas` CLI command entry point, removed deprecated
`ragas-experimental` command
- Added Rich console to main utils and organized CLI dependencies in
pyproject.toml
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -7,8 +7,9 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 Ragas is an evaluation toolkit for Large Language Model (LLM) applications. It provides objective metrics for evaluating LLM applications, test data generation capabilities, and integrations with popular LLM frameworks.
 
 The repository contains:
+
 1. **Ragas Library** - The main evaluation toolkit including experimental features (in `/ragas` directory)
-   - Core evaluation metrics and test generation 
+   - Core evaluation metrics and test generation
    - Experimental features available at `ragas.experimental`
 
 ## Development Environment Setup
@@ -139,6 +140,7 @@ The repository has the following structure:
 The Ragas core library provides metrics, test data generation and evaluation functionality for LLM applications:
 
 1. **Metrics** - Various metrics for evaluating LLM applications including:
+
    - AspectCritic
    - AnswerCorrectness
    - ContextPrecision
@@ -156,10 +158,11 @@ The experimental features are now integrated into the main ragas package:
 
 1. **Experimental features** are available at `ragas.experimental`
 2. **Dataset and Experiment management** - Enhanced data handling for experiments
-3. **Advanced metrics** - Extended metric capabilities  
+3. **Advanced metrics** - Extended metric capabilities
 4. **Backend support** - Multiple storage backends (CSV, JSONL, Google Drive, in-memory)
 
 To use experimental features:
+
 ```python
 from ragas.experimental import Dataset
 from ragas import experiment
@@ -191,5 +194,5 @@ analytics_logger.addHandler(console_handler)
 
 ## Memories
 
-- whenever you create such docs put in in /_experiments because that is gitignored and you can use it as a scratchpad or tmp directory for storing these
+- whenever you create such docs put it in /\_experiments because that is gitignored and you can use it as a scratchpad or tmp directory for storing these
 - always use uv to run python and python related commandline tools like isort, ruff, pyright ect. This is because we are using uv to manage the .venv and dependencies.
diff --git a/ragas/pyproject.toml b/ragas/pyproject.toml
@@ -17,15 +17,17 @@ dependencies = [
     "langchain-community",
     "langchain_openai",
     
+    # CLI dependencies
+    "typer",
+    "rich",
+    
     # LLM providers
     "openai>=1.0.0",
     
-    # CLI and experimental features
+    # Experimental features
     "tqdm",
     "instructor",
     "gitpython",
-    "typer",
-    "rich",
     "pillow>=10.4.0",
 ]
 dynamic = ["version", "readme"]
@@ -63,7 +65,7 @@ test = []
 "gdrive" = "ragas.backends.gdrive_backend:GDriveBackend"
 
 [project.scripts]
-ragas-experimental = "ragas.experimental.cli:app"
+ragas = "ragas.cli:app"
 
 [tool.setuptools]
 package-dir = {"" = "src"}
diff --git a/ragas/src/ragas/__init__.py b/ragas/src/ragas/__init__.py
@@ -1,14 +1,10 @@
+from ragas import backends
 from ragas.cache import CacheInterface, DiskCacheBackend, cacher
 from ragas.dataset_schema import EvaluationDataset, MultiTurnSample, SingleTurnSample
 from ragas.evaluation import evaluate
 from ragas.experiment import Experiment, experiment, version_experiment
 from ragas.run_config import RunConfig
 
-# Backend imports
-from ragas import backends
-
-# Backend imports
-
 try:
     from ._version import version as __version__
 except ImportError:
diff --git a/ragas/src/ragas/cli.py b/ragas/src/ragas/cli.py
@@ -16,8 +16,8 @@
 from rich.spinner import Spinner
 from rich.live import Live
 
-# from .project.core import Project  # TODO: Project module not implemented yet
-from .utils import console
+# from ragas.experimental.project.core import Project  # TODO: Project module not implemented yet
+from ragas.utils import console
 
 
 app = typer.Typer(help="Ragas CLI for running LLM evaluations")
diff --git a/ragas/src/ragas/utils.py b/ragas/src/ragas/utils.py
@@ -14,6 +14,7 @@
 import numpy as np
 import tiktoken
 from datasets import Dataset
+from rich.console import Console
 
 if t.TYPE_CHECKING:
     from ragas.metrics.base import Metric
@@ -308,6 +309,9 @@ def utc_converter(timestamp):
 
 base_logger = set_logging_level()
 
+# Rich console instance for CLI and other formatting needs
+console = Console()
+
 
 class MemorableNames:
     """Generator for memorable, unique names for experiments and datasets."""
diff --git a/ragas/tests/unit/test_cli.py b/ragas/tests/unit/test_cli.py
@@ -0,0 +1,39 @@
+"""Tests for the Ragas CLI module."""
+
+from typer.testing import CliRunner
+from ragas.cli import app
+
+
+def test_cli_help():
+    """Test that the CLI help command works."""
+    runner = CliRunner()
+    result = runner.invoke(app, ["--help"])
+    assert result.exit_code == 0
+    assert "Ragas CLI for running LLM evaluations" in result.stdout
+
+
+def test_hello_world_help():
+    """Test that the hello-world help command works."""
+    runner = CliRunner()
+    result = runner.invoke(app, ["hello-world", "--help"])
+    assert result.exit_code == 0
+    assert "Directory to run the hello world example in" in result.stdout
+
+
+def test_evals_help():
+    """Test that the evals help command works."""
+    runner = CliRunner()
+    result = runner.invoke(app, ["evals", "--help"])
+    assert result.exit_code == 0
+    assert "Run evaluations on a dataset" in result.stdout
+
+
+if __name__ == "__main__":
+    print("Running CLI tests...")
+    test_cli_help()
+    print("✓ CLI help test passed")
+    test_hello_world_help()
+    print("✓ Hello world help test passed")
+    test_evals_help()
+    print("✓ Evals help test passed")
+    print("All CLI tests passed!")
diff --git a/ragas/tests/unit/test_langgraph.py b/ragas/tests/unit/test_langgraph.py
@@ -1,5 +1,5 @@
 import json
-from typing import List, Union
+from typing import List, Union, cast
 
 import pytest
 from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
@@ -111,9 +111,10 @@ def test_unsupported_message_type():
     class CustomMessage:
         content = "test"
 
-    messages: List[Union[HumanMessage, SystemMessage, AIMessage, ToolMessage]] = [
-        CustomMessage()
-    ]  # type: ignore
+    messages = cast(
+        List[Union[HumanMessage, SystemMessage, AIMessage, ToolMessage]],
+        [CustomMessage()],
+    )
 
     with pytest.raises(ValueError) as exc_info:
         convert_to_ragas_messages(messages)