1313from rich .table import Table
1414from rich .text import Text
1515from rich .panel import Panel
16+ from rich .spinner import Spinner
17+ from rich .live import Live
1618from .project .core import Project
1719from .utils import console
1820
@@ -265,6 +267,7 @@ async def run_experiments(
265267 input_data_class : type ,
266268 baseline_name : Optional [str ] = None ,
267269 metrics : str = None ,
270+ name : Optional [str ] = None ,
268271):
269272 """Run experiments using ragas dataset system."""
270273 console .print (f"Getting dataset: { dataset_name } " )
@@ -280,7 +283,7 @@ async def run_experiments(
280283
281284 # Run the experiment using the run_async method
282285 try :
283- experiment_result = await experiment_func .run_async (dataset )
286+ experiment_result = await experiment_func .run_async (dataset , name = name )
284287 success ("✓ Completed experiments successfully" )
285288 except Exception as e :
286289 error (f"Error running experiments: { e } " )
@@ -373,6 +376,9 @@ def evals(
373376 baseline : Optional [str ] = typer .Option (
374377 None , "--baseline" , help = "Baseline experiment name to compare against"
375378 ),
379+ name : Optional [str ] = typer .Option (
380+ None , "--name" , help = "Name of the experiment run"
381+ ),
376382):
377383 """Run evaluations on a dataset."""
378384 console .print (f"Running evaluation: { eval_file } " )
@@ -428,7 +434,13 @@ def evals(
428434 # Run the experiments
429435 asyncio .run (
430436 run_experiments (
431- project , experiment_func , dataset , input_data_class , baseline , metrics
437+ project ,
438+ experiment_func ,
439+ dataset ,
440+ input_data_class ,
441+ baseline ,
442+ metrics ,
443+ name ,
432444 )
433445 )
434446 success ("✓ Evaluation completed successfully" )
@@ -439,5 +451,169 @@ def evals(
439451 raise typer .Exit (1 )
440452
441453
@app.command()
def hello_world(
    directory: Optional[str] = typer.Argument(
        ".", help="Directory to run the hello world example in"
    ),
):
    """Scaffold a self-contained 'hello world' example project.

    Creates ``<directory>/hello_world`` containing a 10-row test dataset
    (``datasets/test_data.csv``), an empty ``experiments`` folder, and a
    ready-to-run evaluation script (``evals.py``) with a mock app endpoint
    and an accuracy metric.
    """
    import os
    import time

    import pandas as pd

    if not os.path.exists(directory):
        # Fix: typer.Exit expects an integer exit code, not a message.
        # Follow the file's convention of error(...) + Exit(1).
        error(f"Directory {directory} does not exist.")
        raise typer.Exit(1)

    with Live(
        Spinner("dots", text="Creating hello world example...", style="green"),
        console=console,
    ) as live:
        live.update(Spinner("dots", text="Creating directories...", style="green"))
        # Use pathlib consistently for directory creation (the original mixed
        # Path.mkdir and os.makedirs).
        base_dir = Path(directory) / "hello_world"
        (base_dir / "datasets").mkdir(parents=True, exist_ok=True)
        (base_dir / "experiments").mkdir(parents=True, exist_ok=True)
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(Spinner("dots", text="Creating test dataset...", style="green"))
        hello_world_data = [
            {
                "id": 1,
                "query": "What is the capital of France?",
                "expected_output": "Paris",
            },
            {"id": 2, "query": "What is 2 + 2?", "expected_output": "4"},
            {
                "id": 3,
                "query": "What is the largest mammal?",
                "expected_output": "Blue Whale",
            },
            {
                "id": 4,
                "query": "Who developed the theory of relativity?",
                "expected_output": "Einstein",
            },
            {
                "id": 5,
                "query": "What is the programming language used for data science?",
                "expected_output": "Python",
            },
            {
                "id": 6,
                "query": "What is the highest mountain in the world?",
                "expected_output": "Mount Everest",
            },
            {
                "id": 7,
                "query": "Who wrote 'Romeo and Juliet'?",
                "expected_output": "Shakespeare",
            },
            {
                "id": 8,
                "query": "What is the fourth planet from the Sun?",
                "expected_output": "Mars",
            },
            {
                "id": 9,
                "query": "What is the name of the fruit that keeps the doctor away?",
                "expected_output": "Apple",
            },
            {
                "id": 10,
                "query": "Who painted the Mona Lisa?",
                "expected_output": "Leonardo da Vinci",
            },
        ]
        df = pd.DataFrame(hello_world_data)
        df.to_csv(base_dir / "datasets" / "test_data.csv", index=False)
        time.sleep(0.5)  # Brief pause to show spinner

        live.update(
            Spinner("dots", text="Creating evaluation script...", style="green")
        )
        # Create evals.py file. NOTE: this is a runtime template string written
        # verbatim to disk — do not reformat its contents.
        evals_content = '''import typing as t

import numpy as np
from ragas_experimental import BaseModel, Project
from ragas_experimental.project.backends import LocalCSVProjectBackend
from ragas_experimental.metric.result import MetricResult
from ragas_experimental.metric.numeric import numeric_metric

p = Project(
    project_id="hello_world",
    project_backend=LocalCSVProjectBackend("."),
)


@numeric_metric(name="accuracy_score", range=(0, 1))
def accuracy_score(response: str, expected: str):
    """
    Is the response a good response to the query?
    """
    result = 1 if expected.lower().strip() == response.lower().strip() else 0
    return MetricResult(
        result=result,
        reason=(
            f"Response contains {expected}"
            if result
            else f"Response does not contain {expected}"
        ),
    )


def mock_app_endpoint(**kwargs) -> str:
    """Mock AI endpoint for testing purposes."""
    mock_responses = [
        "Paris","4","Blue Whale","Einstein","Python","Mount Everest","Shakespeare",
        "Mars","Apple","Leonardo da Vinci",]
    return np.random.choice(mock_responses)


class TestDataRow(BaseModel):
    id: t.Optional[int]
    query: str
    expected_output: str


class ExperimentDataRow(TestDataRow):
    response: str
    accuracy: int
    accuracy_reason: t.Optional[str] = None


@p.experiment(ExperimentDataRow)
async def run_experiment(row: TestDataRow):
    response = mock_app_endpoint(query=row.query)
    accuracy = accuracy_score.score(response=response, expected=row.expected_output)

    experiment_view = ExperimentDataRow(
        **row.model_dump(),
        response=response,
        accuracy=accuracy.result,
        accuracy_reason=accuracy.reason,
    )
    return experiment_view
'''

        (base_dir / "evals.py").write_text(evals_content)
        time.sleep(0.5)  # Brief pause to show spinner

        # Consistency: give the final spinner the same style as the others.
        live.update(Spinner("dots", text="Finalizing hello world example...", style="green"))
        time.sleep(0.5)  # Brief pause to show spinner

    # Preserve the original os.path.join form of the reported path
    # (e.g. "./hello_world" when directory is ".").
    hello_world_path = os.path.join(directory, "hello_world")
    success(f"✓ Created hello world example in {hello_world_path}")
    success(
        "✓ You can now run: ragas evals hello_world/evals.py --dataset test_data --metrics accuracy"
    )
# Script entry point: dispatch to the Typer CLI app.
if __name__ == "__main__":
    app()