Skip to content

Commit 9b6b9cc

Browse files
committed
add evaluate command
1 parent fedb6bb commit 9b6b9cc

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

docs/references.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,8 @@
9696

9797
- [LangChain / MLflow](https://docs.langchain.com/oss/python/integrations/providers/mlflow_tracking)
9898
- [MLflow / Tracing LangGraph🦜🕸️](https://mlflow.org/docs/latest/genai/tracing/integrations/listing/langgraph/)
99+
- [GenAI Evaluation Quickstart](https://mlflow.org/docs/latest/genai/eval-monitor/quickstart/)
100+
101+
### [LiteLLM](https://docs.litellm.ai/)
102+
103+
- [Azure OpenAI](https://docs.litellm.ai/docs/providers/azure/)

scripts/mlflow_operator.py

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
import logging
2+
import os
23
from logging import basicConfig
34

45
import mlflow
56
import typer
67
from dotenv import load_dotenv
7-
from langchain_core.messages import HumanMessage
8+
from langchain_core.messages import HumanMessage, SystemMessage
9+
from mlflow.genai import scorer
10+
from mlflow.genai.scorers import Correctness, Guidelines
811

912
from template_langgraph.agents.demo_agents.weather_agent import graph
13+
from template_langgraph.llms.azure_openais import AzureOpenAiWrapper, Settings
1014
from template_langgraph.loggers import get_logger
1115

1216
app = typer.Typer(
@@ -32,6 +36,12 @@ def tracing(
3236
"-q",
3337
help="Query to run with the LangGraph agent",
3438
),
39+
experiment_name: str = typer.Option(
40+
"LangGraph Experiment",
41+
"--experiment-name",
42+
"-e",
43+
help="MLflow experiment name",
44+
),
3545
tracking_uri: str = typer.Option(
3646
"http://localhost:5001",
3747
"--tracking-uri",
@@ -50,7 +60,7 @@ def tracing(
5060

5161
mlflow.langchain.autolog()
5262
mlflow.set_tracking_uri(tracking_uri)
53-
mlflow.set_experiment("LangGraph Experiment")
63+
mlflow.set_experiment(experiment_name)
5464

5565
result = graph.invoke(
5666
{
@@ -68,6 +78,91 @@ def tracing(
6878
logger.info(f"Trace info: {trace.info.token_usage}")
6979

7080

81+
@app.command(
    help="Evaluate the LangGraph agent with MLflow tracing ref. https://mlflow.org/docs/latest/genai/eval-monitor/quickstart/"
)
def evaluate(
    experiment_name: str = typer.Option(
        "LangGraph Experiment",
        "--experiment-name",
        "-e",
        help="MLflow experiment name",
    ),
    tracking_uri: str = typer.Option(
        "http://localhost:5001",
        "--tracking-uri",
        "-t",
        help="MLflow tracking URI",
    ),
    verbose: bool = typer.Option(
        True,
        "--verbose",
        "-v",
        help="Enable verbose output",
    ),
):
    """Run an MLflow GenAI evaluation against a small built-in Q&A dataset.

    Connects to the MLflow tracking server at ``tracking_uri``, selects (or
    creates) ``experiment_name``, then calls ``mlflow.genai.evaluate`` on a
    3-question dataset using an Azure OpenAI chat model as the predict
    function. Scorers: LLM-judged ``Correctness`` and an ``is_english``
    ``Guidelines`` check (both judged via LiteLLM's Azure provider), plus a
    local code-based ``is_concise`` scorer.

    Side effects:
        - Mutates ``os.environ`` (AZURE_API_* keys) so LiteLLM can reach
          Azure OpenAI for the judge models.
        - Logs results to the MLflow server; requires network access to both
          MLflow and Azure OpenAI.
    """
    set_verbose_logging(verbose)
    logger.info("Running...")

    # Auto-trace LangChain calls and point MLflow at the requested server/experiment.
    mlflow.langchain.autolog()
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment_name)

    # Chat model used as the system-under-test (the predict_fn below).
    llm = AzureOpenAiWrapper().chat_model

    def qa_predict_fn(question: str) -> str:
        """Simple Q&A prediction function using OpenAI"""
        response = llm.invoke(
            [
                SystemMessage(content="You are a helpful assistant. Answer questions concisely."),
                HumanMessage(content=question),
            ]
        )
        # NOTE(review): assumes response.content is a plain str — for some
        # LangChain message types it can be a list of content parts; confirm.
        return response.content

    @scorer
    def is_concise(outputs: str) -> bool:
        """Evaluate if the answer is concise (5 words or fewer)"""
        return len(outputs.split()) <= 5

    # To configure LiteLLM for Azure OpenAI ref. https://docs.litellm.ai/docs/providers/azure/
    settings = Settings()

    # LiteLLM (used by the Correctness/Guidelines judge models) reads Azure
    # credentials from these environment variables, not from our Settings object.
    os.environ["AZURE_API_KEY"] = settings.azure_openai_api_key
    os.environ["AZURE_API_BASE"] = settings.azure_openai_endpoint
    os.environ["AZURE_API_VERSION"] = settings.azure_openai_api_version
    os.environ["AZURE_API_TYPE"] = "azure"

    # MLflow judge-model URI: "<provider>:/<model>" — here the Azure deployment name.
    model = f"azure:/{settings.azure_openai_model_chat}"
    results = mlflow.genai.evaluate(
        # Each row: inputs are kwargs for predict_fn; expectations feed the
        # Correctness judge as the reference answer.
        data=[
            {
                "inputs": {"question": "What is the capital of France?"},
                "expectations": {"expected_response": "Paris"},
            },
            {
                "inputs": {"question": "Who was the first person to build an airplane?"},
                "expectations": {"expected_response": "Wright Brothers"},
            },
            {
                "inputs": {"question": "Who wrote Romeo and Juliet?"},
                "expectations": {"expected_response": "William Shakespeare"},
            },
        ],
        predict_fn=qa_predict_fn,
        scorers=[
            Correctness(model=model),
            Guidelines(
                model=model,
                name="is_english",
                guidelines="The answer must be in English",
            ),
            is_concise,
        ],
    )
    logger.info(f"Evaluation results: {results}")
165+
71166
if __name__ == "__main__":
72167
load_dotenv(
73168
override=True,

0 commit comments

Comments
 (0)