Commit 2064072

route llm cookbook
1 parent c217ea7 commit 2064072

File tree

3 files changed: +1290 -1 lines changed

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
import json
import os

from dotenv import load_dotenv
from routellm.controller import Controller

from parea import Parea, trace, trace_insert
from parea.schemas import EvaluationResult, Log, LLMInputs, Completion, Message, Role, ModelParams

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

ROUTER = "mf"
COST_THRESHOLD = 0.11593
# This tells RouteLLM to use the MF router with a cost threshold of 0.11593
RMODEL = f"router-{ROUTER}-{COST_THRESHOLD}"
STRONG_MODEL = "gpt-4o"
WEAK_MODEL = "groq/llama3-70b-8192"
client = Controller(
    routers=[ROUTER],
    strong_model=STRONG_MODEL,
    weak_model=WEAK_MODEL,
)
p = Parea(api_key=os.getenv("PAREA_API_KEY"))
# The RouteLLM Controller exposes an OpenAI-compatible interface, so Parea can trace its calls
p.wrap_openai_client(client)

questions = [
    {"question": "Write a function that takes a string as input and returns the string reversed."},
    {"question": "Write a haiku about a sunset."},
    {"question": "Write a cold email to a VP of Eng selling them on OpenAI's API."},
    {"question": "What's the largest city in Germany?"},
]


def llm_judge(log: Log) -> EvaluationResult:
    # LLM-as-a-judge eval: gpt-4o-mini scores each response from 0 to 5, normalized to 0-1
    try:
        response = p.completion(
            data=Completion(
                llm_configuration=LLMInputs(
                    model="gpt-4o-mini",
                    messages=[
                        Message(
                            role=Role.user,
                            content=f"""[Instruction]\nPlease act as an impartial judge and evaluate the quality and
correctness of the response provided. Be as objective as possible. Respond in JSON with two fields: \n
\t 1. score: int = a number on a scale of 0 to 5; 5 being great and 0 being bad.\n
\t 2. reason: str = explain your reasoning for the selected score.\n\n
This is the question asked: QUESTION:\n{log.inputs['question']}\n
This is the response you are judging, RESPONSE:\n{log.output}\n\n""",
                        )
                    ],
                    model_params=ModelParams(response_format={"type": "json_object"}),
                ),
            )
        )
        r = json.loads(response.content)
        return EvaluationResult(name="LLMJudge", score=int(r["score"]) / 5, reason=r["reason"])
    except Exception as e:
        return EvaluationResult(name="error-LLMJudge", score=0, reason=f"Error in grading: {e}")


@trace(eval_funcs=[llm_judge])
def answer_llm(question: str) -> str:
    r = client.chat.completions.create(
        model=RMODEL,
        messages=[{"role": "user", "content": f"Answer this question: {question}\n"}],
    )
    # Record which underlying model the router selected for this request
    trace_insert({"metadata": {"selected_model": r.model}})
    return r.choices[0].message.content


if __name__ == "__main__":
    p.experiment(
        name="RouteLLM",
        data=questions,
        func=answer_llm,
        metadata={
            "router": ROUTER,
            "cost_threshold": str(COST_THRESHOLD),
            "strong_model": STRONG_MODEL,
            "weak_model": WEAK_MODEL,
        },
    ).run()
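
To sanity-check the routing decision before running the full Parea experiment, a minimal sketch along these lines (not part of the committed file) reuses the same Controller configuration and router string from the code above, assuming the same .env file supplies the API keys, and simply prints which underlying model the MF router selects for a single prompt:

# Minimal sanity-check sketch (not part of the committed file): assumes the same .env
# provides OPENAI_API_KEY and GROQ_API_KEY, and reuses the router string from above.
import os

from dotenv import load_dotenv
from routellm.controller import Controller

load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

client = Controller(
    routers=["mf"],
    strong_model="gpt-4o",
    weak_model="groq/llama3-70b-8192",
)

response = client.chat.completions.create(
    model="router-mf-0.11593",
    messages=[{"role": "user", "content": "What's the largest city in Germany?"}],
)
print(response.model)                       # which model the MF router selected
print(response.choices[0].message.content)  # the routed model's answer

Lowering the cost threshold in the router string sends more traffic to the strong model; raising it sends more to the weak model.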
