
Commit f776ade

Instructions for lab 16

1 parent 61c5aeb

2 files changed: +222 -33 lines changed

agent.py

Lines changed: 46 additions & 33 deletions
```diff
@@ -6,6 +6,7 @@
 from pydantic import BaseModel
 from config import settings
 from agents import Agent, Runner, SQLiteSession, function_tool, set_default_openai_key
+from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX

 db = {
     "job_descriptions": {
@@ -26,7 +27,7 @@ def extract_skills(session_id: str, job_id: int) -> list[str]:
     job_description = db["job_descriptions"][job_id]
     skills = ["Python", "SQL", "System Design"]
     db["state"][session_id]["skills"] = skills
-    print(f"\n📋 Extracted skills: {', '.join(skills)}")
+    print(f"Extracted skills: {skills}")
     return skills

 @function_tool
@@ -41,14 +42,24 @@ def update_evaluation(session_id: str, skill: str, evaluation_result: bool) -> b
     except KeyError:
         return False

-@function_tool
-def transfer_to_skill_evaluator(session_id: str, skill: str) -> bool:
-    """This function takes a skill, evaluates it and returns the evaluation result for the skill as a boolean pass / fail"""
-    result = True
-    print(f"Evaluating skill: {skill}. Result {result}")
-    return result
+@function_tool
+def get_next_skill_to_evaluate(session_id: str) -> str | None:
+    """Retrieve the next skill to evaluate. Returns None if there are no more skills to evaluate"""
+    all_skills = db["state"][session_id]["skills"]
+    evaluated = db["state"][session_id]["evaluation"]
+    evaluated_skills = [item[0] for item in evaluated]
+    remaining_skills = set(all_skills) - set(evaluated_skills)
+    try:
+        next_skill = remaining_skills.pop()
+        print("NEXT SKILL TOOL", next_skill)
+        return next_skill
+    except KeyError:
+        print("No more skills")
+        return None

 ORCHESTRATOR_SYSTEM_PROMPT = """
+{RECOMMENDED_PROMPT_PREFIX}
+
 You are an interview orchestrator. Your goal is to evaluate the candidate on the required skills.

 # INSTRUCTIONS
@@ -57,9 +68,10 @@ def transfer_to_skill_evaluator(session_id: str, skill: str) -> bool:

 1. Extract key skills from the job description using extract_skills tool
 2. Then welcome the candidate, explain the screening process and ask the candidate if they are ready
-3. Then, for EACH skill in the list, use transfer_to_skill_evaluator tool to delegate evaluation
-4. Once you get the response, use the update_evaluation tool to save the evaluation result into the database
-5. Once all skills are evaluated, mention that the screening is complete and thank the candidate for their time
+3. Then, use the get_next_skill_to_evaluate tool to get the skill to evaluate
+4. If the skill is not `None`, hand off to the "Skills Evaluator Agent" to perform the evaluation, passing in the skill to evaluate
+5. Once you get the response, use the update_evaluation tool to save the evaluation result into the database
+6. Once get_next_skill_to_evaluate returns `None`, return a JSON object with a single field `status` set to "done" to indicate completion
 """

 ORCHESTRATOR_USER_PROMPT = """
@@ -71,21 +83,6 @@ def transfer_to_skill_evaluator(session_id: str, skill: str) -> bool:
 Begin by welcoming the applicant, extracting the key skills, then evaluate each one.
 """

-def run_orchestrator_agent(session_id, job_id):
-    session = SQLiteSession(f"screening-{session_id}")
-    agent = Agent(
-        name="Interview Orchestrator Agent",
-        instructions=ORCHESTRATOR_SYSTEM_PROMPT,
-        model="gpt-5.1",
-        tools=[extract_skills, transfer_to_skill_evaluator, update_evaluation]
-    )
-    user_input = ORCHESTRATOR_USER_PROMPT.format(job_id=job_id, session_id=session_id)
-    while user_input != 'bye':
-        result = Runner.run_sync(agent, user_input, session=session)
-        print(result.final_output)
-        user_input = input("User: ")
-    return
-
 question_bank = {
     "python": {
         "easy": [
@@ -179,6 +176,8 @@ def check_answer(skill:str, question: str, answer: str) -> Tuple[bool, str]:
     return result.model_dump_json()

 EVALUATION_SYSTEM_PROMPT = """
+{RECOMMENDED_PROMPT_PREFIX}
+
 You are a specialised skill evaluator. Your job is to evaluate the candidate's proficiency in a given skill

 1. Identify which skill you're evaluating (it will be mentioned in the conversation)
@@ -189,11 +188,14 @@ def check_answer(skill:str, question: str, answer: str) -> Tuple[bool, str]:
 - If the check_answer tool returned incorrect, choose the lower difficulty, without going below 'easy'
 - Stop after 3 questions MAXIMUM
 5. If they correctly answered two of the three questions, then they pass, otherwise they fail
+6. After completion of 3 questions, hand off to the "Interview Orchestrator Agent" passing in the result of the evaluation
+
+# DECISION RULES:

-DECISION RULES:
-- Maximum 3 questions per skill
+- Do not give feedback on the user's answer. Always proceed to the next question
+- 3 questions per skill

-OUTPUT:
+# OUTPUT:

 After the evaluation is complete, return the pass/fail in a json object with the following properties
 - result: true or false
@@ -203,25 +205,36 @@ def check_answer(skill:str, question: str, answer: str) -> Tuple[bool, str]:
 Evaluate the user on the following skill: {skill}
 """

-def run_evaluation_agent(session_id, skill):
+def run(session_id, job_id):
     session = SQLiteSession(f"screening-{session_id}")
-    agent = Agent(
+    orchestrator_agent = Agent(
+        name="Interview Orchestrator Agent",
+        instructions=ORCHESTRATOR_SYSTEM_PROMPT.format(RECOMMENDED_PROMPT_PREFIX=RECOMMENDED_PROMPT_PREFIX),
+        model="gpt-5.1",
+        tools=[extract_skills, get_next_skill_to_evaluate, update_evaluation]
+    )
+    evaluation_agent = Agent(
         name="Skills Evaluator Agent",
-        instructions=EVALUATION_SYSTEM_PROMPT,
+        instructions=EVALUATION_SYSTEM_PROMPT.format(RECOMMENDED_PROMPT_PREFIX=RECOMMENDED_PROMPT_PREFIX),
         model="gpt-5.1",
         tools=[get_question, check_answer]
     )
-    user_input = EVALUATION_USER_PROMPT.format(skill=skill)
+    orchestrator_agent.handoffs = [evaluation_agent]
+    evaluation_agent.handoffs = [orchestrator_agent]
+    user_input = ORCHESTRATOR_USER_PROMPT.format(job_id=job_id, session_id=session_id)
+    agent = orchestrator_agent
     while user_input != 'bye':
-        result = Runner.run_sync(agent, user_input, session=session)
+        result = Runner.run_sync(agent, user_input, session=session, max_turns=20)
+        agent = result.last_agent
         print(result.final_output)
         user_input = input("User: ")

 def main():
     set_default_openai_key(settings.OPENAI_API_KEY)
     job_id = 1
     session_id = "session123"
-    run_evaluation_agent(session_id, "Python")
+    run(session_id, job_id)
+    print("FINAL EVALUATION STATE", db)

 if __name__ == "__main__":
     main()
```

labs/16-handoffs.md

Lines changed: 176 additions & 0 deletions
# Lab 16: Handoffs

We have individually written two agents:

- Orchestrator agent
- Evaluation agent

We now want to integrate the two, so that:

1. The orchestrator agent extracts the list of skills
2. It gets the first skill
3. It hands off to the evaluation agent to evaluate that skill
4. The evaluation agent asks three questions and hands back to the orchestrator agent with the result
5. The orchestrator agent saves the result to the database
6. Steps 3-5 repeat until all skills are done

Here, the orchestrator agent and the evaluation agent have to coordinate back and forth to complete the task.
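
In the OpenAI Agents SDK, this back-and-forth is modelled with handoffs: each `Agent` declares which agents it can transfer the conversation to, and the runner switches the active agent when a transfer happens. Below is a minimal sketch of the pattern, stripped of this lab's tools and prompts (the instructions strings are illustrative placeholders, and an API key is assumed to be configured already):

```python
from agents import Agent, Runner

# Two agents that can transfer the conversation to each other.
orchestrator = Agent(
    name="Interview Orchestrator Agent",
    instructions="Coordinate the screening; hand off each skill to the evaluator.",
)
evaluator = Agent(
    name="Skills Evaluator Agent",
    instructions="Evaluate one skill, then hand back to the orchestrator.",
)
orchestrator.handoffs = [evaluator]
evaluator.handoffs = [orchestrator]

# A single run resolves any handoffs; result.last_agent is whichever agent
# ended up answering, so the next user turn can resume with that agent.
result = Runner.run_sync(orchestrator, "Begin the screening")
print(result.last_agent.name, result.final_output)
```
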
## High Level Overview

1. First, we need to delete some code that we don't need anymore
   - Delete the tool `transfer_to_skill_evaluator`
   - Delete the function `run_orchestrator_agent`
   - Delete the function `run_evaluation_agent`
1. Now we need to implement a new tool `get_next_skill_to_evaluate(session_id: str) -> str | None`
   - It should return the next skill to evaluate
   - It calculates this by looking in the DB (a sketch of the assumed state follows this list)
     - Find all the skills to test (the `skills` field)
     - See which skills have already been evaluated (the `evaluation` field)
     - Find a skill that remains to be evaluated and return it
     - Return `None` if no skills remain
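
For intuition, here is a hypothetical snapshot of `db["state"][session_id]` midway through a screening; the exact shape is an assumption inferred from how the tools read and write it, not part of the lab code:

```python
# Assumed state midway through a screening (illustrative only):
state = {
    "skills": ["Python", "SQL", "System Design"],  # written by extract_skills
    "evaluation": [("Python", True)],              # appended by update_evaluation
}

evaluated = {item[0] for item in state["evaluation"]}  # {"Python"}
remaining = set(state["skills"]) - evaluated           # {"SQL", "System Design"}
# set.pop() returns an arbitrary remaining skill; on an empty set it raises
# KeyError, which the tool converts to None.
print(remaining.pop())
```
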
1. Then import `from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX`
   - This is prompt text that OpenAI recommends adding to the top of each agent's system prompt in a multi-agent system
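
If you are curious what the prefix actually contains, printing it is a quick check (not a required lab step):

```python
from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX

# Boilerplate that tells the model it is part of a multi-agent system and
# how transfers between agents work.
print(RECOMMENDED_PROMPT_PREFIX)
```
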
So update both agent system prompts as follows:

```python
ORCHESTRATOR_SYSTEM_PROMPT = """
{RECOMMENDED_PROMPT_PREFIX}

You are an interview orchestrator. Your goal is to evaluate the candidate on the required skills.

# INSTRUCTIONS

Follow the following steps exactly

1. Extract key skills from the job description using extract_skills tool
2. Then welcome the candidate, explain the screening process and ask the candidate if they are ready
3. Then, use the get_next_skill_to_evaluate tool to get the skill to evaluate
4. If the skill is not `None`, hand off to the "Skills Evaluator Agent" to perform the evaluation, passing in the skill to evaluate
5. Once you get the response, use the update_evaluation tool to save the evaluation result into the database
6. Once get_next_skill_to_evaluate returns `None`, return a JSON object with a single field `status` set to "done" to indicate completion
"""

EVALUATION_SYSTEM_PROMPT = """
{RECOMMENDED_PROMPT_PREFIX}

You are a specialised skill evaluator. Your job is to evaluate the candidate's proficiency in a given skill

1. Identify which skill you're evaluating (it will be mentioned in the conversation)
2. Use the get_question tool to get a question to ask (start with 'medium' difficulty). Ask the question verbatim, DO NOT MODIFY it in any way
3. After each candidate answer, use the check_answer tool to evaluate it
4. Decide the next question:
- If the check_answer tool returned correct, choose the next higher difficulty, without going above 'hard'
- If the check_answer tool returned incorrect, choose the lower difficulty, without going below 'easy'
- Stop after 3 questions MAXIMUM
5. If they correctly answered two of the three questions, then they pass, otherwise they fail
6. After completion of 3 questions, hand off to the "Interview Orchestrator Agent" passing in the result of the evaluation

# DECISION RULES:

- Do not give feedback on the user's answer. Always proceed to the next question
- 3 questions per skill

# OUTPUT:

After the evaluation is complete, return the pass/fail in a json object with the following properties
- result: true or false
"""
```
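
Both prompts leave a `{RECOMMENDED_PROMPT_PREFIX}` placeholder that is filled with `str.format` when the agents are constructed (see the hints below). One caveat worth knowing: any other literal `{` or `}` braces in a prompt would need doubling (`{{`, `}}`) for `format` to pass them through. A toy illustration with a made-up prefix value:

```python
template = """
{RECOMMENDED_PROMPT_PREFIX}

You are an interview orchestrator.
"""

# The placeholder is substituted; the rest of the template passes through unchanged.
prompt = template.format(
    RECOMMENDED_PROMPT_PREFIX="# System context: you are part of a multi-agent system."
)
print(prompt)
```
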
1. Now we will create the main function `def run(session_id, job_id):`
   - This function replaces `run_orchestrator_agent` and `run_evaluation_agent`
   - Create the `session`
   - Create the `orchestrator_agent` (remember to pass in the new tool and remove the deleted one)
   - Create the `evaluation_agent`
   - In both of the above, remember to fill in the value for `RECOMMENDED_PROMPT_PREFIX`
   - Configure the handoffs and run the agents as shown below

```python
orchestrator_agent.handoffs = [evaluation_agent]
evaluation_agent.handoffs = [orchestrator_agent]
user_input = ORCHESTRATOR_USER_PROMPT.format(job_id=job_id, session_id=session_id)
agent = orchestrator_agent
while user_input != 'bye':
    result = Runner.run_sync(agent, user_input, session=session, max_turns=20)
    agent = result.last_agent
    print(result.final_output)
    user_input = input("User: ")
```

Note that `agent = result.last_agent` makes the next turn resume with whichever agent the conversation was last handed to, and `max_turns=20` caps the turns within a single run so the two agents cannot hand off to each other forever.

Finally, update `main()`:

```python
def main():
    set_default_openai_key(settings.OPENAI_API_KEY)
    job_id = 1
    session_id = "session123"
    run(session_id, job_id)
    print("FINAL EVALUATION STATE", db)
```

And run the code. When you exit the loop (type `bye`), the final contents of the in-memory `db` are printed.

## Hints

### How to implement get_next_skill_to_evaluate?

<details>
<summary>Answer</summary>

```python
@function_tool
def get_next_skill_to_evaluate(session_id: str) -> str | None:
    """Retrieve the next skill to evaluate. Returns None if there are no more skills to evaluate"""
    all_skills = db["state"][session_id]["skills"]
    evaluated = db["state"][session_id]["evaluation"]
    evaluated_skills = [item[0] for item in evaluated]
    remaining_skills = set(all_skills) - set(evaluated_skills)
    try:
        next_skill = remaining_skills.pop()
        print("NEXT SKILL TOOL", next_skill)
        return next_skill
    except KeyError:
        print("No more skills")
        return None
```

</details>

### How to implement the session?

<details>
<summary>Answer</summary>

See previous lab.

</details>
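
For reference, it is the same one-liner this lab's `run()` uses (shown here with the session id from `main()` so the snippet stands alone):

```python
from agents import SQLiteSession

session_id = "session123"
# One persistent conversation history per screening session.
session = SQLiteSession(f"screening-{session_id}")
```
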

### How to implement the orchestrator agent?

<details>
<summary>Answer</summary>

```python
orchestrator_agent = Agent(
    name="Interview Orchestrator Agent",
    instructions=ORCHESTRATOR_SYSTEM_PROMPT.format(RECOMMENDED_PROMPT_PREFIX=RECOMMENDED_PROMPT_PREFIX),
    model="gpt-5.1",
    tools=[extract_skills, get_next_skill_to_evaluate, update_evaluation]
)
```

</details>

### How to implement the evaluation agent?

<details>
<summary>Answer</summary>

```python
evaluation_agent = Agent(
    name="Skills Evaluator Agent",
    instructions=EVALUATION_SYSTEM_PROMPT.format(RECOMMENDED_PROMPT_PREFIX=RECOMMENDED_PROMPT_PREFIX),
    model="gpt-5.1",
    tools=[get_question, check_answer]
)
```

</details>
