
Commit b6f332e

Instructions for lab 19
1 parent f5382a1 commit b6f332e

File tree

3 files changed: +76 -42 lines changed

.gitignore

Lines changed: 2 additions & 1 deletion

@@ -210,4 +210,5 @@ pgdata/
 uploads/
 notes.md
 qdrant_store/
-.vscode/
+.vscode/
+.braintrust

agent.py

Lines changed: 12 additions & 41 deletions

@@ -1,14 +1,12 @@
+import json
 import random
 from typing import Literal, Tuple
-from langchain_openai import ChatOpenAI
-from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
-from langchain_core.output_parsers import PydanticOutputParser
-from pydantic import BaseModel
 from config import settings
 from agents import Agent, Runner, SQLiteSession, function_tool, set_default_openai_key, set_trace_processors
 from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX
-from braintrust import init_logger
+from braintrust import init_logger, load_prompt
 from braintrust.wrappers.openai import BraintrustTracingProcessor
+from openai import OpenAI
 
 db = {
     "job_descriptions": {
@@ -136,46 +134,19 @@ def get_question(topic: str, difficulty: Literal['easy', 'medium', 'hard']) -> s
     questions = question_bank[topic.lower()][difficulty.lower()]
     return random.choice(questions)
 
-VALIDATION_PROMPT = """
-Evaluate the given interview answer.
-
-# Instructions
-
-Provide a JSON response with:
-- correct: true or false depending if the answer was correct or not for the given question in the context of the given skill.
-- reasoning: brief explanation (2-3 sentences)
-
-For subjective answers, mark the answer true if the majority of the important points have been mentioned.
-
-Answers are expected to be brief, so be rigorous but fair. Look for technical accuracy and clarity.
-
-# Output Format
-
-{format_instructions}
-
-# Task
-
-Skill: {skill}
-Question: {question}
-Answer:
-{answer}
-
-Evaluation:"""
-
-class ValidationResult(BaseModel):
-    correct: bool
-    reasoning: str
-
 @function_tool
 def check_answer(skill:str, question: str, answer: str) -> Tuple[bool, str]:
     """Given a question and an answer for a particular skill, validate if the answer is correct. Returns a tuple (correct, reasoning)"""
 
-    llm = ChatOpenAI(model="gpt-5.1", temperature=0, api_key=settings.OPENAI_API_KEY)
-    parser = PydanticOutputParser(pydantic_object=ValidationResult)
-    prompt = PromptTemplate.from_template(VALIDATION_PROMPT).partial(format_instructions=parser.get_format_instructions())
-    chain = prompt | llm | parser
-    result = chain.invoke({"skill": skill, "question": question, "answer": answer})
-    return result.model_dump_json()
+    prompt = load_prompt(project="Prodapt", slug="check-answer-prompt-0d7c")
+    details = prompt.build(skill=skill, question=question, answer=answer)
+    client = OpenAI(api_key=settings.OPENAI_API_KEY)
+    response = client.chat.completions.create(
+        model="gpt-5.1", temperature=0,
+        response_format=details["response_format"],
+        messages=details["messages"]
+    )
+    return json.loads(response.choices[0].message.content)
 
 EVALUATION_SYSTEM_PROMPT = """
 {RECOMMENDED_PROMPT_PREFIX}

labs/19-prompt-management.md

Lines changed: 62 additions & 0 deletions

# Lab 19: Prompt Management

In this lab we will create a prompt in Braintrust, and then load and use it from our code.

The prompt we are going to configure is the `VALIDATION_PROMPT`, which is used to check whether the answer provided by the user is correct.

## High Level Overview
1. Go to the Braintrust dashboard, and click the `Prompts` section on the left
1. Create a prompt
   - Name: Check Answer Prompt
   - Slug: `check-answer-prompt-0d7c`
   - Output type: Structured output (the sketch after this list shows the response shape this produces)
     - boolean field `correct`
     - string field `reasoning`

Set this as the prompt text:

```
Evaluate the given interview answer.

Be rigorous but fair. Look for technical accuracy and clarity.

# Task

Skill: {{skill}}
Question: {{question}}
Answer:
{{answer}}

Evaluation:
```

1. Click the `Save new custom prompt` button on the top right
1. In `agent.py`, rewrite the function `check_answer` to use this prompt from Braintrust
   - Use the `load_prompt` function
   - Call the OpenAI chat completions API directly (no LangChain)
   - Documentation: https://www.braintrust.dev/docs/core/functions/prompts#load-a-prompt
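For reference, once the structured output above is configured, the message content that `check_answer` parses with `json.loads` is a small JSON object with exactly those two fields. A hypothetical example (values invented, wording will vary from run to run):

```python
# Hypothetical parsed validation result, matching the structured output
# fields defined in the Braintrust prompt: a boolean `correct` and a
# string `reasoning`.
validation = {
    "correct": False,
    "reasoning": "The answer claims tuples are mutable, which is incorrect.",
}
```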
## Hints

### How do I implement check_answer?

<details>
<summary>Answer</summary>
```python
@function_tool
def check_answer(skill:str, question: str, answer: str) -> Tuple[bool, str]:
    """Given a question and an answer for a particular skill, validate if the answer is correct. Returns a tuple (correct, reasoning)"""

    # Load the prompt definition (messages + structured output) from Braintrust
    prompt = load_prompt(project="Prodapt", slug="check-answer-prompt-0d7c")
    details = prompt.build(skill=skill, question=question, answer=answer)

    # Call the OpenAI chat completions API directly with the built prompt
    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-5.1", temperature=0,
        response_format=details["response_format"],
        messages=details["messages"]
    )
    return json.loads(response.choices[0].message.content)
```

</details>
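If you want to sanity-check the prompt outside the agent before wiring it into the tool, you can run the same flow directly. This is a minimal sketch, not part of the lab: it assumes `BRAINTRUST_API_KEY` and `OPENAI_API_KEY` are available in your environment, and the skill/question/answer values are invented.

```python
import json

from braintrust import load_prompt
from openai import OpenAI

# Load the prompt created in this lab and fill in its template variables.
prompt = load_prompt(project="Prodapt", slug="check-answer-prompt-0d7c")
details = prompt.build(
    skill="python",
    question="What is the difference between a list and a tuple?",
    answer="Lists are mutable, tuples are immutable.",
)

# Call chat completions directly with the messages and structured output
# returned by prompt.build(), exactly as check_answer does.
client = OpenAI()
response = client.chat.completions.create(
    model="gpt-5.1", temperature=0,
    response_format=details["response_format"],
    messages=details["messages"],
)
print(json.loads(response.choices[0].message.content))
# Expected shape: {"correct": true/false, "reasoning": "..."}
```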
