| 1 | +import json |
1 | 2 | import random |
2 | 3 | from typing import Literal, Tuple |
3 | | -from langchain_openai import ChatOpenAI |
4 | | -from langchain_core.prompts import ChatPromptTemplate, PromptTemplate |
5 | | -from langchain_core.output_parsers import PydanticOutputParser |
6 | | -from pydantic import BaseModel |
7 | 4 | from config import settings |
8 | 5 | from agents import Agent, Runner, SQLiteSession, function_tool, set_default_openai_key, set_trace_processors |
9 | 6 | from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX |
10 | | -from braintrust import init_logger |
| 7 | +from braintrust import init_logger, load_prompt |
11 | 8 | from braintrust.wrappers.openai import BraintrustTracingProcessor |
| 9 | +from openai import OpenAI |
12 | 10 | |
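The import changes above swap the LangChain stack (ChatOpenAI, PromptTemplate, PydanticOutputParser) for Braintrust's `load_prompt` plus the plain `openai` client, while keeping the Agents SDK tracing hooks. The wiring of those hooks is not shown in this hunk; below is a minimal sketch of how they are commonly connected, assuming `BraintrustTracingProcessor` accepts the logger returned by `init_logger` (the "Prodapt" project name is borrowed from the `load_prompt` call further down, everything else is an assumption):

```python
# Hedged sketch, not part of this diff: one plausible way the imported helpers
# are wired up elsewhere in the file.
from agents import set_default_openai_key, set_trace_processors
from braintrust import init_logger
from braintrust.wrappers.openai import BraintrustTracingProcessor

from config import settings  # assumed to expose OPENAI_API_KEY

set_default_openai_key(settings.OPENAI_API_KEY)
# Forward Agents SDK traces to Braintrust instead of the default trace exporter.
set_trace_processors([BraintrustTracingProcessor(init_logger(project="Prodapt"))])
```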
13 | 11 | db = { |
14 | 12 | "job_descriptions": { |
@@ -136,46 +134,19 @@ def get_question(topic: str, difficulty: Literal['easy', 'medium', 'hard']) -> s |
136 | 134 | questions = question_bank[topic.lower()][difficulty.lower()] |
137 | 135 | return random.choice(questions) |
138 | 136 | |
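For context on what this hunk indexes into: `get_question` looks up a nested `question_bank` dict by topic and difficulty and returns a random entry. A hedged sketch of the shape that lookup implies follows; the topics and questions here are purely illustrative, and the real entries sit in the elided part of the file:

```python
import random
from typing import Literal

# Illustrative shape only; the actual question_bank is defined in the elided lines.
question_bank = {
    "python": {
        "easy": ["What is a list comprehension?"],
        "medium": ["How does the GIL affect CPU-bound threads?"],
        "hard": ["How does asyncio schedule coroutines on its event loop?"],
    },
}

def get_question(topic: str, difficulty: Literal["easy", "medium", "hard"]) -> str:
    # Case-insensitive lookup, then pick one question at random from the bucket.
    questions = question_bank[topic.lower()][difficulty.lower()]
    return random.choice(questions)
```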
139 | | -VALIDATION_PROMPT = """ |
140 | | -Evaluate the given interview answer. |
141 | | - |
142 | | -# Instructions |
143 | | - |
144 | | -Provide a JSON response with: |
145 | | -- correct: true or false depending if the answer was correct or not for the given question in the context of the given skill. |
146 | | -- reasoning: brief explanation (2-3 sentences) |
147 | | - |
148 | | -For subjective answers, mark the answer true if the majority of the important points have been mentioned. |
149 | | - |
150 | | -Answers are expected to be brief, so be rigorous but fair. Look for technical accuracy and clarity. |
151 | | - |
152 | | -# Output Format |
153 | | - |
154 | | -{format_instructions} |
155 | | - |
156 | | -# Task |
157 | | - |
158 | | -Skill: {skill} |
159 | | -Question: {question} |
160 | | -Answer: |
161 | | -{answer} |
162 | | - |
163 | | -Evaluation:""" |
164 | | - |
165 | | -class ValidationResult(BaseModel): |
166 | | - correct: bool |
167 | | - reasoning: str |
168 | | - |
169 | 137 | @function_tool |
170 | 138 | def check_answer(skill:str, question: str, answer: str) -> Tuple[bool, str]: |
171 | 139 | """Given a question and an answer for a particular skill, validate if the answer is correct. Returns a tuple (correct, reasoning)""" |
172 | 140 | |
173 | | - llm = ChatOpenAI(model="gpt-5.1", temperature=0, api_key=settings.OPENAI_API_KEY) |
174 | | - parser = PydanticOutputParser(pydantic_object=ValidationResult) |
175 | | - prompt = PromptTemplate.from_template(VALIDATION_PROMPT).partial(format_instructions=parser.get_format_instructions()) |
176 | | - chain = prompt | llm | parser |
177 | | - result = chain.invoke({"skill": skill, "question": question, "answer": answer}) |
178 | | - return result.model_dump_json() |
| 141 | + prompt = load_prompt(project="Prodapt", slug="check-answer-prompt-0d7c") |
| 142 | + details = prompt.build(skill=skill, question=question, answer=answer) |
| 143 | + client = OpenAI(api_key=settings.OPENAI_API_KEY) |
| 144 | + response = client.chat.completions.create( |
| 145 | + model="gpt-5.1", temperature=0, |
| 146 | + response_format=details["response_format"], |
| 147 | + messages=details["messages"] |
| 148 | + ) |
| 149 | + return json.loads(response.choices[0].message.content) |
179 | 150 | |
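One mismatch worth flagging in the new `check_answer`: it is still annotated `-> Tuple[bool, str]` and its docstring promises a `(correct, reasoning)` tuple, but it now returns the parsed JSON object as-is. Assuming the Braintrust prompt kept the field names of the removed `ValidationResult` model, a small adapter would restore the declared shape; this is a sketch under that assumption, not part of the change:

```python
import json
from typing import Tuple

def parse_validation(raw: str) -> Tuple[bool, str]:
    """Map the model's JSON reply onto the (correct, reasoning) tuple that
    check_answer declares. Assumes the Braintrust prompt's response schema kept
    the removed ValidationResult fields: {"correct": bool, "reasoning": str}."""
    payload = json.loads(raw)
    return payload["correct"], payload["reasoning"]

# e.g. the last line of check_answer could become:
#   return parse_validation(response.choices[0].message.content)
```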
180 | 151 | EVALUATION_SYSTEM_PROMPT = """ |
181 | 152 | {RECOMMENDED_PROMPT_PREFIX} |