import json
import os

from dotenv import load_dotenv

from parea import Parea, trace
from parea.evals import call_openai
from parea.schemas import Completion
from parea.schemas.log import EvaluationResult, Log

load_dotenv()

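# Parea client; expects PAREA_API_KEY in the environment (loaded above from a .env file).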
p = Parea(api_key=os.getenv("PAREA_API_KEY"))


def eval_fun(log: Log) -> EvaluationResult:
    # Access the output and target from the log so the LLM judge can compare them.
    output, target = log.output, log.target
    response: str = call_openai(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a judge. Reply in JSON with a numeric `score` and a `reason`."},  # <- CHANGE THIS
            {"role": "user", "content": f"Output: {output}\nTarget: {target}"},  # <- CHANGE THIS
        ],
        response_format={"type": "json_object"},
        temperature=0.0,
    )
    response_dict = json.loads(response)
    return EvaluationResult(name="YOUR_EVAL_NAME", score=float(response_dict["score"]), reason=response_dict["reason"])


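# @trace logs every call of the decorated function and runs the attached eval function(s) on the resulting log.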
@trace(eval_funcs=[eval_fun])
def deployed_prompt(prompt_template_input: str) -> str:
    return p.completion(
        Completion(
            deployment_id="YOUR_DEPLOYED_PROMPT_ID",
            llm_inputs={"prompt_template_input_name": prompt_template_input},
        )
    ).content


if __name__ == "__main__":
    p.experiment(
        name="some_experiment_name",
        data=172,  # dataset ID from Parea; a unique dataset name also works
        func=deployed_prompt,
    ).run()
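# A minimal sketch under an assumption (not verified for every Parea SDK version): the `data`
# argument can reportedly also take an inline list of dicts whose keys match the traced
# function's parameters, instead of a dataset ID or name, e.g.:
#
#     p.experiment(
#         name="some_experiment_name",
#         data=[{"prompt_template_input": "Write a haiku about the sea."}],  # assumed inline format
#         func=deployed_prompt,
#     ).run()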