
Commit 79a87f6

add q&a auto eval cookbook

1 parent 526b96e · commit 79a87f6

File tree

4 files changed (+139, -2)
Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
from typing import Any

import json
import os
import re
from functools import lru_cache

import markdownify
import requests
from dotenv import load_dotenv
from langchain.text_splitter import TokenTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI

from parea import Parea, trace, trace_insert
from parea.evals.general import answer_matches_target_llm_grader_factory
from parea.evals.rag import (
    answer_context_faithfulness_binary_factory,
    answer_context_faithfulness_statement_level_factory,
    context_query_relevancy_factory,
    percent_target_supported_by_context_factory,
)

load_dotenv()

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
p = Parea(api_key=os.getenv("PAREA_API_KEY"))
p.wrap_openai_client(client)

CHUNK_SIZE = 2000
CHUNK_OVERLAP = 200
text_splitter = TokenTextSplitter(model_name="gpt-3.5-turbo", chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
embeddings = OpenAIEmbeddings()

MODEL = "gpt-4o"
TOPK = 4
NUM_SECTIONS = 20
CODA_QA_FILE_LOC = "https://gist.githubusercontent.com/wong-codaio/b8ea0e087f800971ca5ec9eef617273e/raw/39f8bd2ebdecee485021e20f2c1d40fd649a4c77/articles.json"
CODA_QA_PAIRS_LOC = "https://gist.githubusercontent.com/nelsonauner/2ef4d38948b78a9ec2cff4aa265cff3f/raw/c47306b4469c68e8e495f4dc050f05aff9f997e1/qa_pairs_coda_data.jsonl"


@lru_cache()
def get_coda_qa_content(qa_file_loc: str) -> list[Document]:
    coda_qa_content_data = requests.get(qa_file_loc).json()
    return [
        Document(page_content=section.strip(), metadata={"doc_id": row["id"], "markdown": section.strip()})
        for row in coda_qa_content_data
        for section in re.split(r"(.*\n=+\n)", markdownify.markdownify(row["body"]))
        if section.strip() and not re.match(r".*\n=+\n", section)
    ]


@lru_cache()
def get_coda_qa_pairs_raw(qa_pairs_loc: str) -> list[dict[str, Any]]:
    coda_qa_pairs = requests.get(qa_pairs_loc)
    qa_pairs = [json.loads(line) for line in coda_qa_pairs.text.split("\n") if line]
    return [{"question": qa_pair["input"], "doc_metadata": qa_pair["metadata"], "target": qa_pair["expected"]} for qa_pair in qa_pairs]


class DocumentRetriever:
    def __init__(self):
        coda_qa_content_data = get_coda_qa_content(CODA_QA_FILE_LOC)
        documents = text_splitter.split_documents(coda_qa_content_data)
        vectorstore = FAISS.from_documents(documents, embeddings)
        self.retriever = vectorstore.as_retriever(search_kwargs={"k": TOPK})

    @trace
    def retrieve_top_k(self, question: str) -> list[Document]:
        trace_insert({"metadata": {"source_file": CODA_QA_FILE_LOC}})
        return self.retriever.invoke(question)


@trace(
    eval_funcs=[
        # Evals that do not need a target
        answer_context_faithfulness_binary_factory(),
        answer_context_faithfulness_statement_level_factory(),
        context_query_relevancy_factory(context_fields=["context"]),
        # Evals that need a target
        answer_matches_target_llm_grader_factory(model=MODEL),
        percent_target_supported_by_context_factory(context_fields=["context"]),
    ]
)
def generate_answer_from_docs(question: str, context: str) -> str:
    return (
        client.chat.completions.create(
            model=MODEL,
            messages=[
                {
                    "role": "user",
                    "content": f"""Use the following pieces of context to answer the question.
Do not make up an answer if no context is provided to help answer it.
\n\nContext:\n---------\n{context}\n\n---------\nQuestion: {question}\n---------\n\nAnswer:
""",
                }
            ],
        )
        .choices[0]
        .message.content
    )


@trace
def main(question: str, doc_metadata: dict[str, Any]) -> str:
    relevant_sections = DocumentRetriever().retrieve_top_k(question)
    context = "\n\n".join(doc.page_content for doc in relevant_sections)
    trace_insert({"metadata": doc_metadata})
    return generate_answer_from_docs(question, context)


if __name__ == "__main__":
    metadata = dict(model=MODEL, topk=str(TOPK), num_sections=str(NUM_SECTIONS), chunk_size=str(CHUNK_SIZE), chunk_overlap=str(CHUNK_OVERLAP))
    qa_pairs = get_coda_qa_pairs_raw(CODA_QA_PAIRS_LOC)
    p.experiment(
        name="Coda_RAG",
        data=qa_pairs[:NUM_SECTIONS],
        func=main,
        metadata=metadata,
    ).run()
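
For orientation: each record returned by get_coda_qa_pairs_raw carries "question", "doc_metadata", and "target". The first two line up with main's parameters by name when p.experiment runs, and "target" feeds the two target-based evals. A minimal sketch of pushing a single pair through the pipeline by hand, assuming the same environment variables and network access as the script above:

# Sketch: run one Q&A pair without the experiment harness.
# Assumes OPENAI_API_KEY and PAREA_API_KEY are set, as in the cookbook above.
qa_pairs = get_coda_qa_pairs_raw(CODA_QA_PAIRS_LOC)
sample = qa_pairs[0]  # {"question": ..., "doc_metadata": ..., "target": ...}
print(main(sample["question"], sample["doc_metadata"]))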

parea/cookbook/parea_llm_proxy/tracing_with_agent.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 p = Parea(api_key=os.getenv("PAREA_API_KEY"))
 
 # Parea SDK makes it easy to use different LLMs with the same API structure and standardized request/response schemas.
-LLM_OPTIONS = [("gpt-3.5-turbo-0125", "openai"), ("gpt-4-0125-preview", "openai"), ("claude-instant-1", "anthropic"), ("claude-2.1", "anthropic")]
+LLM_OPTIONS = [("gpt-3.5-turbo-0125", "openai"), ("gpt-4o", "openai"), ("claude-3-haiku-20240307", "anthropic"), ("claude-3-opus-20240229", "anthropic")]
 LIMIT = 1
 
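
The comment above is the point of this file: one request shape fanned out across providers. A hedged sketch of that pattern, where call_llm is a hypothetical stub standing in for whatever proxy call the script actually makes (it is not a real Parea SDK function):

# Illustrative only: call_llm marks where the cookbook's proxy call would go.
def call_llm(prompt: str, model: str, provider: str) -> str:
    raise NotImplementedError("replace with the cookbook's actual proxy call")

def ask_all(prompt: str) -> dict[str, str]:
    # Identical request shape for every (model, provider) pair; routing is by model name.
    return {model: call_llm(prompt, model, provider) for model, provider in LLM_OPTIONS}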

poetry.lock

Lines changed: 16 additions & 1 deletion
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,7 @@ boto3 = "^1.34.6"
 langchain-openai = "^0.0.5"
 pinecone-client = "^3.1.0"
 guidance = "^0.1.13"
+markdownify = "^0.12.1"
 
 [tool.black]
 # https://github.com/psf/black
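
markdownify is the one new dependency: the cookbook uses it to convert each article's HTML body to Markdown before chunking. A quick illustration of why the cookbook's section-splitting regex looks for =+ underlines: markdownify's default heading style is the underlined (Setext) form, so an h1 comes out as text over a row of equals signs. Output shown approximately, modulo surrounding blank lines:

import markdownify

html = "<h1>Getting started</h1><p>Create a doc.</p>"
print(markdownify.markdownify(html))
# Getting started
# ===============
#
# Create a doc.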
