"""Evaluate a question-answering HTTP endpoint against gold answers.

Each question is POSTed to ``RAG_API_URL`` (environment variable, default
``http://localhost:8000/ask``) as ``{"question": ...}``.  The returned answer
is scored against the gold answer with exact match and token-level F1, echoed
to stdout and written to a CSV file.

Usage::

    python tools/eval.py --q "What is your return policy?"
    python tools/eval.py --file tools/eval_qna.csv

The ``--file`` CSV must have a ``question`` column; an ``answer`` column with
gold answers is optional.
"""
import argparse
import csv
import json
import os
import time  # not used below; retained from the original import list
from collections import Counter
from typing import Iterable, Optional, Sequence, Tuple

RAG_API_URL = os.getenv("RAG_API_URL", "http://localhost:8000/ask")


def _ask_via_rest(q: str) -> str:
    """POST question *q* to ``RAG_API_URL`` and return the answer text.

    The response JSON is searched for ``"answer"`` and then ``"output_text"``;
    if neither holds a truthy value the whole payload is returned as a JSON
    string so the caller can still see what came back.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    # Imported lazily so the scoring helpers stay importable without requests.
    import requests

    r = requests.post(RAG_API_URL, json={"question": q}, timeout=60)
    r.raise_for_status()
    data = r.json()
    return data.get("answer") or data.get("output_text") or json.dumps(data)


def normalize(s: str) -> str:
    """Return *s* stripped and lower-cased; ``None``/empty becomes ``""``."""
    return (s or "").strip().lower()


def exact_match(p: str, g: str) -> float:
    """Return 1.0 if prediction *p* equals gold *g* after normalization.

    An empty (or ``None``) gold answer never counts as a match.
    """
    return 1.0 if g and normalize(p) == normalize(g) else 0.0


def token_f1(p: str, g: str) -> float:
    """Return the whitespace-token F1 between prediction *p* and gold *g*.

    Overlap is counted as a multiset intersection, so a token repeated in the
    prediction is only credited as many times as it occurs in the gold answer.
    Two empty strings score 1.0; exactly one empty string scores 0.0.
    """
    pred_tokens = normalize(p).split()
    gold_tokens = normalize(g).split()
    if not pred_tokens and not gold_tokens:
        return 1.0
    if not pred_tokens or not gold_tokens:
        return 0.0
    # Counter & Counter keeps the minimum count of every shared token.
    overlap = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)


def run_eval(rows: Iterable[Tuple[str, str]], out_path: str = "results.csv") -> None:
    """Ask every question in *rows*, score it, print it and save a CSV.

    Args:
        rows: ``(question, gold_answer)`` pairs.  When the gold answer is
            empty the score columns are left blank.
        out_path: destination CSV file (overwritten).
    """
    out = []
    for q, g in rows:
        # Per-row boundary: a failing request is recorded as the prediction
        # instead of aborting the whole run.
        try:
            pred = _ask_via_rest(q)
        except Exception as e:
            pred = f"[ERROR] {e}"
        em = exact_match(pred, g) if g else ""
        f1 = token_f1(pred, g) if g else ""
        print(f"Q: {q}\nA: {pred}\nEM: {em} F1: {f1}\n---")
        out.append((q, pred, g, em, f1))
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["question", "pred", "gold", "exact_match", "f1"])
        w.writerows(out)
    print(f"Saved {out_path} (n={len(out)})")


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Command-line entry point.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    ``--q`` evaluates a single question without a gold answer and takes
    precedence over ``--file``, which reads question/answer rows from a CSV.
    """
    ap = argparse.ArgumentParser(
        description="Evaluate a QA endpoint with exact match and token F1."
    )
    ap.add_argument("--q", type=str, help="single question to ask (no gold answer)")
    ap.add_argument(
        "--file",
        type=str,
        help="CSV with a 'question' column and an optional 'answer' column",
    )
    a = ap.parse_args(argv)
    if a.q:
        run_eval([(a.q, "")])
    elif a.file:
        with open(a.file, newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)
            if "question" not in (reader.fieldnames or []):
                ap.error(f"{a.file}: missing required 'question' column")
            # A short row yields None for the missing cell; treat it as no gold.
            rows = [(d["question"], d.get("answer") or "") for d in reader]
        run_eval(rows)
    else:
        ap.error("Provide --q or --file")


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Companion files added by the same patch, reproduced verbatim for reference.
#
# .env.example
#   # === Azure OpenAI ===
#   AZURE_OPENAI_ENDPOINT=
#   AZURE_OPENAI_API_KEY=
#   AZURE_OPENAI_API_VERSION=2024-05-01-preview
#   AZURE_OPENAI_DEPLOYMENT=
#
#   # === Azure AI Search ===
#   AZURE_SEARCH_ENDPOINT=
#   AZURE_SEARCH_API_KEY=
#   AZURE_SEARCH_INDEX_NAME=
#
#   # Optional local API for eval (POST /ask)
#   RAG_API_URL=http://localhost:8000/ask
#
# tools/eval_qna.csv
#   question,answer
#   What is your return policy?,Returns accepted within 30 days with receipt.
#   How do I reset my password?,Use the “Forgot Password” link and follow the email instructions.
# ---------------------------------------------------------------------------