|
25 | 25 | "outputs": [], |
26 | 26 | "source": [ |
27 | 27 | "import os\n", |
| 28 | + "\n", |
28 | 29 | "# TODO REMOVE ENVIRONMENT VARIABLES!!!\n", |
29 | 30 | "# get keys for your project from https://cloud.langfuse.com\n", |
30 | 31 | "os.environ[\"LANGFUSE_PUBLIC_KEY\"] = \"\"\n", |
31 | 32 | "os.environ[\"LANGFUSE_SECRET_KEY\"] = \"\"\n", |
32 | | - " \n", |
| 33 | + "\n", |
33 | 34 | "# your openai key\n", |
34 | | - "#os.environ[\"OPENAI_API_KEY\"] = \"\"" |
| 35 | + "# os.environ[\"OPENAI_API_KEY\"] = \"\"" |
35 | 36 | ] |
36 | 37 | }, |
37 | 38 | { |
|
86 | 87 | "source": [ |
87 | 88 | "from datasets import load_dataset\n", |
88 | 89 | "\n", |
89 | | - "fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")['baseline']\n", |
| 90 | + "fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")[\"baseline\"]\n", |
90 | 91 | "fiqa_eval" |
91 | 92 | ] |
92 | 93 | }, |
|
180 | 181 | ], |
181 | 182 | "source": [ |
182 | 183 | "row = fiqa_eval[0]\n", |
183 | | - "row['question'], row['answer']" |
| 184 | + "row[\"question\"], row[\"answer\"]" |
184 | 185 | ] |
185 | 186 | }, |
186 | 187 | { |
|
199 | 200 | "outputs": [], |
200 | 201 | "source": [ |
201 | 202 | "from langfuse import Langfuse\n", |
202 | | - " \n", |
| 203 | + "\n", |
203 | 204 | "langfuse = Langfuse()" |
204 | 205 | ] |
205 | 206 | }, |
|
223 | 224 | " for m in metrics:\n", |
224 | 225 | " print(f\"calculating {m.name}\")\n", |
225 | 226 | " scores[m.name] = m.score_single(\n", |
226 | | - " {'question': query, 'contexts': chunks, 'answer': answer}\n", |
| 227 | + " {\"question\": query, \"contexts\": chunks, \"answer\": answer}\n", |
227 | 228 | " )\n", |
228 | 229 | " return scores" |
229 | 230 | ] |
|
272 | 273 | } |
273 | 274 | ], |
274 | 275 | "source": [ |
275 | | - "from langfuse.model import CreateTrace, CreateSpan, CreateGeneration, CreateEvent, CreateScore\n", |
| 276 | + "from langfuse.model import (\n", |
| 277 | + " CreateTrace,\n", |
| 278 | + " CreateSpan,\n", |
| 279 | + " CreateGeneration,\n", |
| 280 | + " CreateEvent,\n", |
| 281 | + " CreateScore,\n", |
| 282 | + ")\n", |
276 | 283 | "\n", |
277 | 284 | "# start a new trace when you get a question\n", |
278 | | - "question = row['question']\n", |
279 | | - "trace = langfuse.trace(CreateTrace(name = \"rag\"))\n", |
| 285 | + "question = row[\"question\"]\n", |
| 286 | + "trace = langfuse.trace(CreateTrace(name=\"rag\"))\n", |
280 | 287 | "\n", |
281 | 288 | "# retrieve the relevant chunks\n", |
282 | 289 | "# chunks = get_similar_chunks(question)\n", |
283 | | - "contexts = row['contexts']\n", |
| 290 | + "contexts = row[\"contexts\"]\n", |
284 | 291 | "# pass it as span\n", |
285 | | - "trace.span(CreateSpan(\n", |
286 | | - " name = \"retrieval\", input={'question': question}, output={'contexts': contexts}\n", |
287 | | - "))\n", |
| 292 | + "trace.span(\n", |
| 293 | + " CreateSpan(\n", |
| 294 | + " name=\"retrieval\", input={\"question\": question}, output={\"contexts\": contexts}\n", |
| 295 | + " )\n", |
| 296 | + ")\n", |
288 | 297 | "\n", |
289 | 298 | "# use llm to generate a answer with the chunks\n", |
290 | 299 | "# answer = get_response_from_llm(question, chunks)\n", |
291 | | - "answer = row['answer']\n", |
292 | | - "trace.span(CreateSpan(\n", |
293 | | - " name = \"generation\", input={'question': question, 'contexts': contexts}, output={'answer': answer}\n", |
294 | | - "))\n", |
| 300 | + "answer = row[\"answer\"]\n", |
| 301 | + "trace.span(\n", |
| 302 | + " CreateSpan(\n", |
| 303 | + " name=\"generation\",\n", |
| 304 | + " input={\"question\": question, \"contexts\": contexts},\n", |
| 305 | + " output={\"answer\": answer},\n", |
| 306 | + " )\n", |
| 307 | + ")\n", |
295 | 308 | "\n", |
296 | 309 | "# compute scores for the question, context, answer tuple\n", |
297 | 310 | "ragas_scores = score_with_ragas(question, contexts, answer)\n", |
|
357 | 370 | "metadata": {}, |
358 | 371 | "outputs": [], |
359 | 372 | "source": [ |
360 | | - "from langfuse.model import CreateTrace, CreateSpan, CreateGeneration, CreateEvent, CreateScore\n", |
| 373 | + "from langfuse.model import (\n", |
| 374 | + " CreateTrace,\n", |
| 375 | + " CreateSpan,\n", |
| 376 | + " CreateGeneration,\n", |
| 377 | + " CreateEvent,\n", |
| 378 | + " CreateScore,\n", |
| 379 | + ")\n", |
| 380 | + "\n", |
361 | 381 | "# fiqa traces\n", |
362 | 382 | "for interaction in fiqa_eval.select(range(10, 20)):\n", |
363 | | - " trace = langfuse.trace(CreateTrace(name = \"rag\"))\n", |
364 | | - " trace.span(CreateSpan(\n", |
365 | | - " name = \"retrieval\", \n", |
366 | | - " input={'question': question}, \n", |
367 | | - " output={'contexts': contexts}\n", |
368 | | - " ))\n", |
369 | | - " trace.span(CreateSpan(\n", |
370 | | - " name = \"generation\", \n", |
371 | | - " input={'question': question, 'contexts': contexts}, \n", |
372 | | - " output={'answer': answer}\n", |
373 | | - " ))\n", |
| 383 | + " trace = langfuse.trace(CreateTrace(name=\"rag\"))\n", |
| 384 | + " trace.span(\n", |
| 385 | + " CreateSpan(\n", |
| 386 | + " name=\"retrieval\",\n", |
| 387 | + "            input={\"question\": interaction[\"question\"]},\n", |
| 388 | + "            output={\"contexts\": interaction[\"contexts\"]},\n", |
| 389 | + " )\n", |
| 390 | + " )\n", |
| 391 | + " trace.span(\n", |
| 392 | + " CreateSpan(\n", |
| 393 | + " name=\"generation\",\n", |
| 394 | + "            input={\"question\": interaction[\"question\"], \"contexts\": interaction[\"contexts\"]},\n", |
| 395 | + "            output={\"answer\": interaction[\"answer\"]},\n", |
| 396 | + " )\n", |
| 397 | + " )\n", |
374 | 398 | "\n", |
375 | 399 | "# await that Langfuse SDK has processed all events before trying to retrieve it in the next step\n", |
376 | 400 | "langfuse.flush()" |
|
393 | 417 | "source": [ |
394 | 418 | "def get_traces(name=None, limit=None, user_id=None):\n", |
395 | 419 | " all_data = []\n", |
396 | | - " page = 1 \n", |
| 420 | + " page = 1\n", |
397 | 421 | "\n", |
398 | 422 | " while True:\n", |
399 | | - " response = langfuse.client.trace.list(\n", |
400 | | - " name=name, page=page, user_id=user_id\n", |
401 | | - " )\n", |
| 423 | + " response = langfuse.client.trace.list(name=name, page=page, user_id=user_id)\n", |
402 | 424 | " if not response.data:\n", |
403 | 425 | " break\n", |
404 | 426 | " page += 1\n", |
|
430 | 452 | "from random import sample\n", |
431 | 453 | "\n", |
432 | 454 | "NUM_TRACES_TO_SAMPLE = 3\n", |
433 | | - "traces = get_traces(name='rag', limit=5)\n", |
| 455 | + "traces = get_traces(name=\"rag\", limit=5)\n", |
434 | 456 | "traces_sample = sample(traces, NUM_TRACES_TO_SAMPLE)\n", |
435 | 457 | "\n", |
436 | 458 | "len(traces_sample)" |
|
464 | 486 | "for t in traces_sample:\n", |
465 | 487 | " observations = [langfuse.client.observations.get(o) for o in t.observations]\n", |
466 | 488 | " for o in observations:\n", |
467 | | - " if o.name == 'retrieval':\n", |
468 | | - " question = o.input['question']\n", |
469 | | - " contexts = o.output['contexts']\n", |
470 | | - " if o.name=='generation':\n", |
471 | | - " answer = o.output['answer']\n", |
472 | | - " evaluation_batch['question'].append(question)\n", |
473 | | - " evaluation_batch['contexts'].append(contexts)\n", |
474 | | - " evaluation_batch['answer'].append(answer)\n", |
475 | | - " evaluation_batch['trace_id'].append(t.id)" |
| 489 | + " if o.name == \"retrieval\":\n", |
| 490 | + " question = o.input[\"question\"]\n", |
| 491 | + " contexts = o.output[\"contexts\"]\n", |
| 492 | + " if o.name == \"generation\":\n", |
| 493 | + " answer = o.output[\"answer\"]\n", |
| 494 | + " evaluation_batch[\"question\"].append(question)\n", |
| 495 | + " evaluation_batch[\"contexts\"].append(contexts)\n", |
| 496 | + " evaluation_batch[\"answer\"].append(answer)\n", |
| 497 | + " evaluation_batch[\"trace_id\"].append(t.id)" |
476 | 498 | ] |
477 | 499 | }, |
478 | 500 | { |
|
671 | 693 | "\n", |
672 | 694 | "for _, row in df.iterrows():\n", |
673 | 695 | " for metric_name in [\"faithfulness\", \"answer_relevancy\"]:\n", |
674 | | - " langfuse.score(InitialScore(\n", |
675 | | - " name=metric_name,\n", |
676 | | - " value=row[metric_name],\n", |
677 | | - " trace_id=row[\"trace_id\"]))" |
| 696 | + " langfuse.score(\n", |
| 697 | + " InitialScore(\n", |
| 698 | + " name=metric_name, value=row[metric_name], trace_id=row[\"trace_id\"]\n", |
| 699 | + " )\n", |
| 700 | + " )" |
678 | 701 | ] |
679 | 702 | }, |
680 | 703 | { |
|