From 97d05c7989642402809282d03d0195ffe7f63619 Mon Sep 17 00:00:00 2001
From: Ben Langfeld
Date: Fri, 29 Aug 2025 14:44:02 -0300
Subject: [PATCH] Langchain integration for dataset runs doesn't exist

The documented example does not work since the `get_langchain_handler`
function doesn't exist in the Python SDK.
---
 .../evaluation/dataset-runs/remote-run.mdx | 26 +------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/pages/docs/evaluation/dataset-runs/remote-run.mdx b/pages/docs/evaluation/dataset-runs/remote-run.mdx
index a178824cd..61d78d5de 100644
--- a/pages/docs/evaluation/dataset-runs/remote-run.mdx
+++ b/pages/docs/evaluation/dataset-runs/remote-run.mdx
@@ -364,7 +364,7 @@ Please refer to the [integrations](/docs/integrations/overview) page for details
 When running an experiment on a dataset, the application that shall be tested is executed for each item in the dataset. The execution trace is then linked to the dataset item. This allows you to compare different runs of the same application on the same dataset. Each experiment is identified by a `run_name`.
 
-
+
 
 You may then execute that LLM-app for each dataset item to create a dataset run:
 
@@ -433,30 +433,6 @@ for (const item of dataset.items) {
 await langfuse.flush();
 ```
 
-
-
-
-```python /for item in dataset.items:/
-from langfuse import get_client
-
-# Load the dataset
-dataset = get_client().get_dataset("")
-
-# Loop over the dataset items
-for item in dataset.items:
-    # Langchain callback handler that automatically links the execution trace to the dataset item
-    handler = item.get_langchain_handler(run_name="")
-
-    # Execute application and pass custom handler
-    my_langchain_chain.run(item.input, callbacks=[handler])
-
-    # Optionally: Add scores computed in your experiment runner, e.g. json equality check
-    langfuse.score(trace_id=handler.get_trace_id(), name="my_score", value=1)
-
-# Flush the langfuse client to ensure all data is sent to the server at the end of the experiment run
-langfuse.flush()
-```
-
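If the docs later want to restore a LangChain tab instead of only dropping it, a rough sketch against the v3 Python SDK might look like the following. This assumes the v3 `item.run()` context manager and the LangChain `CallbackHandler` exported from `langfuse.langchain`; `<dataset_name>`, `<run_name>`, and `my_langchain_chain` are placeholders, so the exact API should be verified against the current SDK before anything is documented:

```python
from langfuse import get_client
from langfuse.langchain import CallbackHandler  # v3 LangChain integration (assumed import path)

langfuse = get_client()

# Load the dataset; "<dataset_name>" is a placeholder
dataset = langfuse.get_dataset("<dataset_name>")

for item in dataset.items:
    # item.run() is assumed to open a span whose trace is linked to this
    # dataset item under the given run name (v3 SDK)
    with item.run(run_name="<run_name>") as root_span:
        # LangChain callback handler; the traced chain execution should nest
        # under the currently active span
        handler = CallbackHandler()

        # my_langchain_chain is a placeholder for your own chain/runnable
        my_langchain_chain.invoke(item.input, config={"callbacks": [handler]})

        # Optionally attach a score computed by the experiment runner to the linked trace
        root_span.score_trace(name="my_score", value=1)

# Ensure all buffered events are sent before the process exits
langfuse.flush()
```

The `with item.run(...)` block is what would tie the resulting trace to the dataset item and run name, so any LangChain execution or score emitted inside it should end up attached to the experiment run.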