Skip to content

Commit 1619a88

Browse files
docs: clarify that experiment run_name must be unique
When using the low-level SDK methods, the run_name must be unique for each dataset run. Reusing an existing run_name silently prevents the new run from appearing as a separate run in the UI. Updated docs and code examples to explain this and to show timestamp-based run names as a best practice. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c13f85e commit 1619a88

File tree

1 file changed

+23
-6
lines changed

1 file changed

+23
-6
lines changed

pages/docs/evaluation/experiments/experiments-via-sdk.mdx

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,25 +1062,31 @@ Please refer to the [integrations](/docs/integrations/overview) page for details
10621062
10631063
### Run experiment on dataset
10641064
1065-
When running an experiment on a dataset, the application that shall be tested is executed for each item in the dataset. The execution trace is then linked to the dataset item. This allows you to compare different runs of the same application on the same dataset. Each experiment is identified by a `run_name`.
1065+
When running an experiment on a dataset, the application under test is executed once for each item in the dataset. Each execution trace is then linked to the corresponding dataset item. This allows you to compare different runs of the same application on the same dataset.
1066+
1067+
Each experiment is identified by a unique `run_name`. If you reuse an existing `run_name`, the new run will not appear as a separate run in the Langfuse dataset run UI. As a best practice, include a timestamp in your `run_name` to guarantee uniqueness (the [Experiment Runner SDK](#experiment-runner-sdk) does this automatically).
10661068
10671069
<LangTabs items={["Python SDK", "JS/TS SDK", "Langchain (Python)", "Langchain (JS/TS)", "Vercel AI SDK", "Other frameworks"]}>
10681070
<Tab>
10691071
10701072
You may then execute that LLM-app for each dataset item to create a dataset run:
10711073
10721074
```python filename="execute_dataset.py" /for item in dataset.items:/
1075+
from datetime import datetime
10731076
from langfuse import get_client
10741077
from .app import my_llm_application
10751078

10761079
# Load the dataset
10771080
dataset = get_client().get_dataset("<dataset_name>")
10781081

1082+
# Include a timestamp to ensure the run_name is unique
1083+
run_name = f"my-experiment-{datetime.now().isoformat()}"
1084+
10791085
# Loop over the dataset items
10801086
for item in dataset.items:
10811087
# Use the item.run() context manager for automatic trace linking
10821088
with item.run(
1083-
run_name="<run_name>",
1089+
run_name=run_name,
10841090
run_description="My first run",
10851091
run_metadata={"model": "llama3"},
10861092
) as root_span:
@@ -1109,14 +1115,17 @@ import { LangfuseClient } from "@langfuse/client";
11091115

11101116
const langfuse = new LangfuseClient();
11111117

1118+
// Include a timestamp to ensure the run_name is unique
1119+
const runName = `my-experiment-${new Date().toISOString()}`;
1120+
11121121
for (const item of dataset.items) {
11131122
// execute application function and get langfuseObject (trace/span/generation/event, and other observation types: see /docs/observability/features/observation-types)
11141123
// output also returned as it is used to evaluate the run
11151124
// you can also link using ids, see sdk reference for details
11161125
const [span, output] = await myLlmApplication.run(item.input);
11171126

11181127
// link the execution trace to the dataset item and give it a run_name
1119-
await item.link(span, "<run_name>", {
1128+
await item.link(span, runName, {
11201129
description: "My first run", // optional run description
11211130
metadata: { model: "llama3" }, // optional run metadata
11221131
});
@@ -1137,21 +1146,25 @@ await langfuse.flush();
11371146
<Tab>
11381147
11391148
```python /for item in dataset.items:/
1149+
from datetime import datetime
11401150
from langfuse import get_client
11411151
from langfuse.langchain import CallbackHandler
11421152
#from .app import my_llm_application
11431153

11441154
# Load the dataset
11451155
dataset = get_client().get_dataset("<dataset_name>")
11461156

1157+
# Include a timestamp to ensure the run_name is unique
1158+
run_name = f"my-experiment-{datetime.now().isoformat()}"
1159+
11471160
# Initialize the Langfuse handler
11481161
langfuse_handler = CallbackHandler()
11491162

11501163
# Loop over the dataset items
11511164
for item in dataset.items:
11521165
# Use the item.run() context manager for automatic trace linking
11531166
with item.run(
1154-
run_name="<run_name>",
1167+
run_name=run_name,
11551168
run_description="My first run",
11561169
run_metadata={"model": "llama3"},
11571170
) as root_span:
@@ -1182,7 +1195,8 @@ import { CallbackHandler } from "@langfuse/langchain";
11821195
...
11831196

11841197
const langfuse = new LangfuseClient()
1185-
const runName = "my-dataset-run";
1198+
// Include a timestamp to ensure the run_name is unique
1199+
const runName = `my-dataset-run-${new Date().toISOString()}`;
11861200
for (const item of dataset.items) {
11871201
const [span, output] = await startActiveObservation('my_llm_application', async (span) => {
11881202
// ... your Langchain code ...
@@ -1214,13 +1228,16 @@ import { LangfuseClient } from "@langfuse/client";
12141228

12151229
const langfuse = new LangfuseClient();
12161230

1231+
// Include a timestamp to ensure the run_name is unique
1232+
const runName = `my-experiment-${new Date().toISOString()}`;
1233+
12171234
// iterate over the dataset items
12181235
for (const item of dataset.items) {
12191236
// run application on the dataset item input
12201237
const [span, output] = await runMyLLMApplication(item.input, trace.id);
12211238

12221239
// link the execution trace to the dataset item and give it a run_name
1223-
await item.link(span, "<run_name>", {
1240+
await item.link(span, runName, {
12241241
description: "My first run", // optional run description
12251242
metadata: { model: "gpt-4o" }, // optional run metadata
12261243
});

0 commit comments

Comments
 (0)