Skip to content

Commit 1619a88

Browse files
docs: clarify that experiment run_name must be unique
When using the low-level SDK methods, the run_name must be unique for each dataset run. Reusing an existing run_name silently prevents the new run from appearing as a separate run in the UI. Updated docs and code examples to explain this and to show timestamp-based run names as a best practice. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c13f85e commit 1619a88

File tree

1 file changed

+23
-6
lines changed

1 file changed

+23
-6
lines changed

pages/docs/evaluation/experiments/experiments-via-sdk.mdx

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,25 +1062,31 @@ Please refer to the [integrations](/docs/integrations/overview) page for details
10621062
10631063
### Run experiment on dataset
10641064
1065-
When running an experiment on a dataset, the application that shall be tested is executed for each item in the dataset. The execution trace is then linked to the dataset item. This allows you to compare different runs of the same application on the same dataset. Each experiment is identified by a `run_name`.
1065+
When running an experiment on a dataset, the application under test is executed once for each item in the dataset. Each execution trace is then linked to the corresponding dataset item. This allows you to compare different runs of the same application on the same dataset.
1066+
1067+
Each experiment is identified by a unique `run_name`. If you reuse an existing `run_name`, the new run will not appear as a separate run in the Langfuse dataset run UI. As a best practice, include a timestamp in your `run_name` to guarantee uniqueness (the [Experiment Runner SDK](#experiment-runner-sdk) does this automatically).
10661068
10671069
<LangTabs items={["Python SDK", "JS/TS SDK", "Langchain (Python)", "Langchain (JS/TS)", "Vercel AI SDK", "Other frameworks"]}>
10681070
<Tab>
10691071
10701072
You may then execute that LLM-app for each dataset item to create a dataset run:
10711073
10721074
```python filename="execute_dataset.py" /for item in dataset.items:/
1075+
from datetime import datetime
10731076
from langfuse import get_client
10741077
from .app import my_llm_application
10751078

10761079
# Load the dataset
10771080
dataset = get_client().get_dataset("<dataset_name>")
10781081

1082+
# Include a timestamp to ensure the run_name is unique
1083+
run_name = f"my-experiment-{datetime.now().isoformat()}"
1084+
10791085
# Loop over the dataset items
10801086
for item in dataset.items:
10811087
# Use the item.run() context manager for automatic trace linking
10821088
with item.run(
1083-
run_name="<run_name>",
1089+
run_name=run_name,
10841090
run_description="My first run",
10851091
run_metadata={"model": "llama3"},
10861092
) as root_span:
@@ -1109,14 +1115,17 @@ import { LangfuseClient } from "@langfuse/client";
11091115

11101116
const langfuse = new LangfuseClient();
11111117

1118+
// Include a timestamp to ensure the run_name is unique
1119+
const runName = `my-experiment-${new Date().toISOString()}`;
1120+
11121121
for (const item of dataset.items) {
11131122
// execute application function and get langfuseObject (trace/span/generation/event, and other observation types: see /docs/observability/features/observation-types)
11141123
// output also returned as it is used to evaluate the run
11151124
// you can also link using ids, see sdk reference for details
11161125
const [span, output] = await myLlmApplication.run(item.input);
11171126

11181127
// link the execution trace to the dataset item and give it a run_name
1119-
await item.link(span, "<run_name>", {
1128+
await item.link(span, runName, {
11201129
description: "My first run", // optional run description
11211130
metadata: { model: "llama3" }, // optional run metadata
11221131
});
@@ -1137,21 +1146,25 @@ await langfuse.flush();
11371146
<Tab>
11381147
11391148
```python /for item in dataset.items:/
1149+
from datetime import datetime
11401150
from langfuse import get_client
11411151
from langfuse.langchain import CallbackHandler
11421152
#from .app import my_llm_application
11431153

11441154
# Load the dataset
11451155
dataset = get_client().get_dataset("<dataset_name>")
11461156

1157+
# Include a timestamp to ensure the run_name is unique
1158+
run_name = f"my-experiment-{datetime.now().isoformat()}"
1159+
11471160
# Initialize the Langfuse handler
11481161
langfuse_handler = CallbackHandler()
11491162

11501163
# Loop over the dataset items
11511164
for item in dataset.items:
11521165
# Use the item.run() context manager for automatic trace linking
11531166
with item.run(
1154-
run_name="<run_name>",
1167+
run_name=run_name,
11551168
run_description="My first run",
11561169
run_metadata={"model": "llama3"},
11571170
) as root_span:
@@ -1182,7 +1195,8 @@ import { CallbackHandler } from "@langfuse/langchain";
11821195
...
11831196

11841197
const langfuse = new LangfuseClient()
1185-
const runName = "my-dataset-run";
1198+
// Include a timestamp to ensure the run_name is unique
1199+
const runName = `my-dataset-run-${new Date().toISOString()}`;
11861200
for (const item of dataset.items) {
11871201
const [span, output] = await startActiveObservation('my_llm_application', async (span) => {
11881202
// ... your Langchain code ...
@@ -1214,13 +1228,16 @@ import { LangfuseClient } from "@langfuse/client";
12141228

12151229
const langfuse = new LangfuseClient();
12161230

1231+
// Include a timestamp to ensure the run_name is unique
1232+
const runName = `my-experiment-${new Date().toISOString()}`;
1233+
12171234
// iterate over the dataset items
12181235
for (const item of dataset.items) {
12191236
// run application on the dataset item input
12201237
const [span, output] = await runMyLLMApplication(item.input, trace.id);
12211238

12221239
// link the execution trace to the dataset item and give it a run_name
1223-
await item.link(span, "<run_name>", {
1240+
await item.link(span, runName, {
12241241
description: "My first run", // optional run description
12251242
metadata: { model: "gpt-4o" }, // optional run metadata
12261243
});

0 commit comments

Comments
 (0)