Skip to content

Commit 1ec2dd2

Browse files
committed
update docs/integrations/langfuse.md for pre-commit
1 parent 85cd485 commit 1ec2dd2

File tree

1 file changed

+31
-31
lines changed

1 file changed

+31
-31
lines changed

docs/integrations/langfuse.md

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,11 @@ def fetch_traces_for_evaluation(
116116
) -> list[dict]:
117117
"""
118118
Fetch traces from Langfuse for evaluation.
119-
119+
120120
Args:
121121
limit: Maximum number of traces to fetch
122122
tags: Optional tag filter
123-
123+
124124
Returns:
125125
List of trace dictionaries
126126
"""
@@ -129,7 +129,7 @@ def fetch_traces_for_evaluation(
129129
limit=limit,
130130
tags=tags,
131131
)
132-
132+
133133
result = []
134134
for trace in response.data:
135135
# Only include traces with input and output
@@ -140,13 +140,13 @@ def fetch_traces_for_evaluation(
140140
"output": trace.output,
141141
"metadata": trace.metadata or {},
142142
}
143-
143+
144144
# Add expected output if available in metadata
145145
if trace.metadata and "expected" in trace.metadata:
146146
trace_dict["expected"] = trace.metadata["expected"]
147-
147+
148148
result.append(trace_dict)
149-
149+
150150
return result
151151
```
152152

@@ -164,23 +164,23 @@ def fetch_recent_traces(
164164
) -> list[dict]:
165165
"""
166166
Fetch traces from the last N hours.
167-
167+
168168
Args:
169169
hours_back: Number of hours to look back
170170
limit: Maximum number of traces to fetch
171171
tags: Optional tag filter
172-
172+
173173
Returns:
174174
List of trace dictionaries
175175
"""
176176
from_timestamp = datetime.now() - timedelta(hours=hours_back)
177-
177+
178178
response = langfuse.api.trace.list(
179179
limit=limit,
180180
tags=tags,
181181
from_timestamp=from_timestamp,
182182
)
183-
183+
184184
result = []
185185
for trace in response.data:
186186
if trace.input and trace.output:
@@ -190,7 +190,7 @@ def fetch_recent_traces(
190190
"output": trace.output,
191191
"metadata": trace.metadata or {},
192192
})
193-
193+
194194
return result
195195
```
196196

@@ -225,14 +225,14 @@ from openjudge.graders.schema import GraderScore, GraderError
225225

226226
async def evaluate_single_trace():
227227
"""Evaluate traces using a single grader"""
228-
228+
229229
# Initialize model and grader
230230
model = OpenAIChatModel(model="qwen3-32b")
231231
grader = RelevanceGrader(model=model)
232-
232+
233233
# Fetch traces
234234
traces = fetch_traces_for_evaluation(limit=10)
235-
235+
236236
for trace in traces:
237237
try:
238238
# Run evaluation
@@ -241,7 +241,7 @@ async def evaluate_single_trace():
241241
query=trace["input"],
242242
response=trace["output"],
243243
)
244-
244+
245245
# Process result and send to Langfuse
246246
if isinstance(result, GraderScore):
247247
langfuse.create_score(
@@ -253,10 +253,10 @@ async def evaluate_single_trace():
253253
print(f"✓ Trace {trace['id'][:8]}... scored: {result.score}")
254254
elif isinstance(result, GraderError):
255255
print(f"✗ Trace {trace['id'][:8]}... error: {result.error}")
256-
256+
257257
except Exception as e:
258258
print(f"✗ Error evaluating trace {trace['id']}: {e}")
259-
259+
260260
# Ensure all scores are sent
261261
langfuse.flush()
262262

@@ -280,10 +280,10 @@ from openjudge.runner.aggregator.weighted_sum_aggregator import WeightedSumAggre
280280

281281
async def batch_evaluate_traces():
282282
"""Batch evaluate traces using GradingRunner"""
283-
283+
284284
# Initialize model
285285
model = OpenAIChatModel(model="qwen3-32b")
286-
286+
287287
# Configure multiple graders with field mappers
288288
# Map trace fields to grader expected parameters
289289
runner = GradingRunner(
@@ -310,18 +310,18 @@ async def batch_evaluate_traces():
310310
)
311311
],
312312
)
313-
313+
314314
# Fetch traces
315315
traces = fetch_traces_for_evaluation(limit=50)
316-
316+
317317
if not traces:
318318
print("No traces to evaluate")
319319
return
320-
320+
321321
# Prepare evaluation data
322322
evaluation_data = []
323323
trace_id_mapping = {} # Map index to trace_id
324-
324+
325325
for i, trace in enumerate(traces):
326326
eval_item = {
327327
"input": trace["input"],
@@ -330,14 +330,14 @@ async def batch_evaluate_traces():
330330
# Add expected output as reference if available
331331
if trace.get("expected"):
332332
eval_item["expected"] = trace["expected"]
333-
333+
334334
evaluation_data.append(eval_item)
335335
trace_id_mapping[i] = trace["id"]
336-
336+
337337
# Run batch evaluation
338338
try:
339339
results = await runner.arun(evaluation_data)
340-
340+
341341
# Send results back to Langfuse
342342
# results contains individual grader scores + aggregated "overall_quality" score
343343
scores_sent = 0
@@ -347,18 +347,18 @@ async def batch_evaluate_traces():
347347
print(f"Sending {grader_name} score for trace {trace_id}")
348348
send_result_to_langfuse(trace_id, grader_name, result)
349349
scores_sent += 1
350-
351-
350+
351+
352352
print(f"✓ Successfully sent {scores_sent} scores for {len(traces)} traces")
353-
353+
354354
except Exception as e:
355355
print(f"✗ Batch evaluation failed: {e}")
356-
356+
357357
# Ensure all scores are sent
358358
langfuse.flush()
359359
def send_result_to_langfuse(trace_id: str, grader_name: str, result) -> None:
360360
"""Send evaluation result to Langfuse"""
361-
361+
362362
if isinstance(result, GraderScore):
363363
langfuse.create_score(
364364
trace_id=trace_id,

0 commit comments

Comments
 (0)