Implement OpenLLMetry Tracing in Python Application #1

Open · wants to merge 1 commit into base: main
54 changes: 54 additions & 0 deletions TRACING.md
@@ -0,0 +1,54 @@
# OpenLLMetry Tracing Setup

This application now includes OpenLLMetry tracing so that LLM calls and the prompt-optimization flow can be monitored in Traceloop.

## Setup

1. **Install dependencies** (if not already installed):
```bash
uv sync
```

2. **Set your Traceloop API key** (optional but recommended):
```bash
export TRACELOOP_API_KEY="your_api_key_here"
```

You can get your API key from your [Traceloop dashboard](https://app.traceloop.com/). If you prefer keeping the key in a `.env` file, see the sketch after these steps.

3. **Run the application**:
```bash
uv run kickoff
```
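
Since `python-dotenv` is already a project dependency, the API key can also live in a local `.env` file instead of your shell profile. A minimal sketch, assuming a `.env` file at the project root that defines `TRACELOOP_API_KEY` and is loaded before the tracing module is imported (tracing initializes at import time):

```python
# Hypothetical entry-point snippet; not part of the current codebase.
from dotenv import load_dotenv

load_dotenv()  # makes TRACELOOP_API_KEY from .env visible to os.getenv()

from prompt_optimizer.tracing import tracer_instance  # noqa: E402
```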

## What Gets Traced

The application automatically traces:

- **Main prompt optimization flow**: Full workflow execution
- **Prompt evaluation**: Each evaluation step with scores and feedback
- **Prompt optimization**: Optimization attempts with before/after prompts
- **Retry logic**: Retry counts and completion reasons

## Trace Attributes

Each trace includes relevant attributes such as the following (see the sketch after this list):
- Prompt content (original and optimized)
- Evaluation scores
- Retry counts
- Failure reasons
- Optimization results
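
These attributes are recorded with standard OpenTelemetry span calls. A minimal sketch of the pattern, mirroring the custom spans in `src/prompt_optimizer/main.py` (the function and values here are illustrative, not part of the codebase):

```python
from prompt_optimizer.tracing import tracer_instance


def evaluate(prompt: str, retry_count: int) -> float:
    # Wrap one flow step in a span and attach the values listed above.
    with tracer_instance.start_as_current_span("evaluate_prompt") as span:
        span.set_attribute("prompt", prompt)
        span.set_attribute("retry_count", retry_count)
        score = 0.9  # placeholder for the real evaluation result
        span.set_attribute("score", score)
        return score
```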

## Viewing Traces

1. Go to your [Traceloop dashboard](https://app.traceloop.com/)
2. Navigate to the "Traces" section
3. View detailed traces of your prompt optimization runs

## Local Development

If you don't set the `TRACELOOP_API_KEY` environment variable, the application still runs normally, but traces are not sent to Traceloop. This allows local development without an API key.
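
If you want to inspect spans locally during development, OpenLLMetry is built on OpenTelemetry, so a console exporter can be plugged in at initialization. A sketch, assuming your `traceloop-sdk` version accepts an `exporter` argument to `Traceloop.init` (this replaces the default exporter, so nothing is sent to Traceloop):

```python
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop

# Print every finished span to stdout for local inspection.
Traceloop.init(app_name="prompt_optimizer", exporter=ConsoleSpanExporter())
```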

## Environment Variables

- `TRACELOOP_API_KEY`: Your Traceloop API key (optional for local development)
1 change: 1 addition & 0 deletions pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
"openai>=1.75.0",
"python-dotenv>=1.1.0",
"traceloop-sdk>=0.40.4",
"openllmetry>=0.27.0",
]

[project.scripts]
1 change: 1 addition & 0 deletions src/prompt_optimizer/evaluate_crew/evaluate_crew.py
@@ -4,6 +4,7 @@
from typing import List
from pydantic import BaseModel, Field
from prompt_optimizer.tools.run_prompt import RunPrompt
from prompt_optimizer.tracing import tracer_instance
Review comment:
The imported 'tracer_instance' is not used in this module. Consider removing it or adding instrumentation if needed.

Suggested change
from prompt_optimizer.tracing import tracer_instance



class EvaluationResult(BaseModel):
102 changes: 63 additions & 39 deletions src/prompt_optimizer/main.py
@@ -8,6 +8,7 @@
EvaluationResult,
)
from prompt_optimizer.optimize_crew.optimize_crew import PromptOptimizer
from prompt_optimizer.tracing import tracer_instance


START_PROMPT = """Answer the following question based on the provided context:
@@ -30,49 +31,70 @@ class PromptOptimizationFlow(Flow[PromptOptimizationFlowState]):

@start("retry")
def evaluate_prompt(self):
print("Evaluating prompt")
result: EvaluationResult = (
PromptEvaluator().crew().kickoff(inputs={"prompt": self.state.prompt})
).pydantic
self.state.score = result.score
self.state.valid = not result.failure_reasons
self.state.feedback = result.failure_reasons

print(f"Evaluation results:")
print(f"Score: {self.state.score:.2f}")
if result.failure_reasons:
print("\nFailure reasons:")
print(result.failure_reasons)

self.state.retry_count += 1

return "optimize"
with tracer_instance.start_as_current_span("evaluate_prompt") as span:
print("Evaluating prompt")
span.set_attribute("prompt", self.state.prompt)
span.set_attribute("retry_count", self.state.retry_count)

result: EvaluationResult = (
PromptEvaluator().crew().kickoff(inputs={"prompt": self.state.prompt})
).pydantic

self.state.score = result.score
self.state.valid = not result.failure_reasons
self.state.feedback = result.failure_reasons

span.set_attribute("score", self.state.score)
span.set_attribute("valid", self.state.valid)
if result.failure_reasons:
span.set_attribute("failure_reasons", result.failure_reasons)

print(f"Evaluation results:")
print(f"Score: {self.state.score:.2f}")
if result.failure_reasons:
print("\nFailure reasons:")
print(result.failure_reasons)

self.state.retry_count += 1

return "optimize"

@router(evaluate_prompt)
def optimize_prompt(self):
if self.state.score > 0.8:
return "complete"

if self.state.retry_count > 3:
return "max_retry_exceeded"

print("Optimizing prompt")
result = (
PromptOptimizer()
.crew()
.kickoff(
inputs={
"prompt": self.state.prompt,
"feedback": self.state.feedback,
"score": self.state.score,
}
with tracer_instance.start_as_current_span("optimize_prompt") as span:
span.set_attribute("current_score", self.state.score)
span.set_attribute("retry_count", self.state.retry_count)

if self.state.score > 0.8:
span.set_attribute("optimization_result", "complete")
return "complete"

if self.state.retry_count > 3:
span.set_attribute("optimization_result", "max_retry_exceeded")
return "max_retry_exceeded"

print("Optimizing prompt")
span.set_attribute("original_prompt", self.state.prompt)
span.set_attribute("feedback", self.state.feedback or "")

result = (
PromptOptimizer()
.crew()
.kickoff(
inputs={
"prompt": self.state.prompt,
"feedback": self.state.feedback,
"score": self.state.score,
}
)
)
)

print("Optimized prompt:", result.raw)
self.state.prompt = result.raw
print("Optimized prompt:", result.raw)
span.set_attribute("optimized_prompt", result.raw)
span.set_attribute("optimization_result", "retry")
self.state.prompt = result.raw

return "retry"
return "retry"

@listen("complete")
def save_result(self):
@@ -95,8 +117,10 @@ def max_retry_exceeded_exit(self):


def kickoff():
prompt_flow = PromptOptimizationFlow()
prompt_flow.kickoff()
with tracer_instance.start_as_current_span("prompt_optimization_flow") as span:
span.set_attribute("operation", "kickoff")
prompt_flow = PromptOptimizationFlow()
prompt_flow.kickoff()


def plot():
1 change: 1 addition & 0 deletions src/prompt_optimizer/optimize_crew/optimize_crew.py
@@ -3,6 +3,7 @@
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.knowledge.source.crew_docling_source import CrewDoclingSource
from typing import List
from prompt_optimizer.tracing import tracer_instance


@CrewBase
21 changes: 21 additions & 0 deletions src/prompt_optimizer/tracing.py
@@ -0,0 +1,21 @@
import os

from opentelemetry import trace
from traceloop.sdk import Traceloop


def initialize_tracing():
    """Initialize OpenLLMetry tracing (the SDK is distributed as traceloop-sdk)."""
    # Read the API key from the environment; it is optional for local development.
    api_key = os.getenv("TRACELOOP_API_KEY")

    if not api_key:
        print("Warning: TRACELOOP_API_KEY not set. Set it to enable tracing to Traceloop.")
        # Initialize without an API key; the app still runs, but traces are not sent to Traceloop.
        Traceloop.init(app_name="prompt_optimizer")
    else:
        # Initialize with the API key so traces are exported to Traceloop.
        Traceloop.init(app_name="prompt_optimizer", api_key=api_key)

    print("OpenLLMetry tracing initialized")
    # OpenLLMetry builds on OpenTelemetry, so custom spans use a standard OTel tracer.
    return trace.get_tracer("prompt_optimizer")


# Initialize tracing when the module is imported.
tracer_instance = initialize_tracing()