Implement OpenLLMetry Tracing in Python Application #1

Open · wants to merge 1 commit into base: main
54 changes: 54 additions & 0 deletions TRACING.md
@@ -0,0 +1,54 @@
# OpenLLMetry Tracing Setup

This application now includes OpenLLMetry tracing so that LLM calls and the prompt-optimization flow can be monitored in Traceloop.

## Setup

1. **Install dependencies** (if not already installed):
```bash
uv sync
```

2. **Set your Traceloop API key** (optional but recommended):
```bash
export TRACELOOP_API_KEY="your_api_key_here"
```

You can get your API key from your [Traceloop dashboard](https://app.traceloop.com/). If you prefer keeping the key in a `.env` file, see the sketch after these steps.

3. **Run the application**:
```bash
uv run kickoff
```
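
Since `python-dotenv` is already a project dependency, the API key can also live in a local `.env` file instead of your shell profile. A minimal sketch, assuming a `.env` file at the project root that defines `TRACELOOP_API_KEY` and is loaded before the tracing module is imported (tracing initializes at import time):

```python
# Hypothetical entry-point snippet; not part of the current codebase.
from dotenv import load_dotenv

load_dotenv()  # makes TRACELOOP_API_KEY from .env visible to os.getenv()

from prompt_optimizer.tracing import tracer_instance  # noqa: E402
```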

## What Gets Traced

The application automatically traces:

- **Main prompt optimization flow**: Full workflow execution
- **Prompt evaluation**: Each evaluation step with scores and feedback
- **Prompt optimization**: Optimization attempts with before/after prompts
- **Retry logic**: Retry counts and completion reasons

## Trace Attributes

Each trace includes relevant attributes such as the following (see the sketch after this list):
- Prompt content (original and optimized)
- Evaluation scores
- Retry counts
- Failure reasons
- Optimization results
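
These attributes are recorded with standard OpenTelemetry span calls. A minimal sketch of the pattern, mirroring the custom spans in `src/prompt_optimizer/main.py` (the function and values here are illustrative, not part of the codebase):

```python
from prompt_optimizer.tracing import tracer_instance


def evaluate(prompt: str, retry_count: int) -> float:
    # Wrap one flow step in a span and attach the values listed above.
    with tracer_instance.start_as_current_span("evaluate_prompt") as span:
        span.set_attribute("prompt", prompt)
        span.set_attribute("retry_count", retry_count)
        score = 0.9  # placeholder for the real evaluation result
        span.set_attribute("score", score)
        return score
```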

## Viewing Traces

1. Go to your [Traceloop dashboard](https://app.traceloop.com/)
2. Navigate to the "Traces" section
3. View detailed traces of your prompt optimization runs

## Local Development

If you don't set the `TRACELOOP_API_KEY` environment variable, the application still runs normally, but traces are not sent to Traceloop. This allows local development without an API key.
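
If you want to inspect spans locally during development, OpenLLMetry is built on OpenTelemetry, so a console exporter can be plugged in at initialization. A sketch, assuming your `traceloop-sdk` version accepts an `exporter` argument to `Traceloop.init` (this replaces the default exporter, so nothing is sent to Traceloop):

```python
from opentelemetry.sdk.trace.export import ConsoleSpanExporter
from traceloop.sdk import Traceloop

# Print every finished span to stdout for local inspection.
Traceloop.init(app_name="prompt_optimizer", exporter=ConsoleSpanExporter())
```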

## Environment Variables

- `TRACELOOP_API_KEY`: Your Traceloop API key (optional for local development)
1 change: 1 addition & 0 deletions pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
"openai>=1.75.0",
"python-dotenv>=1.1.0",
"traceloop-sdk>=0.40.4",
"openllmetry>=0.27.0",
]

[project.scripts]
1 change: 1 addition & 0 deletions src/prompt_optimizer/evaluate_crew/evaluate_crew.py
@@ -4,6 +4,7 @@
from typing import List
from pydantic import BaseModel, Field
from prompt_optimizer.tools.run_prompt import RunPrompt
from prompt_optimizer.tracing import tracer_instance
Review comment:
The imported 'tracer_instance' is not used in this module. Consider removing it or adding instrumentation if needed.

Suggested change
from prompt_optimizer.tracing import tracer_instance



class EvaluationResult(BaseModel):
102 changes: 63 additions & 39 deletions src/prompt_optimizer/main.py
@@ -8,6 +8,7 @@
EvaluationResult,
)
from prompt_optimizer.optimize_crew.optimize_crew import PromptOptimizer
from prompt_optimizer.tracing import tracer_instance


START_PROMPT = """Answer the following question based on the provided context:
@@ -30,49 +31,70 @@ class PromptOptimizationFlow(Flow[PromptOptimizationFlowState]):

@start("retry")
def evaluate_prompt(self):
print("Evaluating prompt")
result: EvaluationResult = (
PromptEvaluator().crew().kickoff(inputs={"prompt": self.state.prompt})
).pydantic
self.state.score = result.score
self.state.valid = not result.failure_reasons
self.state.feedback = result.failure_reasons

print(f"Evaluation results:")
print(f"Score: {self.state.score:.2f}")
if result.failure_reasons:
print("\nFailure reasons:")
print(result.failure_reasons)

self.state.retry_count += 1

return "optimize"
with tracer_instance.start_as_current_span("evaluate_prompt") as span:
print("Evaluating prompt")
span.set_attribute("prompt", self.state.prompt)
span.set_attribute("retry_count", self.state.retry_count)

result: EvaluationResult = (
PromptEvaluator().crew().kickoff(inputs={"prompt": self.state.prompt})
).pydantic

self.state.score = result.score
self.state.valid = not result.failure_reasons
self.state.feedback = result.failure_reasons

span.set_attribute("score", self.state.score)
span.set_attribute("valid", self.state.valid)
if result.failure_reasons:
span.set_attribute("failure_reasons", result.failure_reasons)

print(f"Evaluation results:")
print(f"Score: {self.state.score:.2f}")
if result.failure_reasons:
print("\nFailure reasons:")
print(result.failure_reasons)

self.state.retry_count += 1

return "optimize"

@router(evaluate_prompt)
def optimize_prompt(self):
if self.state.score > 0.8:
return "complete"

if self.state.retry_count > 3:
return "max_retry_exceeded"

print("Optimizing prompt")
result = (
PromptOptimizer()
.crew()
.kickoff(
inputs={
"prompt": self.state.prompt,
"feedback": self.state.feedback,
"score": self.state.score,
}
with tracer_instance.start_as_current_span("optimize_prompt") as span:
span.set_attribute("current_score", self.state.score)
span.set_attribute("retry_count", self.state.retry_count)

if self.state.score > 0.8:
span.set_attribute("optimization_result", "complete")
return "complete"

if self.state.retry_count > 3:
span.set_attribute("optimization_result", "max_retry_exceeded")
return "max_retry_exceeded"

print("Optimizing prompt")
span.set_attribute("original_prompt", self.state.prompt)
span.set_attribute("feedback", self.state.feedback or "")

result = (
PromptOptimizer()
.crew()
.kickoff(
inputs={
"prompt": self.state.prompt,
"feedback": self.state.feedback,
"score": self.state.score,
}
)
)
)

print("Optimized prompt:", result.raw)
self.state.prompt = result.raw
print("Optimized prompt:", result.raw)
span.set_attribute("optimized_prompt", result.raw)
span.set_attribute("optimization_result", "retry")
self.state.prompt = result.raw

return "retry"
return "retry"

@listen("complete")
def save_result(self):
@@ -95,8 +117,10 @@ def max_retry_exceeded_exit(self):


def kickoff():
prompt_flow = PromptOptimizationFlow()
prompt_flow.kickoff()
with tracer_instance.start_as_current_span("prompt_optimization_flow") as span:
span.set_attribute("operation", "kickoff")
prompt_flow = PromptOptimizationFlow()
prompt_flow.kickoff()


def plot():
1 change: 1 addition & 0 deletions src/prompt_optimizer/optimize_crew/optimize_crew.py
@@ -3,6 +3,7 @@
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.knowledge.source.crew_docling_source import CrewDoclingSource
from typing import List
from prompt_optimizer.tracing import tracer_instance


@CrewBase
21 changes: 21 additions & 0 deletions src/prompt_optimizer/tracing.py
@@ -0,0 +1,21 @@
import os

from opentelemetry import trace
from traceloop.sdk import Traceloop


def initialize_tracing():
    """Initialize OpenLLMetry tracing (the SDK is distributed as traceloop-sdk)."""
    # Read the API key from the environment; it is optional for local development.
    api_key = os.getenv("TRACELOOP_API_KEY")

    if not api_key:
        print("Warning: TRACELOOP_API_KEY not set. Set it to enable tracing to Traceloop.")
        # Initialize without an API key; the app still runs, but traces are not sent to Traceloop.
        Traceloop.init(app_name="prompt_optimizer")
    else:
        # Initialize with the API key so traces are exported to Traceloop.
        Traceloop.init(app_name="prompt_optimizer", api_key=api_key)

    print("OpenLLMetry tracing initialized")
    # OpenLLMetry builds on OpenTelemetry, so custom spans use a standard OTel tracer.
    return trace.get_tracer("prompt_optimizer")


# Initialize tracing when the module is imported.
tracer_instance = initialize_tracing()