Skip to content

Commit 1297e6e

Browse files
authored
fix(exp): Add run in github experiment (#3459)
1 parent e63ebe3 commit 1297e6e

File tree

3 files changed

+367
-13
lines changed

3 files changed

+367
-13
lines changed
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""
2+
Example experiment script for CI/CD using run_in_github
3+
4+
This script:
5+
1. Executes tasks locally on the dataset
6+
2. Sends task results to the backend
7+
3. Backend runs evaluators and posts PR comment with results
8+
"""
9+
10+
import asyncio
11+
import os
12+
from openai import AsyncOpenAI
13+
from traceloop.sdk import Traceloop
14+
from traceloop.sdk.experiment.model import RunInGithubResponse
15+
16+
# Module-level Traceloop client shared by the rest of the script. The API key
# and endpoint are both looked up in the process environment.
client = Traceloop.init(
    app_name="research-experiment-ci-cd",
    api_key=os.environ.get("TRACELOOP_API_KEY"),
    api_endpoint=os.environ.get("TRACELOOP_BASE_URL"),
)
22+
23+
24+
async def generate_research_response(question: str) -> str:
    """Generate a research response using OpenAI.

    Sends ``question`` as the user message of a chat completion, preceded by
    a fixed research-assistant system prompt, and returns the text of the
    first choice.

    Args:
        question: The research query to answer.

    Returns:
        The message content of the first completion choice, or an empty
        string when the response carries no text content.
    """
    # The API key is read from the environment each time this is called.
    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    response = await openai_client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful research assistant. Provide accurate, well-researched answers.",
            },
            {"role": "user", "content": question},
        ],
        temperature=0.7,
        max_tokens=500,
    )

    # ``message.content`` is optional in the OpenAI response model; normalise
    # a missing value so the declared ``str`` return type always holds.
    content = response.choices[0].message.content
    return content if content is not None else ""
42+
43+
44+
async def research_task(row):
    """Answer the query stored in one dataset row.

    Reads the ``"query"`` entry of ``row`` (an empty string when the key is
    absent), asks ``generate_research_response`` for an answer, and returns a
    dict holding the query and the generated text.
    """
    question = row.get("query", "")
    generated = await generate_research_response(question)

    # The generated text is exposed under both "completion" and "sentence".
    return dict(completion=generated, question=question, sentence=generated)
54+
55+
56+
async def main():
    """Run the research experiment and print a summary of the outcome."""
    print("🚀 Running research experiment in GitHub CI/CD...")

    # Evaluator slugs passed along with the experiment run.
    evaluator_slugs = ["research-relevancy", "categories", "research-facts-counter"]

    outcome = await client.experiment.run(
        task=research_task,
        dataset_slug="research-queries",
        dataset_version="v2",
        evaluators=evaluator_slugs,
        experiment_slug="research-exp",
    )

    print("\n✅ Experiment completed and submitted!")

    # Only a RunInGithubResponse exposes the slug and run id; anything else
    # is printed as-is.
    if not isinstance(outcome, RunInGithubResponse):
        print(f"Results: {outcome}")
    else:
        print(f"Experiment Slug: {outcome.experiment_slug}")
        print(f"Run ID: {outcome.run_id}")

    closing_lines = (
        "\n📝 The backend will run evaluators and post results to your PR.",
        " Check your GitHub PR for the results comment.",
    )
    for line in closing_lines:
        print(line)
80+
81+
82+
if __name__ == "__main__":
    # Script entry point: drive the async ``main`` coroutine to completion.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)

0 commit comments

Comments
 (0)