Skip to content

Commit e240945

Browse files
authored
feat: add datasets api (#643)
1 parent b43eaaf commit e240945

File tree

19 files changed

+4688
-3
lines changed

19 files changed

+4688
-3
lines changed

packages/sample-app/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
"run:pinecone": "npm run build && node dist/src/sample_pinecone.js",
2929
"run:langchain": "npm run build && node dist/src/sample_langchain.js",
3030
"run:sample_structured_output": "npm run build && node dist/src/sample_structured_output.js",
31+
"run:dataset": "npm run build && node dist/src/sample_dataset.js",
3132
"run:image_generation": "npm run build && node dist/src/sample_openai_image_generation.js",
3233
"run:sample_edit": "npm run build && node dist/src/test_edit_only.js",
3334
"run:sample_generate": "npm run build && node dist/src/test_generate_only.js",
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
import * as traceloop from "@traceloop/node-server-sdk";
2+
import OpenAI from "openai";
3+
4+
/**
 * Turns any thrown value into a printable message.
 *
 * Catch variables are not guaranteed to be Error instances, so `.message`
 * is only read after narrowing; everything else is stringified.
 */
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

/**
 * Arithmetic mean of `values`.
 *
 * Callers must pass a non-empty array; an empty one yields NaN (0 / 0).
 */
const average = (values: number[]): number =>
  values.reduce((sum, value) => sum + value, 0) / values.length;

/**
 * End-to-end walkthrough of the datasets API.
 *
 * Steps, in order: initialize the SDK, create a dataset, add columns, record
 * one row per OpenAI chat completion (a failed completion is recorded as an
 * error row instead of aborting the run), import rows from an inline CSV,
 * print simple statistics, list versions, publish, list datasets, and fetch
 * the dataset back by slug.
 *
 * Reads TRACELOOP_API_KEY and OPENAI_API_KEY from the environment.
 * Failures are logged and reflected in a non-zero process exit status.
 */
const main = async () => {
  // Initialize Traceloop SDK
  traceloop.initialize({
    appName: "sample_dataset",
    apiKey: process.env.TRACELOOP_API_KEY,
    disableBatch: true,
    traceloopSyncEnabled: true,
  });

  try {
    await traceloop.waitForInitialization();
  } catch (error) {
    console.error("Failed to initialize Traceloop SDK:", describeError(error));
    console.error("Initialization error details:", error);
    process.exit(1);
  }

  const client = traceloop.getClient();
  if (!client) {
    console.error("Failed to initialize Traceloop client");
    // Without a client nothing below can run; make the failure visible to the shell.
    process.exitCode = 1;
    return;
  }

  console.log("🚀 Dataset API Sample Application");
  console.log("==================================\n");

  try {
    // 1. Create a new dataset for tracking LLM interactions
    console.log("📝 Creating a new dataset...");
    const dataset = await client.datasets.create({
      // Timestamp suffix so repeated runs create distinctly named datasets.
      name: `llm-interactions-${Date.now()}`,
      description:
        "Dataset for tracking OpenAI chat completions and user interactions",
    });

    console.log(`✅ Dataset created: ${dataset.name} (ID: ${dataset.id})\n`);

    // 2. Define the schema by adding columns
    console.log("🏗️ Adding columns to define schema...");

    const columnsToAdd = [
      {
        name: "user_id",
        type: "string" as const,
        required: true,
        description: "Unique identifier for the user",
      },
      {
        name: "prompt",
        type: "string" as const,
        required: true,
        description: "The user's input prompt",
      },
      {
        name: "response",
        type: "string" as const,
        required: true,
        description: "The AI model's response",
      },
      {
        name: "model",
        type: "string" as const,
        required: true,
        description: "The AI model used (e.g., gpt-4)",
      },
      {
        name: "tokens_used",
        type: "number" as const,
        required: false,
        description: "Total tokens consumed",
      },
      {
        name: "response_time_ms",
        type: "number" as const,
        required: false,
        description: "Response time in milliseconds",
      },
      {
        name: "satisfaction_score",
        type: "number" as const,
        required: false,
        description: "User satisfaction rating (1-5)",
      },
      {
        name: "timestamp",
        type: "string" as const,
        required: true,
        description: "When the interaction occurred",
      },
    ];
    await dataset.addColumn(columnsToAdd);

    // Derive the count from the array so the message cannot drift from the schema.
    console.log(`✅ Schema defined with ${columnsToAdd.length} columns\n`);

    // 3. Simulate some LLM interactions and collect data
    console.log("🤖 Simulating LLM interactions...");

    const openai = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    });

    const samplePrompts = [
      "Explain machine learning in simple terms",
      "Write a Python function to calculate fibonacci numbers",
      "What are the benefits of using TypeScript?",
      "How does async/await work in JavaScript?",
      "Explain the concept of closures in programming",
    ];

    const interactions = [];

    for (let i = 0; i < samplePrompts.length; i++) {
      const prompt = samplePrompts[i];
      const userId = `user_${String(i + 1).padStart(3, "0")}`;

      console.log(` Processing prompt ${i + 1}/${samplePrompts.length}...`);

      const startTime = Date.now();

      try {
        // Make actual OpenAI API call
        const completion = await openai.chat.completions.create({
          model: "gpt-3.5-turbo",
          messages: [{ role: "user", content: prompt }],
          max_tokens: 150,
        });

        const endTime = Date.now();
        const response =
          completion.choices[0]?.message?.content || "No response";
        const tokensUsed = completion.usage?.total_tokens || 0;
        const responseTime = endTime - startTime;

        const interaction = {
          user_id: userId,
          prompt: prompt,
          response: response,
          model: "gpt-3.5-turbo",
          tokens_used: tokensUsed,
          response_time_ms: responseTime,
          satisfaction_score: Math.floor(Math.random() * 5) + 1, // Random satisfaction 1-5
          timestamp: new Date().toISOString(),
        };

        interactions.push(interaction);

        // Add individual row to dataset
        await dataset.addRow(interaction);
      } catch (error) {
        const message = describeError(error);
        console.log(` ⚠️ Error with prompt ${i + 1}: ${message}`);

        // Record the failure as a row so the run still yields one record per prompt.
        const errorInteraction = {
          user_id: userId,
          prompt: prompt,
          response: `Error: ${message}`,
          model: "gpt-3.5-turbo",
          tokens_used: 0,
          response_time_ms: Date.now() - startTime,
          satisfaction_score: 1,
          timestamp: new Date().toISOString(),
        };

        interactions.push(errorInteraction);
        await dataset.addRow(errorInteraction);
      }
    }

    console.log(`✅ Added ${interactions.length} interaction records\n`);

    // 4. Import additional data from CSV
    console.log("📊 Importing additional data from CSV...");

    const csvData = `user_id,prompt,response,model,tokens_used,response_time_ms,satisfaction_score,timestamp
user_006,"What is React?","React is a JavaScript library for building user interfaces...","gpt-3.5-turbo",85,1200,4,"2024-01-15T10:30:00Z"
user_007,"Explain Docker","Docker is a containerization platform that allows you to package applications...","gpt-3.5-turbo",120,1500,5,"2024-01-15T10:35:00Z"
user_008,"What is GraphQL?","GraphQL is a query language and runtime for APIs...","gpt-3.5-turbo",95,1100,4,"2024-01-15T10:40:00Z"`;

    await dataset.fromCSV(csvData, { hasHeader: true });
    console.log("✅ Imported 3 additional records from CSV\n");

    // 5. Get dataset info
    console.log("📈 Getting dataset information...");
    const rows = await dataset.getRows(); // Get all rows
    const allColumns = await dataset.getColumns(); // Get all columns
    console.log(` • Total rows: ${rows.length}`);
    console.log(` • Total columns: ${allColumns.length}`);
    console.log(` • Last updated: ${dataset.updatedAt}\n`);

    // 6. Retrieve and analyze some data
    console.log("🔍 Analyzing collected data...");
    const analysisRows = rows.slice(0, 10); // Get first 10 rows for analysis

    if (analysisRows.length > 0) {
      console.log(` • Retrieved ${analysisRows.length} rows for analysis`);

      // Calculate average satisfaction score
      const satisfactionScores = analysisRows
        .map((row) => row.data.satisfaction_score as number)
        .filter((score) => score != null);

      // Guard against an empty list: average() would return NaN.
      if (satisfactionScores.length > 0) {
        const avgSatisfaction = average(satisfactionScores);
        console.log(
          ` • Average satisfaction score: ${avgSatisfaction.toFixed(2)}/5`,
        );
      }

      // Calculate average response time
      const responseTimes = analysisRows
        .map((row) => row.data.response_time_ms as number)
        .filter((time) => time != null);

      if (responseTimes.length > 0) {
        const avgResponseTime = average(responseTimes);
        console.log(
          ` • Average response time: ${avgResponseTime.toFixed(0)}ms`,
        );
      }

      // Show sample interactions
      console.log("\n📋 Sample interactions:");
      analysisRows.slice(0, 3).forEach((row, index) => {
        console.log(` ${index + 1}. User: "${row.data.prompt}"`);
        console.log(
          ` Response: "${String(row.data.response).substring(0, 80)}..."`,
        );
        console.log(` Satisfaction: ${row.data.satisfaction_score}/5\n`);
      });
    }

    // 7. Get dataset versions (if any exist)
    console.log("📚 Checking dataset versions...");
    try {
      const versions = await dataset.getVersions();
      console.log(` • Total versions: ${versions.total}`);

      if (versions.versions.length > 0) {
        console.log(" • Available versions:");
        versions.versions.forEach((version) => {
          console.log(
            ` - ${version.version} (published: ${version.publishedAt})`,
          );
        });
      } else {
        console.log(" • No published versions yet");
      }
    } catch (error) {
      // Version listing is best-effort: report the problem and continue the demo.
      console.log(` ⚠️ Could not retrieve versions: ${describeError(error)}`);
    }

    console.log();

    // 8. Publish the dataset
    console.log("🚀 Publishing dataset...");
    await dataset.publish({
      version: "v1.0",
      description:
        "Initial release of LLM interactions dataset with sample data",
    });

    console.log(
      `✅ Dataset published! Status: ${dataset.published ? "Published" : "Draft"}\n`,
    );

    // 9. List all datasets (to show our new one)
    console.log("📑 Listing all datasets...");
    const datasetsList = await client.datasets.list(); // Get all datasets
    console.log(` • Found ${datasetsList.total} total datasets`);
    console.log(" • Recent datasets:");

    datasetsList.datasets.slice(0, 3).forEach((ds, index) => {
      const isOurDataset = ds.id === dataset.id;
      console.log(
        ` ${index + 1}. ${ds.name}${isOurDataset ? " ← (just created!)" : ""}`,
      );
      console.log(` Description: ${ds.description || "No description"}`);
      console.log(` Published: ${ds.published ? "Yes" : "No"}\n`);
    });

    // 10. Demonstrate dataset retrieval
    console.log("🔎 Testing dataset retrieval...");
    const retrievedDataset = await client.datasets.get(dataset.slug);
    if (retrievedDataset) {
      console.log(
        `✅ Retrieved dataset by slug: ${retrievedDataset.name} (ID: ${retrievedDataset.id})`,
      );
    } else {
      console.log("❌ Could not retrieve dataset");
    }

    console.log("\n🎉 Dataset API demonstration completed successfully!");
    console.log("\n💡 Key features demonstrated:");
    console.log(" • Dataset creation and schema definition");
    console.log(" • Real-time data collection from LLM interactions");
    console.log(" • CSV data import capabilities");
    console.log(" • Statistical analysis of collected data");
    console.log(" • Dataset publishing and version management");
    console.log(" • Search and retrieval operations");

    console.log(`\n📊 Dataset Summary:`);
    console.log(` • Name: ${dataset.name}`);
    console.log(` • ID: ${dataset.id}`);
    console.log(` • Published: ${dataset.published ? "Yes" : "No"}`);
    console.log(` • Total interactions recorded: ${rows.length}`);
  } catch (error) {
    console.error("❌ Error in dataset operations:", describeError(error));
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
    // The error is handled here and never reaches main().catch, so the exit
    // status has to be set explicitly or the run would be reported as a success.
    process.exitCode = 1;
  }
};
327+
328+
// Last-resort handler: report any rejection from main() and exit non-zero.
// A rejection value is not guaranteed to be an Error, so narrow before reading
// `.message`; otherwise a non-Error rejection would print `undefined`.
main().catch((error) => {
  console.error(
    "💥 Application failed:",
    error instanceof Error ? error.message : String(error),
  );
  process.exit(1);
});

packages/traceloop-sdk/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
"cross-fetch": "^4.1.0",
6161
"fetch-retry": "^6.0.0",
6262
"nunjucks": "^3.2.4",
63+
"papaparse": "^5.5.3",
6364
"posthog-node": "^5.5.1",
6465
"supports-color": "^10.0.0",
6566
"uuid": "^11.1.0"
@@ -81,6 +82,7 @@
8182
"@qdrant/js-client-rest": "^1.15.0",
8283
"@types/mocha": "^10.0.10",
8384
"@types/node": "^24.0.15",
85+
"@types/papaparse": "^5.3.16",
8486
"@types/uuid": "^10.0.0",
8587
"ai": "^4.3.19",
8688
"chromadb": "^3.0.9",

0 commit comments

Comments
 (0)