import * as traceloop from "@traceloop/node-server-sdk";
import OpenAI from "openai";

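// Prerequisites (based on the environment variables read below): set TRACELOOP_API_KEY
// for the Traceloop SDK and OPENAI_API_KEY for the OpenAI client before running this sample.
// The init options below are worth a note: disableBatch is assumed to export spans
// immediately rather than batching them, and traceloopSyncEnabled is assumed to enable
// syncing with the Traceloop backend, which the datasets client used here relies on.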
const main = async () => {
  // Initialize Traceloop SDK
  traceloop.initialize({
    appName: "sample_dataset",
    apiKey: process.env.TRACELOOP_API_KEY,
    disableBatch: true,
    traceloopSyncEnabled: true,
  });

  try {
    await traceloop.waitForInitialization();
  } catch (error) {
    console.error(
      "Failed to initialize Traceloop SDK:",
      error instanceof Error ? error.message : String(error),
    );
    console.error("Initialization error details:", error);
    process.exit(1);
  }

  const client = traceloop.getClient();
  if (!client) {
    console.error("Failed to initialize Traceloop client");
    return;
  }

  console.log("🚀 Dataset API Sample Application");
  console.log("==================================\n");

  try {
    // 1. Create a new dataset for tracking LLM interactions
    console.log("📝 Creating a new dataset...");
    const dataset = await client.datasets.create({
      name: `llm-interactions-${Date.now()}`,
      description:
        "Dataset for tracking OpenAI chat completions and user interactions",
    });

    console.log(`✅ Dataset created: ${dataset.name} (ID: ${dataset.id})\n`);

    // 2. Define the schema by adding columns
    console.log("🏗️ Adding columns to define schema...");

    const columnsToAdd = [
      {
        name: "user_id",
        type: "string" as const,
        required: true,
        description: "Unique identifier for the user",
      },
      {
        name: "prompt",
        type: "string" as const,
        required: true,
        description: "The user's input prompt",
      },
      {
        name: "response",
        type: "string" as const,
        required: true,
        description: "The AI model's response",
      },
      {
        name: "model",
        type: "string" as const,
        required: true,
        description: "The AI model used (e.g., gpt-4)",
      },
      {
        name: "tokens_used",
        type: "number" as const,
        required: false,
        description: "Total tokens consumed",
      },
      {
        name: "response_time_ms",
        type: "number" as const,
        required: false,
        description: "Response time in milliseconds",
      },
      {
        name: "satisfaction_score",
        type: "number" as const,
        required: false,
        description: "User satisfaction rating (1-5)",
      },
      {
        name: "timestamp",
        type: "string" as const,
        required: true,
        description: "When the interaction occurred",
      },
    ];
    await dataset.addColumn(columnsToAdd);

    console.log(`✅ Schema defined with ${columnsToAdd.length} columns\n`);

    // 3. Simulate some LLM interactions and collect data
    console.log("🤖 Simulating LLM interactions...");

    const openai = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    });
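    // The Traceloop SDK auto-instruments the OpenAI client, so the
    // chat.completions.create calls in the loop below should be traced
    // automatically; no extra instrumentation code is needed here.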

    const samplePrompts = [
      "Explain machine learning in simple terms",
      "Write a Python function to calculate fibonacci numbers",
      "What are the benefits of using TypeScript?",
      "How does async/await work in JavaScript?",
      "Explain the concept of closures in programming",
    ];

    const interactions = [];

    for (let i = 0; i < samplePrompts.length; i++) {
      const prompt = samplePrompts[i];
      const userId = `user_${String(i + 1).padStart(3, "0")}`;

      console.log(`  Processing prompt ${i + 1}/${samplePrompts.length}...`);

      const startTime = Date.now();

      try {
        // Make actual OpenAI API call
        const completion = await openai.chat.completions.create({
          model: "gpt-3.5-turbo",
          messages: [{ role: "user", content: prompt }],
          max_tokens: 150,
        });

        const endTime = Date.now();
        const response =
          completion.choices[0]?.message?.content || "No response";
        const tokensUsed = completion.usage?.total_tokens || 0;
        const responseTime = endTime - startTime;

        const interaction = {
          user_id: userId,
          prompt: prompt,
          response: response,
          model: "gpt-3.5-turbo",
          tokens_used: tokensUsed,
          response_time_ms: responseTime,
          satisfaction_score: Math.floor(Math.random() * 5) + 1, // Random satisfaction 1-5
          timestamp: new Date().toISOString(),
        };

        interactions.push(interaction);

        // Add individual row to dataset
        await dataset.addRow(interaction);
      } catch (error) {
        console.log(
          `  ⚠️ Error with prompt ${i + 1}: ${error instanceof Error ? error.message : String(error)}`,
        );

        // Add error interaction data
        const errorInteraction = {
          user_id: userId,
          prompt: prompt,
          response: `Error: ${error instanceof Error ? error.message : String(error)}`,
          model: "gpt-3.5-turbo",
          tokens_used: 0,
          response_time_ms: Date.now() - startTime,
          satisfaction_score: 1,
          timestamp: new Date().toISOString(),
        };

        interactions.push(errorInteraction);
        await dataset.addRow(errorInteraction);
      }
    }

    console.log(`✅ Added ${interactions.length} interaction records\n`);

    // 4. Import additional data from CSV
    console.log("📊 Importing additional data from CSV...");

    const csvData = `user_id,prompt,response,model,tokens_used,response_time_ms,satisfaction_score,timestamp
user_006,"What is React?","React is a JavaScript library for building user interfaces...","gpt-3.5-turbo",85,1200,4,"2024-01-15T10:30:00Z"
user_007,"Explain Docker","Docker is a containerization platform that allows you to package applications...","gpt-3.5-turbo",120,1500,5,"2024-01-15T10:35:00Z"
user_008,"What is GraphQL?","GraphQL is a query language and runtime for APIs...","gpt-3.5-turbo",95,1100,4,"2024-01-15T10:40:00Z"`;

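    // The CSV header row mirrors the column names defined in step 2, so with
    // hasHeader: true each field is expected to map onto the existing schema.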
    await dataset.fromCSV(csvData, { hasHeader: true });
    console.log("✅ Imported 3 additional records from CSV\n");

    // 5. Get dataset info
    console.log("📈 Getting dataset information...");
    const rows = await dataset.getRows(); // Get all rows
    const allColumns = await dataset.getColumns(); // Get all columns
    console.log(`  • Total rows: ${rows.length}`);
    console.log(`  • Total columns: ${allColumns.length}`);
    console.log(`  • Last updated: ${dataset.updatedAt}\n`);

    // 6. Retrieve and analyze some data
    console.log("🔍 Analyzing collected data...");
    const analysisRows = rows.slice(0, 10); // Get first 10 rows for analysis

    if (analysisRows.length > 0) {
      console.log(`  • Retrieved ${analysisRows.length} rows for analysis`);

      // Calculate average satisfaction score
      const satisfactionScores = analysisRows
        .map((row) => row.data.satisfaction_score as number)
        .filter((score) => score != null);

      if (satisfactionScores.length > 0) {
        const avgSatisfaction =
          satisfactionScores.reduce((a, b) => a + b, 0) /
          satisfactionScores.length;
        console.log(
          `  • Average satisfaction score: ${avgSatisfaction.toFixed(2)}/5`,
        );
      }

      // Calculate average response time
      const responseTimes = analysisRows
        .map((row) => row.data.response_time_ms as number)
        .filter((time) => time != null);

      if (responseTimes.length > 0) {
        const avgResponseTime =
          responseTimes.reduce((a, b) => a + b, 0) / responseTimes.length;
        console.log(
          `  • Average response time: ${avgResponseTime.toFixed(0)}ms`,
        );
      }

      // Show sample interactions
      console.log("\n📋 Sample interactions:");
      analysisRows.slice(0, 3).forEach((row, index) => {
        console.log(`  ${index + 1}. User: "${row.data.prompt}"`);
        console.log(
          `     Response: "${String(row.data.response).substring(0, 80)}..."`,
        );
        console.log(`     Satisfaction: ${row.data.satisfaction_score}/5\n`);
      });
    }

    // 7. Get dataset versions (if any exist)
    console.log("📚 Checking dataset versions...");
    try {
      const versions = await dataset.getVersions();
      console.log(`  • Total versions: ${versions.total}`);

      if (versions.versions.length > 0) {
        console.log("  • Available versions:");
        versions.versions.forEach((version) => {
          console.log(
            `    - ${version.version} (published: ${version.publishedAt})`,
          );
        });
      } else {
        console.log("  • No published versions yet");
      }
    } catch (error) {
      console.log(
        `  ⚠️ Could not retrieve versions: ${error instanceof Error ? error.message : String(error)}`,
      );
    }

    console.log();

    // 8. Publish the dataset
    console.log("🚀 Publishing dataset...");
    await dataset.publish({
      version: "v1.0",
      description:
        "Initial release of LLM interactions dataset with sample data",
    });

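    // After a successful publish, dataset.published is expected to be true,
    // which is what the status line below reports.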
    console.log(
      `✅ Dataset published! Status: ${dataset.published ? "Published" : "Draft"}\n`,
    );

    // 9. List all datasets (to show our new one)
    console.log("📑 Listing all datasets...");
    const datasetsList = await client.datasets.list(); // Get all datasets
    console.log(`  • Found ${datasetsList.total} total datasets`);
    console.log("  • Recent datasets:");

    datasetsList.datasets.slice(0, 3).forEach((ds, index) => {
      const isOurDataset = ds.id === dataset.id;
      console.log(
        `  ${index + 1}. ${ds.name}${isOurDataset ? " ← (just created!)" : ""}`,
      );
      console.log(`     Description: ${ds.description || "No description"}`);
      console.log(`     Published: ${ds.published ? "Yes" : "No"}\n`);
    });

    // 10. Demonstrate dataset retrieval
    console.log("🔎 Testing dataset retrieval...");
    const retrievedDataset = await client.datasets.get(dataset.slug);
    if (retrievedDataset) {
      console.log(
        `✅ Retrieved dataset by slug: ${retrievedDataset.name} (ID: ${retrievedDataset.id})`,
      );
    } else {
      console.log("❌ Could not retrieve dataset");
    }

    console.log("\n🎉 Dataset API demonstration completed successfully!");
    console.log("\n💡 Key features demonstrated:");
    console.log("  • Dataset creation and schema definition");
    console.log("  • Real-time data collection from LLM interactions");
    console.log("  • CSV data import capabilities");
    console.log("  • Statistical analysis of collected data");
    console.log("  • Dataset publishing and version management");
    console.log("  • Search and retrieval operations");

    console.log("\n📊 Dataset Summary:");
    console.log(`  • Name: ${dataset.name}`);
    console.log(`  • ID: ${dataset.id}`);
    console.log(`  • Published: ${dataset.published ? "Yes" : "No"}`);
    console.log(`  • Total interactions recorded: ${rows.length}`);
  } catch (error) {
    console.error(
      "❌ Error in dataset operations:",
      error instanceof Error ? error.message : String(error),
    );
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
  }
};

// Run the sample; exit with a non-zero status on unhandled errors
main().catch((error) => {
  console.error("💥 Application failed:", error.message);
  process.exit(1);
});
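
// To try this sample (the file name and runner here are assumptions, not part of the original):
//   export TRACELOOP_API_KEY=... OPENAI_API_KEY=...
//   npx tsx dataset-sample.ts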