Skip to content

Commit 8269d88

Browse files
committed
Refactor testProviderMetrics to support concurrent result handling and add session-based logging and results directories; implement markdown report generation for test results
1 parent eda0297 commit 8269d88

File tree

2 files changed

+204
-26
lines changed

2 files changed

+204
-26
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ build/
1515
# PAMPAX
1616
.pampa/
1717
pampa.*
18+
19+
# Logs
20+
logs/
21+
results/

main.go

Lines changed: 200 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ type TestResult struct {
4242

4343
// testProviderMetrics runs a full benchmark test against a single provider.
4444
// It is designed to be run as a goroutine.
45-
func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync.WaitGroup, logDir, resultsDir string) {
45+
func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync.WaitGroup, logDir, resultsDir string, results *[]TestResult, resultsMutex *sync.Mutex) {
4646
// Defer wg.Done() if this is part of a concurrent group
4747
if wg != nil {
4848
defer wg.Done()
@@ -52,7 +52,7 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
5252
timestamp := time.Now().Format("20060102-150405")
5353
logFile, err := os.Create(filepath.Join(logDir, fmt.Sprintf("%s-%s.log", config.Name, timestamp)))
5454
if err != nil {
55-
log.Printf("Error creating log file for %s: %v", config.Name, err)
55+
log.Printf("Error creating log file for %s: %v", config.Name, err)
5656
return
5757
}
5858
defer logFile.Close()
@@ -88,18 +88,23 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
8888
var firstTokenTime time.Time
8989
var fullResponseContent strings.Builder
9090

91-
ctx := context.Background()
91+
// Add timeout context to prevent indefinite hangs (5 minutes)
92+
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
93+
defer cancel()
94+
9295
stream, err := client.CreateChatCompletionStream(ctx, req)
9396
if err != nil {
94-
providerLogger.Printf("Error creating stream for %s: %v", config.Name, err)
97+
providerLogger.Printf("Error creating stream for %s: %v", config.Name, err)
9598
// Save error result
96-
saveResult(resultsDir, TestResult{
99+
result := TestResult{
97100
Provider: config.Name,
98101
Model: config.Model,
99102
Timestamp: time.Now(),
100103
Success: false,
101104
Error: err.Error(),
102-
})
105+
}
106+
saveResult(resultsDir, result)
107+
appendResult(results, resultsMutex, result)
103108
return
104109
}
105110
defer stream.Close() // IMPORTANT: Always close the stream
@@ -116,20 +121,27 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
116121
}
117122

118123
if err != nil {
119-
providerLogger.Printf("Stream error for %s: %v", config.Name, err)
124+
errMsg := err.Error()
125+
if ctx.Err() == context.DeadlineExceeded {
126+
errMsg = "Timeout: stream took longer than 5 minutes"
127+
}
128+
providerLogger.Printf("Stream error for %s: %v", config.Name, errMsg)
120129
// Save error result
121-
saveResult(resultsDir, TestResult{
130+
result := TestResult{
122131
Provider: config.Name,
123132
Model: config.Model,
124133
Timestamp: time.Now(),
125134
Success: false,
126-
Error: err.Error(),
127-
})
135+
Error: errMsg,
136+
}
137+
saveResult(resultsDir, result)
138+
appendResult(results, resultsMutex, result)
128139
return
129140
}
130141

131142
// Check if Choices array is empty (some APIs send empty chunks)
132143
if len(response.Choices) == 0 {
144+
providerLogger.Printf("[%s] ... Received empty chunk (no Choices)", config.Name)
133145
continue
134146
}
135147

@@ -155,13 +167,15 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
155167
if firstTokenTime.IsZero() {
156168
providerLogger.Printf("Error for %s: Did not receive any content from the API.", config.Name)
157169
// Save error result
158-
saveResult(resultsDir, TestResult{
170+
result := TestResult{
159171
Provider: config.Name,
160172
Model: config.Model,
161173
Timestamp: time.Now(),
162174
Success: false,
163175
Error: "No content received from API",
164-
})
176+
}
177+
saveResult(resultsDir, result)
178+
appendResult(results, resultsMutex, result)
165179
return
166180
}
167181

@@ -173,13 +187,15 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
173187
if completionTokens == 0 {
174188
providerLogger.Printf("Error for %s: Received response with 0 tokens.", config.Name)
175189
// Save error result
176-
saveResult(resultsDir, TestResult{
190+
result := TestResult{
177191
Provider: config.Name,
178192
Model: config.Model,
179193
Timestamp: time.Now(),
180194
Success: false,
181195
Error: "Received 0 tokens",
182-
})
196+
}
197+
saveResult(resultsDir, result)
198+
appendResult(results, resultsMutex, result)
183199
return
184200
}
185201

@@ -215,7 +231,7 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
215231
// providerLogger.Printf("[%s] Full Response:\n%s\n", config.Name, fullResponse)
216232

217233
// Save successful result
218-
saveResult(resultsDir, TestResult{
234+
result := TestResult{
219235
Provider: config.Name,
220236
Model: config.Model,
221237
Timestamp: time.Now(),
@@ -224,7 +240,18 @@ func testProviderMetrics(config ProviderConfig, tke *tiktoken.Tiktoken, wg *sync
224240
Throughput: throughput,
225241
CompletionTokens: completionTokens,
226242
Success: true,
227-
})
243+
}
244+
saveResult(resultsDir, result)
245+
appendResult(results, resultsMutex, result)
246+
}
247+
248+
// appendResult safely appends a result to the shared results slice
249+
func appendResult(results *[]TestResult, mutex *sync.Mutex, result TestResult) {
250+
if results != nil && mutex != nil {
251+
mutex.Lock()
252+
*results = append(*results, result)
253+
mutex.Unlock()
254+
}
228255
}
229256

230257
// saveResult saves the test result to a JSON file
@@ -234,16 +261,149 @@ func saveResult(resultsDir string, result TestResult) {
234261

235262
data, err := json.MarshalIndent(result, "", " ")
236263
if err != nil {
237-
log.Printf("Error marshaling result for %s: %v", result.Provider, err)
264+
log.Printf("Error marshaling result for %s: %v", result.Provider, err)
238265
return
239266
}
240267

241268
if err := os.WriteFile(filename, data, 0644); err != nil {
242-
log.Printf("Error writing result file for %s: %v", result.Provider, err)
269+
log.Printf("Error writing result file for %s: %v", result.Provider, err)
243270
return
244271
}
245272

246-
log.Printf("✅ Result saved: %s", filename)
273+
log.Printf("Result saved: %s", filename)
274+
}
275+
276+
// generateMarkdownReport creates a summary report of all test results
277+
func generateMarkdownReport(resultsDir string, results []TestResult, sessionTimestamp string) error {
278+
filename := filepath.Join(resultsDir, "REPORT.md")
279+
280+
var report strings.Builder
281+
report.WriteString("# LLM API Speed Test Results\n\n")
282+
report.WriteString(fmt.Sprintf("**Test Session:** %s\n\n", sessionTimestamp))
283+
report.WriteString("---\n\n")
284+
285+
// Summary statistics
286+
successful := 0
287+
failed := 0
288+
for _, r := range results {
289+
if r.Success {
290+
successful++
291+
} else {
292+
failed++
293+
}
294+
}
295+
296+
report.WriteString("## Summary\n\n")
297+
report.WriteString(fmt.Sprintf("- **Total Providers Tested:** %d\n", len(results)))
298+
report.WriteString(fmt.Sprintf("- **Successful:** %d\n", successful))
299+
report.WriteString(fmt.Sprintf("- **Failed:** %d\n\n", failed))
300+
301+
// Successful results table
302+
if successful > 0 {
303+
report.WriteString("## Successful Tests\n\n")
304+
report.WriteString("| Provider | Model | E2E Latency | TTFT | Throughput | Tokens |\n")
305+
report.WriteString("|----------|-------|-------------|------|------------|--------|\n")
306+
307+
for _, r := range results {
308+
if r.Success {
309+
report.WriteString(fmt.Sprintf("| %s | %s | %v | %v | %.2f tok/s | %d |\n",
310+
r.Provider,
311+
r.Model,
312+
r.E2ELatency,
313+
r.TTFT,
314+
r.Throughput,
315+
r.CompletionTokens))
316+
}
317+
}
318+
report.WriteString("\n")
319+
}
320+
321+
// Failed results
322+
if failed > 0 {
323+
report.WriteString("## Failed Tests\n\n")
324+
report.WriteString("| Provider | Model | Error |\n")
325+
report.WriteString("|----------|-------|-------|\n")
326+
327+
for _, r := range results {
328+
if !r.Success {
329+
report.WriteString(fmt.Sprintf("| %s | %s | %s |\n",
330+
r.Provider,
331+
r.Model,
332+
r.Error))
333+
}
334+
}
335+
report.WriteString("\n")
336+
}
337+
338+
// Leaderboard (sorted by throughput)
339+
if successful > 0 {
340+
report.WriteString("## 🏆 Performance Leaderboard\n\n")
341+
report.WriteString("### By Throughput (Tokens/sec)\n\n")
342+
343+
// Sort by throughput
344+
successfulResults := make([]TestResult, 0)
345+
for _, r := range results {
346+
if r.Success {
347+
successfulResults = append(successfulResults, r)
348+
}
349+
}
350+
351+
// Simple bubble sort by throughput descending
352+
for i := 0; i < len(successfulResults); i++ {
353+
for j := i + 1; j < len(successfulResults); j++ {
354+
if successfulResults[j].Throughput > successfulResults[i].Throughput {
355+
successfulResults[i], successfulResults[j] = successfulResults[j], successfulResults[i]
356+
}
357+
}
358+
}
359+
360+
report.WriteString("| Rank | Provider | Throughput | TTFT | E2E Latency |\n")
361+
report.WriteString("|------|----------|------------|------|-------------|\n")
362+
363+
for i, r := range successfulResults {
364+
report.WriteString(fmt.Sprintf("| %d | %s | %.2f tok/s | %v | %v |\n",
365+
i+1,
366+
r.Provider,
367+
r.Throughput,
368+
r.TTFT,
369+
r.E2ELatency))
370+
}
371+
report.WriteString("\n")
372+
373+
// Sort by TTFT
374+
report.WriteString("### By Time to First Token (TTFT)\n\n")
375+
376+
for i := 0; i < len(successfulResults); i++ {
377+
for j := i + 1; j < len(successfulResults); j++ {
378+
if successfulResults[j].TTFT < successfulResults[i].TTFT {
379+
successfulResults[i], successfulResults[j] = successfulResults[j], successfulResults[i]
380+
}
381+
}
382+
}
383+
384+
report.WriteString("| Rank | Provider | TTFT | Throughput | E2E Latency |\n")
385+
report.WriteString("|------|----------|------|------------|-------------|\n")
386+
387+
for i, r := range successfulResults {
388+
report.WriteString(fmt.Sprintf("| %d | %s | %v | %.2f tok/s | %v |\n",
389+
i+1,
390+
r.Provider,
391+
r.TTFT,
392+
r.Throughput,
393+
r.E2ELatency))
394+
}
395+
report.WriteString("\n")
396+
}
397+
398+
report.WriteString("---\n\n")
399+
report.WriteString(fmt.Sprintf("*Report generated at %s*\n", time.Now().Format("2006-01-02 15:04:05")))
400+
401+
if err := os.WriteFile(filename, []byte(report.String()), 0644); err != nil {
402+
return fmt.Errorf("error writing report: %v", err)
403+
}
404+
405+
log.Printf("Report generated: %s", filename)
406+
return nil
247407
}
248408

249409
func main() {
@@ -269,9 +429,11 @@ func main() {
269429
flagGenericModel := flag.String("model", "", "Model name for 'generic' provider (required if --provider is not set)")
270430
flag.Parse()
271431

272-
// 3. Create log and results directories
273-
logDir := "logs"
274-
resultsDir := "results"
432+
// 3. Create session-based folder structure
433+
sessionTimestamp := time.Now().Format("20060102-150405")
434+
sessionDir := filepath.Join("results", fmt.Sprintf("session-%s", sessionTimestamp))
435+
logDir := filepath.Join(sessionDir, "logs")
436+
resultsDir := sessionDir
275437

276438
if err := os.MkdirAll(logDir, 0755); err != nil {
277439
log.Fatalf("Error creating logs directory: %v", err)
@@ -281,8 +443,9 @@ func main() {
281443
log.Fatalf("Error creating results directory: %v", err)
282444
}
283445

284-
log.Printf("📁 Logs will be saved to: %s/", logDir)
285-
log.Printf("📁 Results will be saved to: %s/", resultsDir)
446+
log.Printf("Session folder: %s/", sessionDir)
447+
log.Printf("Logs will be saved to: %s/", logDir)
448+
log.Printf("Results will be saved to: %s/", resultsDir)
286449

287450
// 4. Initialize Tokenizer
288451
tke, err := tiktoken.GetEncoding("cl100k_base")
@@ -395,14 +558,17 @@ func main() {
395558

396559
// 6. Run Tests
397560
var wg sync.WaitGroup
561+
var results []TestResult
562+
var resultsMutex sync.Mutex
563+
398564
for _, provider := range providersToTest {
399565
if *testAll {
400566
// Run all tests concurrently
401567
wg.Add(1)
402-
go testProviderMetrics(provider, tke, &wg, logDir, resultsDir)
568+
go testProviderMetrics(provider, tke, &wg, logDir, resultsDir, &results, &resultsMutex)
403569
} else {
404570
// Run a single test sequentially
405-
testProviderMetrics(provider, tke, nil, logDir, resultsDir)
571+
testProviderMetrics(provider, tke, nil, logDir, resultsDir, &results, &resultsMutex)
406572
}
407573
}
408574

@@ -411,4 +577,12 @@ func main() {
411577
wg.Wait()
412578
log.Println("--- All provider tests complete. ---")
413579
}
580+
581+
// Generate markdown report
582+
log.Println("Generating summary report...")
583+
if err := generateMarkdownReport(resultsDir, results, sessionTimestamp); err != nil {
584+
log.Printf("Warning: Failed to generate report: %v", err)
585+
}
586+
587+
log.Printf("All tests complete. Results saved to: %s/", sessionDir)
414588
}

0 commit comments

Comments
 (0)