-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest-gpt5-accuracy.js
More file actions
119 lines (101 loc) · 4.63 KB
/
test-gpt5-accuracy.js
File metadata and controls
119 lines (101 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
const openaiTokenCounter = require('./src/index.js');
const OpenAI = require('openai');
// Shared OpenAI client used for every API call in this script.
// The API key is read from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
// Conversations used as fixtures for the GPT-5 comparison.
// Each entry has a display `name` and the chat `messages` that are handed to
// both the local token counter and the OpenAI API.
const testCases = (() => {
  // Tiny builders so each conversation reads as a sequence of turns.
  const turn = (role) => (content) => ({ role, content });
  const system = turn("system");
  const user = turn("user");
  const assistant = turn("assistant");

  return [
    {
      name: "Simple greeting",
      messages: [user("Hello, how are you?")],
    },
    {
      name: "Code generation request",
      messages: [
        user("Write a Python function to calculate fibonacci numbers"),
        assistant("Here's a Python function to calculate Fibonacci numbers:\n\ndef fibonacci(n):\n if n <= 1:\n return n\n return fibonacci(n-1) + fibonacci(n-2)"),
        user("Can you optimize this for better performance?"),
      ],
    },
    {
      name: "Complex reasoning task",
      messages: [
        system("You are a helpful assistant that solves complex problems step by step."),
        user("If a train leaves station A at 60 mph and another train leaves station B at 80 mph, and they are 200 miles apart, when will they meet?"),
      ],
    },
    {
      name: "Long conversation",
      messages: [
        user("What is machine learning?"),
        assistant("Machine learning is a subset of artificial intelligence that focuses on algorithms and statistical models that enable computer systems to improve their performance on a specific task through experience, without being explicitly programmed."),
        user("Can you explain the different types of machine learning?"),
        assistant("There are three main types of machine learning: 1) Supervised learning - uses labeled training data, 2) Unsupervised learning - finds patterns in data without labels, 3) Reinforcement learning - learns through interaction with an environment using rewards and penalties."),
        user("What are some popular machine learning algorithms?"),
      ],
    },
  ];
})();
// Model identifiers exercised by the accuracy run: the base GPT-5 model
// followed by its "-mini" and "-nano" variants.
const gpt5Models = ['', '-mini', '-nano'].map((suffix) => `gpt-5${suffix}`);
/**
 * Runs every entry of `testCases` against one model and prints, for each,
 * the token count produced by `openaiTokenCounter.chat` next to the
 * `usage.prompt_tokens` value returned by the OpenAI chat completions API.
 *
 * Verdicts are based on the absolute token difference:
 * 0 -> perfect, <= 2 -> very close, <= 5 -> close, otherwise significant.
 *
 * Failures of a single test case are logged and do not stop the remaining
 * cases; the function itself does not reject because of them.
 *
 * @param {string} model - Model identifier passed to both the local counter and the API.
 * @returns {Promise<void>} Resolves once every test case has been attempted.
 */
async function testModelAccuracy(model) {
  console.log(`\n=== Testing ${model} ===`);

  for (const testCase of testCases) {
    console.log(`\nTest: ${testCase.name}`);

    try {
      // Get token count from our package
      const ourTokenCount = openaiTokenCounter.chat(testCase.messages, model);
      console.log(`Our package: ${ourTokenCount} tokens`);

      // Get token count from OpenAI API
      const response = await openai.chat.completions.create({
        model,
        messages: testCase.messages,
        max_completion_tokens: 10, // Minimal response to save tokens
      });

      // Guard against a response without usable usage data: without this the
      // accuracy below would be NaN/Infinity (division by zero) or a TypeError.
      const usage = response ? response.usage : undefined;
      const openaiTokenCount = usage ? usage.prompt_tokens : undefined;
      if (!Number.isFinite(openaiTokenCount) || openaiTokenCount <= 0) {
        console.log(`❌ Error testing ${model}: OpenAI response did not report usage.prompt_tokens`);
        continue;
      }
      console.log(`OpenAI API: ${openaiTokenCount} tokens`);

      // Calculate accuracy
      const difference = Math.abs(ourTokenCount - openaiTokenCount);
      const accuracy = ((1 - difference / openaiTokenCount) * 100).toFixed(2);

      if (difference === 0) {
        console.log(`✅ Perfect match! (100% accurate)`);
      } else if (difference <= 2) {
        console.log(`✅ Very close! (${accuracy}% accurate, ${difference} token difference)`);
      } else if (difference <= 5) {
        console.log(`⚠️ Close! (${accuracy}% accurate, ${difference} token difference)`);
      } else {
        console.log(`❌ Significant difference! (${accuracy}% accurate, ${difference} token difference)`);
      }

      // Show message preview (first 50 characters of each message)
      const messagePreview = testCase.messages
        .map((msg) => {
          // Non-string content is serialised so the preview itself cannot throw.
          let text = msg.content;
          if (typeof text !== 'string') {
            const serialised = JSON.stringify(text);
            text = serialised === undefined ? '' : serialised;
          }
          return `${msg.role}: ${text.substring(0, 50)}${text.length > 50 ? '...' : ''}`;
        })
        .join('\n');
      console.log(`Messages:\n${messagePreview}`);
    } catch (error) {
      // Any value can be thrown, so normalise before touching `.message`;
      // otherwise this handler could itself throw and abort the whole run.
      const message =
        error != null && typeof error.message === 'string' ? error.message : String(error);
      console.log(`❌ Error testing ${model}: ${message}`);

      // The API reports an unknown/inaccessible model with HTTP 404 and
      // code "model_not_found"; its message says "does not exist", which the
      // original substring checks alone did not match.
      const modelUnavailable =
        (error != null && (error.status === 404 || error.code === 'model_not_found')) ||
        message.includes('Invalid model') ||
        message.includes('not found') ||
        message.includes('does not exist');
      if (modelUnavailable) {
        console.log(` Note: ${model} may not be available in your OpenAI account yet.`);
      }
    }
  }
}
/**
 * Entry point of the accuracy run: prints a banner, calls
 * `testModelAccuracy` for each entry of `gpt5Models` one after another,
 * then prints a legend for the verdict symbols.
 *
 * The legend describes absolute token differences because that is what
 * `testModelAccuracy` uses to pick a verdict (0, <= 2, <= 5, otherwise).
 *
 * @returns {Promise<void>} Resolves after all models have been processed.
 */
async function runTests() {
  console.log('🚀 GPT-5 Token Counter Accuracy Test');
  console.log('=====================================');
  console.log('Testing our package against OpenAI API token counts...\n');

  // Sequential on purpose: output of different models must not interleave.
  for (const model of gpt5Models) {
    await testModelAccuracy(model);
  }

  console.log('\n=== Summary ===');
  console.log('✅ = Perfect match (0 token difference)');
  console.log('✅ = Very close (1-2 token difference)');
  console.log('⚠️ = Close (3-5 token difference)');
  console.log('❌ = Significant difference (more than 5 tokens)');
  console.log('\nNote: Some models may not be available in your OpenAI account yet.');
}
// Run the tests. An unexpected rejection is logged and also reflected in the
// process exit status, so shells and CI can detect that the run failed.
runTests().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});