Skip to content

Commit 2721685

Browse files
committed
refactor: move comparing
1 parent 6228b94 commit 2721685

File tree

1 file changed

+72
-112
lines changed

1 file changed

+72
-112
lines changed

example/e-verify.cpp

Lines changed: 72 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,78 @@ Model::GenerationResult deserialize(std::string filename) {
277277
return res;
278278
}
279279

280+
/// Compares two generation results step by step and prints the comparison metrics.
///
/// For every step index present in both results this prints the token string of
/// the r1 step together with its entropy-based weight, the JSD, the logit
/// similarity and the cosine distance between the two steps' data. Afterwards it
/// prints three aggregates: the weighted distance score, the average JSD and the
/// average similarity.
///
/// @param r1 First result; its steps supply the printed token string and the
///           data passed to normalizedEntropy for the per-step weight.
/// @param r2 Second result, compared against r1 at the same step index.
void runTest(Model::GenerationResult& r1, Model::GenerationResult& r2) {
    // Only indices that exist in both results can be compared. Indexing
    // r2.steps up to r1.steps.size() is undefined behaviour when r2 is shorter.
    const size_t r1Count = r1.steps.size();
    const size_t r2Count = r2.steps.size();
    const size_t stepCount = (r1Count < r2Count) ? r1Count : r2Count;
    if (r1Count != r2Count) {
        std::cerr << "Warning: step count mismatch (" << r1Count << " vs " << r2Count
                  << "), comparing the first " << stepCount << " steps only\n";
    }

    float totalWeightedDist = 0.0f;
    float totalWeight = 0.0f;
    // Running sums accumulated in step order; equivalent to collecting every
    // value in a vector and summing it afterwards, without the allocations.
    float jsdSum = 0.0f;
    float similaritySum = 0.0f;

    for (size_t i = 0; i < stepCount; ++i) {
        auto& step1 = r1.steps[i];
        auto& step2 = r2.steps[i];

        // Calculate distance
        const float dist = ac::llama::LogitComparer::cosineDistance(step1.data, step2.data);

        // Calculate weight based on normalized entropy
        const float weight = normalizedEntropy(step1.data);
        totalWeightedDist += weight * dist;
        totalWeight += weight;

        // Calculate JSD
        const float jsd = ac::llama::LogitComparer::JSD(step1.data, step2.data);
        jsdSum += jsd;

        // Calculate similarity
        const float similarity = ac::llama::LogitComparer::logitSimilarity(step1.data, step2.data);
        similaritySum += similarity;

        std::cout << "Token: " << step1.tokenStr
                  << ", Weight: " << weight
                  << ", JSD: " << jsd
                  << ", Similarity: " << similarity
                  << ", Distance: " << dist
                  << "\n";
    }

    // Final step: Normalize
    //
    // Score range   | Interpretation
    // 0.0           | Perfect match (identical predictions)
    // 0.0001 - 0.001| Practically indistinguishable
    // 0.001 - 0.01  | Very close, slight variation
    // 0.01 - 0.1    | Moderate variation, likely different versions/settings
    // 0.1 - 1.0     | Large differences, likely different models
    const float finalScore = (totalWeight > 0.0f) ? (totalWeightedDist / totalWeight) : 0.0f;
    std::cout << "Final weighted distance score: " << finalScore << "\n";

    // Final score interpretation
    // average JSD score
    // 0.0           | Perfect match (identical predictions)
    // 0.0001 - 0.001| Practically indistinguishable
    // 0.001 - 0.01  | Moderate variation, likely different versions/settings
    // 0.01 - 0.1    | Large differences, likely different models
    //
    // With no compared steps the averages are reported as 0 instead of the NaN
    // that 0/0 would produce, matching the guard on the weighted score above.
    const float jsdAvg = (stepCount > 0) ? (jsdSum / static_cast<float>(stepCount)) : 0.0f;
    std::cout << "Average JSD score: " << jsdAvg << "\n";

    const float similarityAvg =
        (stepCount > 0) ? (similaritySum / static_cast<float>(stepCount)) : 0.0f;
    std::cout << "Average similarity score: " << similarityAvg << "\n";
}
281352

282353

283354
int main() try {
@@ -323,118 +394,7 @@ int main() try {
323394
std::cout << "Models to compare:\n" << modelGguf << "\n" << modelGguf2 << "\n";
324395
std::cout << "Comparing...\n";
325396

326-
std::vector<float> jsdResults;
327-
std::vector<float> similarityResults;
328-
for (int i = 0; i < 1; ++i) {
329-
float totalWeightedDist = 0.0f;
330-
float totalWeight = 0.0f;
331-
332-
333-
// auto r1 = m1.generate(prompt, 100);
334-
// std::cout << "Model 1 generated: " << r1.result << "\n";
335-
// std::string genPrompt = r1.initalPrompt;
336-
// auto genPromptTokens = m2.tokenize(genPrompt);
337-
338-
// Model::GenerationResult r2;
339-
// for (size_t i = 0; i < r1.steps.size(); i++) {
340-
// auto& step = r1.steps[i];
341-
// if (i > 0) {
342-
// if (m2.tokenExists(step.token)) {
343-
// genPromptTokens.push_back(step.token);
344-
// }
345-
// else {
346-
// // Instead of skipping, penalize fully
347-
// float fakeDist = 1.0f; // Maximum possible distance
348-
// float weight = 1.0f; // Assume maximum confidence since we can't know entropy
349-
// totalWeightedDist += weight * fakeDist;
350-
// totalWeight += weight;
351-
352-
// jsdResults.push_back(1);
353-
354-
// similarityResults.push_back(0.0f);
355-
356-
// std::cout << "Token not found in model 2: " << step.tokenStr << "\n";
357-
// continue;
358-
// }
359-
// }
360-
361-
// if (i == 0) {
362-
// r2 = m2.generate(genPromptTokens, 0);
363-
// } else {
364-
// std::vector<ac::llama::Token> token{step.token};
365-
// Model::GenerationResult res2 = m2.generate(token, 0);
366-
// assert(res2.steps.size() == 1);
367-
// r2.steps.push_back(res2.steps[0]);
368-
// }
369-
// }
370-
371-
for (size_t i = 0; i < r1.steps.size(); i++) {
372-
auto& step1 = r1.steps[i];
373-
auto& step2 = r2.steps[i];
374-
375-
// Calculate distance
376-
float dist = ac::llama::LogitComparer::cosineDistance(step1.data, step2.data);
377-
378-
// Calculate weight based on normalized entropy
379-
float weight = normalizedEntropy(step1.data);
380-
totalWeightedDist += weight * dist;
381-
totalWeight += weight;
382-
383-
// Calculate JSD
384-
float jsd = ac::llama::LogitComparer::JSD(step1.data, step2.data);
385-
jsdResults.push_back(jsd);
386-
387-
// Calculate similarity
388-
float similarity = ac::llama::LogitComparer::logitSimilarity(step1.data, step2.data);
389-
similarityResults.push_back(similarity);
390-
391-
std::cout << "Token: " << step1.tokenStr
392-
<< ", Weight: " << weight
393-
<< ", JSD: " << jsd
394-
<< ", Similarity: " << similarity
395-
<< ", Distance: " << dist
396-
<< "\n";
397-
}
398-
399-
400-
{
401-
// Final step: Normalize
402-
403-
// Score range | Interpretation
404-
// 0.0 | Perfect match (identical predictions)
405-
// 0.0001 - 0.001 | Practically indistinguishable
406-
// 0.001 - 0.01 | Very close, slight variation
407-
// 0.01 - 0.1 | Moderate variation, likely different versions/settings
408-
// 0.1 - 1.0 | Large differences, likely different models
409-
float finalScore = (totalWeight > 0.0f) ? (totalWeightedDist / totalWeight) : 0.0f;
410-
std::cout << "Final weighted distance score: " << finalScore << "\n";
411-
}
412-
413-
{
414-
// Final score interpretation
415-
// average JSD score
416-
// 0.0 | Perfect match (identical predictions)
417-
// 0.0001 - 0.001 | Practically indistinguishable
418-
// 0.001 - 0.01 | Moderate variation, likely different versions/settings
419-
// 0.01 - 0.1 | Large differences, likely different models
420-
float jsdSum = 0.0f;
421-
for (const auto& jsd : jsdResults) {
422-
jsdSum += jsd;
423-
}
424-
float jsdAvg = jsdSum / jsdResults.size();
425-
std::cout << "Average JSD score: " << jsdAvg << "\n";
426-
}
427-
428-
{
429-
float similaritySum = 0.0f;
430-
for (const auto& similarity : similarityResults) {
431-
similaritySum += similarity;
432-
}
433-
float similarityAvg = similaritySum / similarityResults.size();
434-
std::cout << "Average similarity score: " << similarityAvg << "\n";
435-
}
436-
}
437-
std::cout << '\n';
397+
runTest(r1, r2);
438398

439399
return 0;
440400
}

0 commit comments

Comments
 (0)