@@ -277,7 +277,78 @@ Model::GenerationResult deserialize(std::string filename) {
277277 return res;
278278}
279279
280+ void runTest (Model::GenerationResult& r1, Model::GenerationResult& r2) {
281+ std::vector<float > jsdResults;
282+ std::vector<float > similarityResults;
283+ float totalWeightedDist = 0 .0f ;
284+ float totalWeight = 0 .0f ;
285+
286+ for (size_t i = 0 ; i < r1.steps .size (); i++) {
287+ auto & step1 = r1.steps [i];
288+ auto & step2 = r2.steps [i];
289+
290+ // Calculate distance
291+ float dist = ac::llama::LogitComparer::cosineDistance (step1.data , step2.data );
292+
293+ // Calculate weight based on normalized entropy
294+ float weight = normalizedEntropy (step1.data );
295+ totalWeightedDist += weight * dist;
296+ totalWeight += weight;
297+
298+ // Calculate JSD
299+ float jsd = ac::llama::LogitComparer::JSD (step1.data , step2.data );
300+ jsdResults.push_back (jsd);
301+
302+ // Calculate similarity
303+ float similarity = ac::llama::LogitComparer::logitSimilarity (step1.data , step2.data );
304+ similarityResults.push_back (similarity);
305+
306+ std::cout << " Token: " << step1.tokenStr
307+ << " , Weight: " << weight
308+ << " , JSD: " << jsd
309+ << " , Similarity: " << similarity
310+ << " , Distance: " << dist
311+ << " \n " ;
312+ }
313+
314+
315+ {
316+ // Final step: Normalize
317+
318+ // Score range | Interpretation
319+ // 0.0 | Perfect match (identical predictions)
320+ // 0.0001 - 0.001 | Practically indistinguishable
321+ // 0.001 - 0.01 | Very close, slight variation
322+ // 0.01 - 0.1 | Moderate variation, likely different versions/settings
323+ // 0.1 - 1.0 | Large differences, likely different models
324+ float finalScore = (totalWeight > 0 .0f ) ? (totalWeightedDist / totalWeight) : 0 .0f ;
325+ std::cout << " Final weighted distance score: " << finalScore << " \n " ;
326+ }
280327
328+ {
329+ // Final score interpretation
330+ // average JSD score
331+ // 0.0 | Perfect match (identical predictions)
332+ // 0.0001 - 0.001 | Practically indistinguishable
333+ // 0.001 - 0.01 | Moderate variation, likely different versions/settings
334+ // 0.01 - 0.1 | Large differences, likely different models
335+ float jsdSum = 0 .0f ;
336+ for (const auto & jsd : jsdResults) {
337+ jsdSum += jsd;
338+ }
339+ float jsdAvg = jsdSum / jsdResults.size ();
340+ std::cout << " Average JSD score: " << jsdAvg << " \n " ;
341+ }
342+
343+ {
344+ float similaritySum = 0 .0f ;
345+ for (const auto & similarity : similarityResults) {
346+ similaritySum += similarity;
347+ }
348+ float similarityAvg = similaritySum / similarityResults.size ();
349+ std::cout << " Average similarity score: " << similarityAvg << " \n " ;
350+ }
351+ }
281352
282353
283354int main () try {
@@ -323,118 +394,7 @@ int main() try {
323394 std::cout << " Models to compare:\n " << modelGguf << " \n " << modelGguf2 << " \n " ;
324395 std::cout << " Comparing...\n " ;
325396
326- std::vector<float > jsdResults;
327- std::vector<float > similarityResults;
328- for (int i = 0 ; i < 1 ; ++i) {
329- float totalWeightedDist = 0 .0f ;
330- float totalWeight = 0 .0f ;
331-
332-
333- // auto r1 = m1.generate(prompt, 100);
334- // std::cout << "Model 1 generated: " << r1.result << "\n";
335- // std::string genPrompt = r1.initalPrompt;
336- // auto genPromptTokens = m2.tokenize(genPrompt);
337-
338- // Model::GenerationResult r2;
339- // for (size_t i = 0; i < r1.steps.size(); i++) {
340- // auto& step = r1.steps[i];
341- // if (i > 0) {
342- // if (m2.tokenExists(step.token)) {
343- // genPromptTokens.push_back(step.token);
344- // }
345- // else {
346- // // Instead of skipping, penalize fully
347- // float fakeDist = 1.0f; // Maximum possible distance
348- // float weight = 1.0f; // Assume maximum confidence since we can't know entropy
349- // totalWeightedDist += weight * fakeDist;
350- // totalWeight += weight;
351-
352- // jsdResults.push_back(1);
353-
354- // similarityResults.push_back(0.0f);
355-
356- // std::cout << "Token not found in model 2: " << step.tokenStr << "\n";
357- // continue;
358- // }
359- // }
360-
361- // if (i == 0) {
362- // r2 = m2.generate(genPromptTokens, 0);
363- // } else {
364- // std::vector<ac::llama::Token> token{step.token};
365- // Model::GenerationResult res2 = m2.generate(token, 0);
366- // assert(res2.steps.size() == 1);
367- // r2.steps.push_back(res2.steps[0]);
368- // }
369- // }
370-
371- for (size_t i = 0 ; i < r1.steps .size (); i++) {
372- auto & step1 = r1.steps [i];
373- auto & step2 = r2.steps [i];
374-
375- // Calculate distance
376- float dist = ac::llama::LogitComparer::cosineDistance (step1.data , step2.data );
377-
378- // Calculate weight based on normalized entropy
379- float weight = normalizedEntropy (step1.data );
380- totalWeightedDist += weight * dist;
381- totalWeight += weight;
382-
383- // Calculate JSD
384- float jsd = ac::llama::LogitComparer::JSD (step1.data , step2.data );
385- jsdResults.push_back (jsd);
386-
387- // Calculate similarity
388- float similarity = ac::llama::LogitComparer::logitSimilarity (step1.data , step2.data );
389- similarityResults.push_back (similarity);
390-
391- std::cout << " Token: " << step1.tokenStr
392- << " , Weight: " << weight
393- << " , JSD: " << jsd
394- << " , Similarity: " << similarity
395- << " , Distance: " << dist
396- << " \n " ;
397- }
398-
399-
400- {
401- // Final step: Normalize
402-
403- // Score range | Interpretation
404- // 0.0 | Perfect match (identical predictions)
405- // 0.0001 - 0.001 | Practically indistinguishable
406- // 0.001 - 0.01 | Very close, slight variation
407- // 0.01 - 0.1 | Moderate variation, likely different versions/settings
408- // 0.1 - 1.0 | Large differences, likely different models
409- float finalScore = (totalWeight > 0 .0f ) ? (totalWeightedDist / totalWeight) : 0 .0f ;
410- std::cout << " Final weighted distance score: " << finalScore << " \n " ;
411- }
412-
413- {
414- // Final score interpretation
415- // average JSD score
416- // 0.0 | Perfect match (identical predictions)
417- // 0.0001 - 0.001 | Practically indistinguishable
418- // 0.001 - 0.01 | Moderate variation, likely different versions/settings
419- // 0.01 - 0.1 | Large differences, likely different models
420- float jsdSum = 0 .0f ;
421- for (const auto & jsd : jsdResults) {
422- jsdSum += jsd;
423- }
424- float jsdAvg = jsdSum / jsdResults.size ();
425- std::cout << " Average JSD score: " << jsdAvg << " \n " ;
426- }
427-
428- {
429- float similaritySum = 0 .0f ;
430- for (const auto & similarity : similarityResults) {
431- similaritySum += similarity;
432- }
433- float similarityAvg = similaritySum / similarityResults.size ();
434- std::cout << " Average similarity score: " << similarityAvg << " \n " ;
435- }
436- }
437- std::cout << ' \n ' ;
397+ runTest (r1, r2);
438398
439399 return 0 ;
440400}
0 commit comments