Skip to content

Commit 2265994

Browse files
committed
fix: clean-up
1 parent fb5b5bb commit 2265994

File tree

3 files changed

+2
-100
lines changed

3 files changed

+2
-100
lines changed

code/ac/llama/LogitComparer.cpp

Lines changed: 0 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -46,29 +46,7 @@ bool LogitComparer::compare(const TokenDataVector& data1, const TokenDataVector&
4646
return jsd(prob_map, prob_map2) < 0.01f; // 1% divergence allowed
4747
}
4848

49-
float LogitComparer::JSD(const TokenDataVector& data1, const TokenDataVector& data2) {
50-
std::unordered_map<int32_t, float> prob_map, prob_map2;
51-
52-
for (const auto& p : data1) prob_map[p.token] = p.prob;
53-
for (const auto& p : data2) prob_map2[p.token] = p.prob;
54-
55-
return jsd(prob_map, prob_map2);
56-
}
57-
58-
float LogitComparer::cosineDistance(const TokenDataVector& data1, const TokenDataVector& data2) {
59-
assert(data1.size() == data2.size());
60-
float dot = 0.0f, normA = 0.0f, normB = 0.0f;
61-
for (size_t i = 0; i < data1.size(); ++i) {
62-
dot += data1[i].logit * data2[i].logit;
63-
normA += data1[i].logit * data1[i].logit;
64-
normB += data2[i].logit * data2[i].logit;
65-
}
66-
return 1.0f - (dot / (std::sqrt(normA) * std::sqrt(normB)));
67-
}
68-
6949
float LogitComparer::logitSimilarity(const TokenDataVector& data1, const TokenDataVector& data2) {
70-
float res = 0.0f;
71-
7250
assert(data1.size() == data2.size());
7351
std::unordered_map<int32_t, float> l_map, l2_map;
7452

code/ac/llama/LogitComparer.hpp

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,6 @@ class LogitComparer {
1212
public:
1313
static bool compare(const TokenDataVector& data1, const TokenDataVector& data2);
1414

15-
static float JSD(const TokenDataVector& data1, const TokenDataVector& data2);
16-
17-
static float cosineDistance(const TokenDataVector& data1, const TokenDataVector& data2);
18-
1915
static float logitSimilarity(const TokenDataVector& data1, const TokenDataVector& data2);
2016

2117
private:

example/e-verify.cpp

Lines changed: 2 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -128,32 +128,6 @@ class Model {
128128
ac::llama::Session* m_session;
129129
};
130130

131-
// -- Helper function to compute normalized entropy --
132-
float normalizedEntropy(const ac::llama::TokenDataVector& data) {
133-
std::vector<float> probs(data.size());
134-
float sum = 0.0f;
135-
136-
// Calculate softmax probabilities
137-
for (auto& val : data) {
138-
sum += std::exp(val.logit);
139-
}
140-
for (size_t i = 0; i < data.size(); ++i) {
141-
probs[i] = std::exp(data[i].logit) / sum;
142-
}
143-
144-
// Calculate entropy
145-
float entropy = 0.0f;
146-
for (float p : probs) {
147-
if (p > 0.0f) {
148-
entropy -= p * std::log(p);
149-
}
150-
}
151-
152-
// Normalize entropy by maximum possible entropy (log(number of classes))
153-
float maxEntropy = std::log(float(probs.size()));
154-
return entropy / maxEntropy;
155-
}
156-
157131

158132
std::vector<Model::GenerationResult> modelGeneration(Model& m1, Model& m2, std::string_view prompt, uint32_t maxTokens) {
159133
auto res = m1.generate(prompt, maxTokens);
@@ -280,66 +254,20 @@ Model::GenerationResult deserialize(std::string_view filename) {
280254
void runCompare(Model::GenerationResult& r1, Model::GenerationResult& r2) {
281255
std::vector<float> jsdResults;
282256
std::vector<float> similarityResults;
283-
float totalWeightedDist = 0.0f;
284-
float totalWeight = 0.0f;
285257

286258
for (size_t i = 0; i < r1.steps.size(); i++) {
287259
auto& step1 = r1.steps[i];
288260
auto& step2 = r2.steps[i];
289261

290-
// Calculate distance
291-
float dist = ac::llama::LogitComparer::cosineDistance(step1.data, step2.data);
292-
293-
// Calculate weight based on normalized entropy
294-
float weight = normalizedEntropy(step1.data);
295-
totalWeightedDist += weight * dist;
296-
totalWeight += weight;
297-
298-
// Calculate JSD
299-
float jsd = ac::llama::LogitComparer::JSD(step1.data, step2.data);
300-
jsdResults.push_back(jsd);
301-
302262
// Calculate similarity
303263
float similarity = ac::llama::LogitComparer::logitSimilarity(step1.data, step2.data);
304264
similarityResults.push_back(similarity);
305265

306266
std::cout << "Token: " << step1.tokenStr
307-
<< ", Weight: " << weight
308-
<< ", JSD: " << jsd
309267
<< ", Similarity: " << similarity
310-
<< ", Distance: " << dist
311268
<< "\n";
312269
}
313270

314-
315-
{
316-
// Final step: Normalize
317-
318-
// Score range | Interpretation
319-
// 0.0 | Perfect match (identical predictions)
320-
// 0.0001 - 0.001 | Practically indistinguishable
321-
// 0.001 - 0.01 | Very close, slight variation
322-
// 0.01 - 0.1 | Moderate variation, likely different versions/settings
323-
// 0.1 - 1.0 | Large differences, likely different models
324-
float finalScore = (totalWeight > 0.0f) ? (totalWeightedDist / totalWeight) : 0.0f;
325-
std::cout << "Final weighted distance score: " << finalScore << "\n";
326-
}
327-
328-
{
329-
// Final score interpretation
330-
// average JSD score
331-
// 0.0 | Perfect match (identical predictions)
332-
// 0.0001 - 0.001 | Practically indistinguishable
333-
// 0.001 - 0.01 | Moderate variation, likely different versions/settings
334-
// 0.01 - 0.1 | Large differences, likely different models
335-
float jsdSum = 0.0f;
336-
for (const auto& jsd : jsdResults) {
337-
jsdSum += jsd;
338-
}
339-
float jsdAvg = jsdSum / jsdResults.size();
340-
std::cout << "Average JSD score: " << jsdAvg << "\n";
341-
}
342-
343271
{
344272
float similaritySum = 0.0f;
345273
for (const auto& similarity : similarityResults) {
@@ -362,8 +290,8 @@ int main() try {
362290

363291
// load model
364292
std::string tmpFolder = AC_TEST_DATA_LLAMA_DIR "/../../../tmp/";
365-
std::string modelGguf = "Meta-Llama-3.1-70B-Instruct-Q5_K_S.gguf";
366-
// std::string modelGguf = "Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf";
293+
// std::string modelGguf = "Meta-Llama-3.1-70B-Instruct-Q5_K_S.gguf";
294+
std::string modelGguf = "Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf";
367295
// std::string modelGguf = "BgGPT-Gemma-2-2B-IT-v1.0.Q8_0.gguf";
368296
// std::string modelGguf = "Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf";
369297
// std::string modelGguf2 = "Meta-Llama-3.1-70B-Instruct-Q5_K_S.gguf";

0 commit comments

Comments
 (0)