|
/**
 * Retrieval metrics for evaluation
 *
 * Computes precision@k, recall@k, and nDCG@k for retrieval evaluation.
 */
| 6 | + |
/**
 * Compute precision@k, recall@k, and nDCG@k for a single query.
 *
 * Relevance is binary: an ID counts as relevant iff it is a member of
 * `relevantSet`. An ID that occurs more than once within the top k is
 * credited only at its first (best) rank, so recall and nDCG never exceed 1.
 *
 * @param retrievedIds - Ordered list of retrieved chunk IDs (best first)
 * @param relevantSet - Set of relevant (ground truth) chunk IDs
 * @param k - Number of results to consider; fractional values are floored
 * @returns `precision` (distinct relevant hits / k), `recall` (distinct
 *   relevant hits / number of relevant IDs) and `ndcg` (DCG / ideal DCG).
 *   All three are 0 when `k` floors to a non-positive value or is NaN;
 *   `recall` and `ndcg` are 0 when `relevantSet` is empty.
 */
export function computeMetrics(
  retrievedIds: readonly string[],
  relevantSet: ReadonlySet<string>,
  k: number,
): { precision: number; recall: number; ndcg: number } {
  const cutoff = Math.floor(k)

  // `!(x > 0)` also rejects NaN, for which every comparison is false.
  // Without this guard k = 0 yields 0 / 0 = NaN and a negative k makes
  // `slice(0, k)` drop items from the tail instead of truncating.
  if (!(cutoff > 0)) {
    return { precision: 0, recall: 0, ndcg: 0 }
  }

  const credited = new Set<string>()
  let hits = 0
  let dcg = 0

  retrievedIds.slice(0, cutoff).forEach((id, rank) => {
    // Skip non-relevant IDs and repeats of an already credited relevant ID.
    if (!relevantSet.has(id) || credited.has(id)) {
      return
    }
    credited.add(id)
    hits += 1
    // rank + 2 so that the first position gets discount log2(2) = 1
    dcg += 1 / Math.log2(rank + 2)
  })

  // Precision@k: fraction of the k considered slots holding a relevant ID
  const precision = hits / cutoff

  // Recall@k: fraction of relevant IDs that were retrieved
  const recall = relevantSet.size > 0 ? hits / relevantSet.size : 0

  // Ideal DCG: every relevant ID placed at the top of the ranking
  const idealHits = Math.min(cutoff, relevantSet.size)
  let idcg = 0
  for (let rank = 0; rank < idealHits; rank++) {
    idcg += 1 / Math.log2(rank + 2)
  }

  const ndcg = idcg > 0 ? dcg / idcg : 0

  return { precision, recall, ndcg }
}
| 45 | + |
/**
 * Aggregate metrics across multiple queries.
 *
 * Each field of the result is the arithmetic mean of that field over all
 * entries, giving every query equal weight.
 *
 * @param metrics - Array of per-query metric objects
 * @returns The per-field mean, or all zeros when `metrics` is empty
 */
export function aggregateMetrics(
  metrics: ReadonlyArray<{ precision: number; recall: number; ndcg: number }>,
): { precision: number; recall: number; ndcg: number } {
  const count = metrics.length

  // An empty input would otherwise divide 0 by 0 and return NaN.
  if (count === 0) {
    return { precision: 0, recall: 0, ndcg: 0 }
  }

  let precision = 0
  let recall = 0
  let ndcg = 0

  for (const entry of metrics) {
    precision += entry.precision
    recall += entry.recall
    ndcg += entry.ndcg
  }

  return {
    precision: precision / count,
    recall: recall / count,
    ndcg: ndcg / count,
  }
}
0 commit comments