
Commit d81a5e6

feat(usage-analytics): implement token cost breakdown and anomaly detection

1 parent 9a892f0
15 files changed: +1530 -133 lines

src/web-server/usage-routes.ts

Lines changed: 289 additions & 17 deletions
@@ -21,7 +21,15 @@ import {
   loadSessionData,
   loadAllUsageData,
 } from './data-aggregator';
-import type { DailyUsage, MonthlyUsage, SessionUsage } from './usage-types';
+import type {
+  DailyUsage,
+  MonthlyUsage,
+  SessionUsage,
+  Anomaly,
+  AnomalySummary,
+  TokenBreakdown,
+} from './usage-types';
+import { getModelPricing } from './model-pricing';
 import {
   readDiskCache,
   writeDiskCache,
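The new getModelPricing import comes from model-pricing.ts, which is not part of this hunk. Judging only from the fields the diff reads off the returned object, the pricing record presumably looks roughly like the sketch below; the interface name and the declaration are assumptions, only the four field names are taken from the call sites.

// Assumed shape only; the real definition lives in src/web-server/model-pricing.ts.
interface ModelPricing {
  inputPerMillion: number;          // USD per 1M input tokens
  outputPerMillion: number;         // USD per 1M output tokens
  cacheCreationPerMillion: number;  // USD per 1M cache-creation tokens
  cacheReadPerMillion: number;      // USD per 1M cache-read tokens
}

declare function getModelPricing(modelName: string): ModelPricing;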
@@ -606,6 +614,45 @@ function errorResponse(res: Response, error: unknown, defaultMessage: string): v
   });
 }
 
+/**
+ * Calculate cost breakdown for token categories
+ * Uses weighted average pricing across models in the dataset
+ */
+function calculateTokenBreakdownCosts(dailyData: DailyUsage[]): TokenBreakdown {
+  let inputTokens = 0;
+  let outputTokens = 0;
+  let cacheCreationTokens = 0;
+  let cacheReadTokens = 0;
+  let inputCost = 0;
+  let outputCost = 0;
+  let cacheCreationCost = 0;
+  let cacheReadCost = 0;
+
+  for (const day of dailyData) {
+    for (const breakdown of day.modelBreakdowns) {
+      const pricing = getModelPricing(breakdown.modelName);
+
+      inputTokens += breakdown.inputTokens;
+      outputTokens += breakdown.outputTokens;
+      cacheCreationTokens += breakdown.cacheCreationTokens;
+      cacheReadTokens += breakdown.cacheReadTokens;
+
+      inputCost += (breakdown.inputTokens / 1_000_000) * pricing.inputPerMillion;
+      outputCost += (breakdown.outputTokens / 1_000_000) * pricing.outputPerMillion;
+      cacheCreationCost +=
+        (breakdown.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMillion;
+      cacheReadCost += (breakdown.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMillion;
+    }
+  }
+
+  return {
+    input: { tokens: inputTokens, cost: Math.round(inputCost * 100) / 100 },
+    output: { tokens: outputTokens, cost: Math.round(outputCost * 100) / 100 },
+    cacheCreation: { tokens: cacheCreationTokens, cost: Math.round(cacheCreationCost * 100) / 100 },
+    cacheRead: { tokens: cacheReadTokens, cost: Math.round(cacheReadCost * 100) / 100 },
+  };
+}
+
 /**
  * GET /api/usage/summary
  *
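The per-category math above is a straight rate multiplication followed by rounding to cents. As a rough illustration with an invented rate (not the project's actual pricing table):

// Illustrative only: the token count and $3.00/1M rate are made up.
const inputTokens = 2_345_678;
const inputPerMillion = 3.0;                                    // assumed rate
const inputCost = (inputTokens / 1_000_000) * inputPerMillion;  // 7.037034
const rounded = Math.round(inputCost * 100) / 100;              // 7.04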
@@ -625,17 +672,23 @@ usageRoutes.get(
       // Calculate totals
       let totalInputTokens = 0;
       let totalOutputTokens = 0;
-      let totalCacheTokens = 0;
+      let totalCacheCreationTokens = 0;
+      let totalCacheReadTokens = 0;
       let totalCost = 0;
 
       for (const day of filtered) {
        totalInputTokens += day.inputTokens;
        totalOutputTokens += day.outputTokens;
-        totalCacheTokens += day.cacheCreationTokens + day.cacheReadTokens;
+        totalCacheCreationTokens += day.cacheCreationTokens;
+        totalCacheReadTokens += day.cacheReadTokens;
        totalCost += day.totalCost;
       }
 
       const totalTokens = totalInputTokens + totalOutputTokens;
+      const totalCacheTokens = totalCacheCreationTokens + totalCacheReadTokens;
+
+      // Calculate detailed token breakdown with costs
+      const tokenBreakdown = calculateTokenBreakdownCosts(filtered);
 
       res.json({
         success: true,
@@ -644,7 +697,10 @@ usageRoutes.get(
           totalInputTokens,
           totalOutputTokens,
           totalCacheTokens,
+          totalCacheCreationTokens,
+          totalCacheReadTokens,
           totalCost: Math.round(totalCost * 100) / 100,
+          tokenBreakdown,
           totalDays: filtered.length,
           averageTokensPerDay: filtered.length > 0 ? Math.round(totalTokens / filtered.length) : 0,
           averageCostPerDay:
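With these fields, a summary response carries both the flat totals and the nested per-category breakdown. A sketch of the payload shape, written as a TypeScript literal with invented values and with the remaining summary fields omitted:

// Values are illustrative only; the breakdown costs assume the hypothetical
// rates used in the earlier example, not the repository's pricing table.
const exampleSummary = {
  success: true,
  data: {
    totalInputTokens: 1_200_000,
    totalOutputTokens: 800_000,
    totalCacheTokens: 5_000_000,
    totalCacheCreationTokens: 1_000_000,
    totalCacheReadTokens: 4_000_000,
    totalCost: 20.55,
    tokenBreakdown: {
      input: { tokens: 1_200_000, cost: 3.6 },
      output: { tokens: 800_000, cost: 12.0 },
      cacheCreation: { tokens: 1_000_000, cost: 3.75 },
      cacheRead: { tokens: 4_000_000, cost: 1.2 },
    },
    totalDays: 30,
  },
};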
@@ -710,14 +766,15 @@
       const dailyData = await getCachedDailyData();
       const filtered = filterByDateRange(dailyData, since, until);
 
-      // Aggregate model usage across all days
+      // Aggregate model usage across all days with detailed breakdown
       const modelMap = new Map<
         string,
         {
           model: string;
           inputTokens: number;
           outputTokens: number;
-          cacheTokens: number;
+          cacheCreationTokens: number;
+          cacheReadTokens: number;
           cost: number;
         }
       >();
@@ -728,13 +785,15 @@
           model: breakdown.modelName,
           inputTokens: 0,
           outputTokens: 0,
-          cacheTokens: 0,
+          cacheCreationTokens: 0,
+          cacheReadTokens: 0,
           cost: 0,
         };
 
         existing.inputTokens += breakdown.inputTokens;
         existing.outputTokens += breakdown.outputTokens;
-        existing.cacheTokens += breakdown.cacheCreationTokens + breakdown.cacheReadTokens;
+        existing.cacheCreationTokens += breakdown.cacheCreationTokens;
+        existing.cacheReadTokens += breakdown.cacheReadTokens;
         existing.cost += breakdown.cost;
 
         modelMap.set(breakdown.modelName, existing);
@@ -745,17 +804,46 @@
       const models = Array.from(modelMap.values());
       const totalTokens = models.reduce((sum, m) => sum + m.inputTokens + m.outputTokens, 0);
 
-      // Add percentage and sort by tokens
+      // Add percentage, cost breakdown, and I/O ratio
       const result = models
-        .map((m) => ({
-          ...m,
-          tokens: m.inputTokens + m.outputTokens,
-          cost: Math.round(m.cost * 100) / 100,
-          percentage:
-            totalTokens > 0
-              ? Math.round(((m.inputTokens + m.outputTokens) / totalTokens) * 1000) / 10
-              : 0,
-        }))
+        .map((m) => {
+          const pricing = getModelPricing(m.model);
+
+          // Calculate cost breakdown
+          const inputCost = (m.inputTokens / 1_000_000) * pricing.inputPerMillion;
+          const outputCost = (m.outputTokens / 1_000_000) * pricing.outputPerMillion;
+          const cacheCreationCost =
+            (m.cacheCreationTokens / 1_000_000) * pricing.cacheCreationPerMillion;
+          const cacheReadCost = (m.cacheReadTokens / 1_000_000) * pricing.cacheReadPerMillion;
+
+          // Calculate I/O ratio
+          const ioRatio = m.outputTokens > 0 ? m.inputTokens / m.outputTokens : 0;
+
+          return {
+            model: m.model,
+            tokens: m.inputTokens + m.outputTokens,
+            inputTokens: m.inputTokens,
+            outputTokens: m.outputTokens,
+            cacheCreationTokens: m.cacheCreationTokens,
+            cacheReadTokens: m.cacheReadTokens,
+            cacheTokens: m.cacheCreationTokens + m.cacheReadTokens,
+            cost: Math.round(m.cost * 100) / 100,
+            percentage:
+              totalTokens > 0
+                ? Math.round(((m.inputTokens + m.outputTokens) / totalTokens) * 1000) / 10
+                : 0,
+            costBreakdown: {
+              input: { tokens: m.inputTokens, cost: Math.round(inputCost * 100) / 100 },
+              output: { tokens: m.outputTokens, cost: Math.round(outputCost * 100) / 100 },
+              cacheCreation: {
+                tokens: m.cacheCreationTokens,
+                cost: Math.round(cacheCreationCost * 100) / 100,
+              },
+              cacheRead: { tokens: m.cacheReadTokens, cost: Math.round(cacheReadCost * 100) / 100 },
+            },
+            ioRatio: Math.round(ioRatio * 10) / 10,
+          };
+        })
         .sort((a, b) => b.tokens - a.tokens);
 
       res.json({
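Each entry produced by the .map() above now carries the raw token counts, a per-category costBreakdown, and an ioRatio rounded to one decimal. An invented example entry, using the same hypothetical rates as before (the model name is also hypothetical):

// Numbers are made up, just to show the shape of one element of `result`.
const exampleModelEntry = {
  model: 'claude-sonnet-4',         // hypothetical model name
  tokens: 900_000,                  // inputTokens + outputTokens
  inputTokens: 600_000,
  outputTokens: 300_000,
  cacheCreationTokens: 2_000_000,
  cacheReadTokens: 8_000_000,
  cacheTokens: 10_000_000,
  cost: 16.2,
  percentage: 64.3,                 // share of all input+output tokens, one decimal
  costBreakdown: {
    input: { tokens: 600_000, cost: 1.8 },
    output: { tokens: 300_000, cost: 4.5 },
    cacheCreation: { tokens: 2_000_000, cost: 7.5 },
    cacheRead: { tokens: 8_000_000, cost: 2.4 },
  },
  ioRatio: 2.0,                     // 600k input / 300k output
};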
@@ -900,3 +988,187 @@ usageRoutes.get('/status', (_req: Request, res: Response) => {
     },
   });
 });
+
+// ============================================================================
+// ANOMALY DETECTION
+// ============================================================================
+
+/** Anomaly detection thresholds */
+const ANOMALY_THRESHOLDS = {
+  HIGH_INPUT_TOKENS: 10_000_000, // 10M tokens/day/model
+  HIGH_IO_RATIO: 100, // 100x input/output ratio
+  COST_SPIKE_MULTIPLIER: 2, // 2x average daily cost
+  HIGH_CACHE_READ_TOKENS: 1_000_000_000, // 1B cache read tokens
+};
+
+/**
+ * Detect anomalies in usage data
+ */
+function detectAnomalies(dailyData: DailyUsage[]): Anomaly[] {
+  const anomalies: Anomaly[] = [];
+
+  // Calculate average daily cost for spike detection
+  const totalCost = dailyData.reduce((sum, day) => sum + day.totalCost, 0);
+  const avgDailyCost = dailyData.length > 0 ? totalCost / dailyData.length : 0;
+  const costSpikeThreshold = avgDailyCost * ANOMALY_THRESHOLDS.COST_SPIKE_MULTIPLIER;
+
+  for (const day of dailyData) {
+    // Check for cost spikes
+    if (avgDailyCost > 0 && day.totalCost > costSpikeThreshold) {
+      const multiplier = Math.round((day.totalCost / avgDailyCost) * 10) / 10;
+      anomalies.push({
+        date: day.date,
+        type: 'cost_spike',
+        value: day.totalCost,
+        threshold: avgDailyCost,
+        message: `Cost ${multiplier}x above daily average ($${Math.round(day.totalCost)} vs $${Math.round(avgDailyCost)})`,
+      });
+    }
+
+    // Check per-model anomalies
+    for (const breakdown of day.modelBreakdowns) {
+      // High input tokens per model
+      if (breakdown.inputTokens > ANOMALY_THRESHOLDS.HIGH_INPUT_TOKENS) {
+        const multiplier =
+          Math.round((breakdown.inputTokens / ANOMALY_THRESHOLDS.HIGH_INPUT_TOKENS) * 10) / 10;
+        anomalies.push({
+          date: day.date,
+          type: 'high_input',
+          model: breakdown.modelName,
+          value: breakdown.inputTokens,
+          threshold: ANOMALY_THRESHOLDS.HIGH_INPUT_TOKENS,
+          message: `Input tokens ${multiplier}x above threshold (${formatTokenCount(breakdown.inputTokens)})`,
+        });
+      }
+
+      // High I/O ratio
+      if (breakdown.outputTokens > 0) {
+        const ioRatio = breakdown.inputTokens / breakdown.outputTokens;
+        if (ioRatio > ANOMALY_THRESHOLDS.HIGH_IO_RATIO) {
+          const multiplier = Math.round((ioRatio / ANOMALY_THRESHOLDS.HIGH_IO_RATIO) * 10) / 10;
+          anomalies.push({
+            date: day.date,
+            type: 'high_io_ratio',
+            model: breakdown.modelName,
+            value: ioRatio,
+            threshold: ANOMALY_THRESHOLDS.HIGH_IO_RATIO,
+            message: `I/O ratio ${multiplier}x above threshold (${Math.round(ioRatio)}:1)`,
+          });
+        }
+      }
+
+      // High cache read tokens
+      if (breakdown.cacheReadTokens > ANOMALY_THRESHOLDS.HIGH_CACHE_READ_TOKENS) {
+        const multiplier =
+          Math.round((breakdown.cacheReadTokens / ANOMALY_THRESHOLDS.HIGH_CACHE_READ_TOKENS) * 10) /
+          10;
+        anomalies.push({
+          date: day.date,
+          type: 'high_cache_read',
+          model: breakdown.modelName,
+          value: breakdown.cacheReadTokens,
+          threshold: ANOMALY_THRESHOLDS.HIGH_CACHE_READ_TOKENS,
+          message: `Cache reads ${multiplier}x above threshold (${formatTokenCount(breakdown.cacheReadTokens)})`,
+        });
+      }
+    }
+  }
+
+  // Sort by date descending
+  return anomalies.sort((a, b) => b.date.localeCompare(a.date));
+}
+
+/**
+ * Format token count for human readability
+ */
+function formatTokenCount(tokens: number): string {
+  if (tokens >= 1_000_000_000) {
+    return `${(tokens / 1_000_000_000).toFixed(1)}B`;
+  } else if (tokens >= 1_000_000) {
+    return `${(tokens / 1_000_000).toFixed(1)}M`;
+  } else if (tokens >= 1_000) {
+    return `${(tokens / 1_000).toFixed(1)}K`;
+  }
+  return tokens.toString();
+}
+
+/**
+ * Summarize anomalies by type
+ */
+function summarizeAnomalies(anomalies: Anomaly[]): AnomalySummary {
+  const uniqueDates = new Set<string>();
+  let highInputDays = 0;
+  let highIoRatioDays = 0;
+  let costSpikeDays = 0;
+  let highCacheReadDays = 0;
+
+  // Track unique dates per anomaly type
+  const highInputDates = new Set<string>();
+  const highIoRatioDates = new Set<string>();
+  const costSpikeDates = new Set<string>();
+  const highCacheReadDates = new Set<string>();
+
+  for (const anomaly of anomalies) {
+    uniqueDates.add(anomaly.date);
+
+    switch (anomaly.type) {
+      case 'high_input':
+        highInputDates.add(anomaly.date);
+        break;
+      case 'high_io_ratio':
+        highIoRatioDates.add(anomaly.date);
+        break;
+      case 'cost_spike':
+        costSpikeDates.add(anomaly.date);
+        break;
+      case 'high_cache_read':
+        highCacheReadDates.add(anomaly.date);
+        break;
+    }
+  }
+
+  highInputDays = highInputDates.size;
+  highIoRatioDays = highIoRatioDates.size;
+  costSpikeDays = costSpikeDates.size;
+  highCacheReadDays = highCacheReadDates.size;
+
+  return {
+    totalAnomalies: anomalies.length,
+    highInputDays,
+    highIoRatioDays,
+    costSpikeDays,
+    highCacheReadDays,
+  };
+}
+
+/**
+ * GET /api/usage/insights
+ *
+ * Returns anomaly detection results for usage patterns.
+ * Query: ?since=YYYYMMDD&until=YYYYMMDD
+ */
+usageRoutes.get(
+  '/insights',
+  async (req: Request<object, object, object, UsageQuery>, res: Response) => {
+    try {
+      const since = validateDate(req.query.since);
+      const until = validateDate(req.query.until);
+
+      const dailyData = await getCachedDailyData();
+      const filtered = filterByDateRange(dailyData, since, until);
+
+      const anomalies = detectAnomalies(filtered);
+      const summary = summarizeAnomalies(anomalies);
+
+      res.json({
+        success: true,
+        data: {
+          anomalies,
+          summary,
+        },
+      });
+    } catch (error) {
+      errorResponse(res, error, 'Failed to fetch usage insights');
+    }
+  }
+);
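For reference, the new endpoint takes the same date-bounded query as the other usage routes. A client-side sketch is shown below; the URL path, query parameter names, and field names come from this diff, while the dates, values, and the date string format in the response are invented (the Anomaly and AnomalySummary types live in usage-types.ts, which is not shown here).

// Sketch only; run inside an ES module or async function.
const res = await fetch('/api/usage/insights?since=20240501&until=20240531');
const body = await res.json();
// body might look like:
// {
//   success: true,
//   data: {
//     anomalies: [
//       {
//         date: '2024-05-17',          // format follows DailyUsage.date (assumed ISO here)
//         type: 'cost_spike',
//         value: 812.4,
//         threshold: 310.2,
//         message: 'Cost 2.6x above daily average ($812 vs $310)',
//       },
//     ],
//     summary: {
//       totalAnomalies: 1,
//       highInputDays: 0,
//       highIoRatioDays: 0,
//       costSpikeDays: 1,
//       highCacheReadDays: 0,
//     },
//   },
// }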
