@@ -21,7 +21,15 @@ import {
2121 loadSessionData ,
2222 loadAllUsageData ,
2323} from './data-aggregator' ;
24- import type { DailyUsage , MonthlyUsage , SessionUsage } from './usage-types' ;
24+ import type {
25+ DailyUsage ,
26+ MonthlyUsage ,
27+ SessionUsage ,
28+ Anomaly ,
29+ AnomalySummary ,
30+ TokenBreakdown ,
31+ } from './usage-types' ;
32+ import { getModelPricing } from './model-pricing' ;
2533import {
2634 readDiskCache ,
2735 writeDiskCache ,
@@ -606,6 +614,45 @@ function errorResponse(res: Response, error: unknown, defaultMessage: string): v
606614 } ) ;
607615}
608616
/**
 * Aggregate token counts and their costs per token category.
 *
 * Walks every per-model breakdown of every day, summing the four token
 * counters and pricing each of them at the per-million rates returned by
 * `getModelPricing` for that breakdown's model. Costs are accumulated
 * unrounded and rounded to two decimals once, when the result is built.
 *
 * @param dailyData - Daily usage records whose `modelBreakdowns` are summed.
 * @returns Token totals and rounded costs for input, output, cache creation
 *   and cache read.
 */
function calculateTokenBreakdownCosts(dailyData: DailyUsage[]): TokenBreakdown {
  const TOKENS_PER_MILLION = 1_000_000;
  const roundToCents = (amount: number): number => Math.round(amount * 100) / 100;

  // Running totals, one slot per token category.
  const tokens = { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };
  const costs = { input: 0, output: 0, cacheCreation: 0, cacheRead: 0 };

  dailyData.forEach((day) => {
    day.modelBreakdowns.forEach((entry) => {
      const rates = getModelPricing(entry.modelName);

      tokens.input += entry.inputTokens;
      tokens.output += entry.outputTokens;
      tokens.cacheCreation += entry.cacheCreationTokens;
      tokens.cacheRead += entry.cacheReadTokens;

      costs.input += (entry.inputTokens / TOKENS_PER_MILLION) * rates.inputPerMillion;
      costs.output += (entry.outputTokens / TOKENS_PER_MILLION) * rates.outputPerMillion;
      costs.cacheCreation +=
        (entry.cacheCreationTokens / TOKENS_PER_MILLION) * rates.cacheCreationPerMillion;
      costs.cacheRead += (entry.cacheReadTokens / TOKENS_PER_MILLION) * rates.cacheReadPerMillion;
    });
  });

  return {
    input: { tokens: tokens.input, cost: roundToCents(costs.input) },
    output: { tokens: tokens.output, cost: roundToCents(costs.output) },
    cacheCreation: { tokens: tokens.cacheCreation, cost: roundToCents(costs.cacheCreation) },
    cacheRead: { tokens: tokens.cacheRead, cost: roundToCents(costs.cacheRead) },
  };
}
655+
609656/**
610657 * GET /api/usage/summary
611658 *
@@ -625,17 +672,23 @@ usageRoutes.get(
625672 // Calculate totals
626673 let totalInputTokens = 0 ;
627674 let totalOutputTokens = 0 ;
628- let totalCacheTokens = 0 ;
675+ let totalCacheCreationTokens = 0 ;
676+ let totalCacheReadTokens = 0 ;
629677 let totalCost = 0 ;
630678
631679 for ( const day of filtered ) {
632680 totalInputTokens += day . inputTokens ;
633681 totalOutputTokens += day . outputTokens ;
634- totalCacheTokens += day . cacheCreationTokens + day . cacheReadTokens ;
682+ totalCacheCreationTokens += day . cacheCreationTokens ;
683+ totalCacheReadTokens += day . cacheReadTokens ;
635684 totalCost += day . totalCost ;
636685 }
637686
638687 const totalTokens = totalInputTokens + totalOutputTokens ;
688+ const totalCacheTokens = totalCacheCreationTokens + totalCacheReadTokens ;
689+
690+ // Calculate detailed token breakdown with costs
691+ const tokenBreakdown = calculateTokenBreakdownCosts ( filtered ) ;
639692
640693 res . json ( {
641694 success : true ,
@@ -644,7 +697,10 @@ usageRoutes.get(
644697 totalInputTokens,
645698 totalOutputTokens,
646699 totalCacheTokens,
700+ totalCacheCreationTokens,
701+ totalCacheReadTokens,
647702 totalCost : Math . round ( totalCost * 100 ) / 100 ,
703+ tokenBreakdown,
648704 totalDays : filtered . length ,
649705 averageTokensPerDay : filtered . length > 0 ? Math . round ( totalTokens / filtered . length ) : 0 ,
650706 averageCostPerDay :
@@ -710,14 +766,15 @@ usageRoutes.get(
710766 const dailyData = await getCachedDailyData ( ) ;
711767 const filtered = filterByDateRange ( dailyData , since , until ) ;
712768
713- // Aggregate model usage across all days
769+ // Aggregate model usage across all days with detailed breakdown
714770 const modelMap = new Map <
715771 string ,
716772 {
717773 model : string ;
718774 inputTokens : number ;
719775 outputTokens : number ;
720- cacheTokens : number ;
776+ cacheCreationTokens : number ;
777+ cacheReadTokens : number ;
721778 cost : number ;
722779 }
723780 > ( ) ;
@@ -728,13 +785,15 @@ usageRoutes.get(
728785 model : breakdown . modelName ,
729786 inputTokens : 0 ,
730787 outputTokens : 0 ,
731- cacheTokens : 0 ,
788+ cacheCreationTokens : 0 ,
789+ cacheReadTokens : 0 ,
732790 cost : 0 ,
733791 } ;
734792
735793 existing . inputTokens += breakdown . inputTokens ;
736794 existing . outputTokens += breakdown . outputTokens ;
737- existing . cacheTokens += breakdown . cacheCreationTokens + breakdown . cacheReadTokens ;
795+ existing . cacheCreationTokens += breakdown . cacheCreationTokens ;
796+ existing . cacheReadTokens += breakdown . cacheReadTokens ;
738797 existing . cost += breakdown . cost ;
739798
740799 modelMap . set ( breakdown . modelName , existing ) ;
@@ -745,17 +804,46 @@ usageRoutes.get(
745804 const models = Array . from ( modelMap . values ( ) ) ;
746805 const totalTokens = models . reduce ( ( sum , m ) => sum + m . inputTokens + m . outputTokens , 0 ) ;
747806
748- // Add percentage and sort by tokens
807+ // Add percentage, cost breakdown, and I/O ratio
749808 const result = models
750- . map ( ( m ) => ( {
751- ...m ,
752- tokens : m . inputTokens + m . outputTokens ,
753- cost : Math . round ( m . cost * 100 ) / 100 ,
754- percentage :
755- totalTokens > 0
756- ? Math . round ( ( ( m . inputTokens + m . outputTokens ) / totalTokens ) * 1000 ) / 10
757- : 0 ,
758- } ) )
809+ . map ( ( m ) => {
810+ const pricing = getModelPricing ( m . model ) ;
811+
812+ // Calculate cost breakdown
813+ const inputCost = ( m . inputTokens / 1_000_000 ) * pricing . inputPerMillion ;
814+ const outputCost = ( m . outputTokens / 1_000_000 ) * pricing . outputPerMillion ;
815+ const cacheCreationCost =
816+ ( m . cacheCreationTokens / 1_000_000 ) * pricing . cacheCreationPerMillion ;
817+ const cacheReadCost = ( m . cacheReadTokens / 1_000_000 ) * pricing . cacheReadPerMillion ;
818+
819+ // Calculate I/O ratio
820+ const ioRatio = m . outputTokens > 0 ? m . inputTokens / m . outputTokens : 0 ;
821+
822+ return {
823+ model : m . model ,
824+ tokens : m . inputTokens + m . outputTokens ,
825+ inputTokens : m . inputTokens ,
826+ outputTokens : m . outputTokens ,
827+ cacheCreationTokens : m . cacheCreationTokens ,
828+ cacheReadTokens : m . cacheReadTokens ,
829+ cacheTokens : m . cacheCreationTokens + m . cacheReadTokens ,
830+ cost : Math . round ( m . cost * 100 ) / 100 ,
831+ percentage :
832+ totalTokens > 0
833+ ? Math . round ( ( ( m . inputTokens + m . outputTokens ) / totalTokens ) * 1000 ) / 10
834+ : 0 ,
835+ costBreakdown : {
836+ input : { tokens : m . inputTokens , cost : Math . round ( inputCost * 100 ) / 100 } ,
837+ output : { tokens : m . outputTokens , cost : Math . round ( outputCost * 100 ) / 100 } ,
838+ cacheCreation : {
839+ tokens : m . cacheCreationTokens ,
840+ cost : Math . round ( cacheCreationCost * 100 ) / 100 ,
841+ } ,
842+ cacheRead : { tokens : m . cacheReadTokens , cost : Math . round ( cacheReadCost * 100 ) / 100 } ,
843+ } ,
844+ ioRatio : Math . round ( ioRatio * 10 ) / 10 ,
845+ } ;
846+ } )
759847 . sort ( ( a , b ) => b . tokens - a . tokens ) ;
760848
761849 res . json ( {
@@ -900,3 +988,187 @@ usageRoutes.get('/status', (_req: Request, res: Response) => {
900988 } ,
901989 } ) ;
902990} ) ;
991+
992+ // ============================================================================
993+ // ANOMALY DETECTION
994+ // ============================================================================
995+
/**
 * Anomaly detection thresholds.
 *
 * Declared `as const` so the values are readonly literals and cannot be
 * reassigned by accident elsewhere in the module.
 */
const ANOMALY_THRESHOLDS = {
  /** 10M input tokens per day per model. */
  HIGH_INPUT_TOKENS: 10_000_000,
  /** 100x input/output token ratio. */
  HIGH_IO_RATIO: 100,
  /** 2x the average daily cost. */
  COST_SPIKE_MULTIPLIER: 2,
  /** 1B cache read tokens. */
  HIGH_CACHE_READ_TOKENS: 1_000_000_000,
} as const;
1003+
/**
 * Detect anomalies in usage data.
 *
 * Checks performed, in the order anomalies are recorded for each day:
 *  - `cost_spike`: the day's total cost exceeds the average daily cost of
 *    `dailyData` times `ANOMALY_THRESHOLDS.COST_SPIKE_MULTIPLIER`.
 *  - `high_input` (per model): input tokens exceed `HIGH_INPUT_TOKENS`.
 *  - `high_io_ratio` (per model): input/output ratio exceeds `HIGH_IO_RATIO`;
 *    skipped when the model produced no output tokens.
 *  - `high_cache_read` (per model): cache read tokens exceed
 *    `HIGH_CACHE_READ_TOKENS`.
 *
 * @param dailyData - Daily usage records to inspect; the average used for
 *   spike detection is computed over exactly these records.
 * @returns Detected anomalies sorted by `date` descending (string comparison).
 */
function detectAnomalies(dailyData: DailyUsage[]): Anomaly[] {
  const anomalies: Anomaly[] = [];

  // How many times `value` is of `baseline`, rounded to one decimal place.
  const toMultiplier = (value: number, baseline: number): number =>
    Math.round((value / baseline) * 10) / 10;

  // Average daily cost over the supplied range drives spike detection.
  const totalCost = dailyData.reduce((sum, day) => sum + day.totalCost, 0);
  const avgDailyCost = dailyData.length > 0 ? totalCost / dailyData.length : 0;
  const costSpikeThreshold = avgDailyCost * ANOMALY_THRESHOLDS.COST_SPIKE_MULTIPLIER;

  for (const day of dailyData) {
    // Cost spike; the `avgDailyCost > 0` guard avoids dividing by zero below.
    if (avgDailyCost > 0 && day.totalCost > costSpikeThreshold) {
      const multiplier = toMultiplier(day.totalCost, avgDailyCost);
      anomalies.push({
        date: day.date,
        type: 'cost_spike',
        value: day.totalCost,
        // NOTE(review): reports the average, not the trigger value
        // (`costSpikeThreshold`); kept as-is because the message compares
        // against the average — confirm consumers expect this.
        threshold: avgDailyCost,
        message: `Cost ${multiplier}x above daily average ($${Math.round(day.totalCost)} vs $${Math.round(avgDailyCost)})`,
      });
    }

    // Per-model checks
    for (const breakdown of day.modelBreakdowns) {
      // High input tokens per model
      if (breakdown.inputTokens > ANOMALY_THRESHOLDS.HIGH_INPUT_TOKENS) {
        const multiplier = toMultiplier(
          breakdown.inputTokens,
          ANOMALY_THRESHOLDS.HIGH_INPUT_TOKENS
        );
        anomalies.push({
          date: day.date,
          type: 'high_input',
          model: breakdown.modelName,
          value: breakdown.inputTokens,
          threshold: ANOMALY_THRESHOLDS.HIGH_INPUT_TOKENS,
          message: `Input tokens ${multiplier}x above threshold (${formatTokenCount(breakdown.inputTokens)})`,
        });
      }

      // High I/O ratio; undefined when there are no output tokens, so skip.
      if (breakdown.outputTokens > 0) {
        const ioRatio = breakdown.inputTokens / breakdown.outputTokens;
        if (ioRatio > ANOMALY_THRESHOLDS.HIGH_IO_RATIO) {
          const multiplier = toMultiplier(ioRatio, ANOMALY_THRESHOLDS.HIGH_IO_RATIO);
          anomalies.push({
            date: day.date,
            type: 'high_io_ratio',
            model: breakdown.modelName,
            value: ioRatio,
            threshold: ANOMALY_THRESHOLDS.HIGH_IO_RATIO,
            message: `I/O ratio ${multiplier}x above threshold (${Math.round(ioRatio)}:1)`,
          });
        }
      }

      // High cache read tokens
      if (breakdown.cacheReadTokens > ANOMALY_THRESHOLDS.HIGH_CACHE_READ_TOKENS) {
        const multiplier = toMultiplier(
          breakdown.cacheReadTokens,
          ANOMALY_THRESHOLDS.HIGH_CACHE_READ_TOKENS
        );
        anomalies.push({
          date: day.date,
          type: 'high_cache_read',
          model: breakdown.modelName,
          value: breakdown.cacheReadTokens,
          threshold: ANOMALY_THRESHOLDS.HIGH_CACHE_READ_TOKENS,
          message: `Cache reads ${multiplier}x above threshold (${formatTokenCount(breakdown.cacheReadTokens)})`,
        });
      }
    }
  }

  // Sort by date descending; assumes date strings order chronologically
  // under string comparison — TODO confirm against the date format in use.
  return anomalies.sort((a, b) => b.date.localeCompare(a.date));
}
1080+
/**
 * Format a token count for human readability.
 *
 * Values of at least one thousand are scaled to K, M or B with one decimal
 * place; anything else (including negative or non-numeric values) is returned
 * via `toString()`.
 *
 * When rounding to one decimal would print `1000.0` of a unit, the next
 * larger unit is used instead, so 999_999 renders as "1.0M" rather than
 * "1000.0K". B is the largest unit, so it is never promoted.
 *
 * @param tokens - Token count to format.
 * @returns Compact string such as "950", "1.5K", "2.5M" or "3.0B".
 */
function formatTokenCount(tokens: number): string {
  // Ordered largest first so the first match is the natural unit.
  const units = [
    [1_000_000_000, 'B'],
    [1_000_000, 'M'],
    [1_000, 'K'],
  ] as const;

  // Suffix of the unit one step larger than the one being examined.
  let largerSuffix: string | undefined;

  for (const [divisor, suffix] of units) {
    if (tokens >= divisor) {
      const scaled = (tokens / divisor).toFixed(1);
      // Rounding pushed the value up to the next unit boundary: promote it.
      if (scaled === '1000.0' && largerSuffix !== undefined) {
        return `1.0${largerSuffix}`;
      }
      return `${scaled}${suffix}`;
    }
    largerSuffix = suffix;
  }

  return tokens.toString();
}
1094+
/**
 * Summarize anomalies by type.
 *
 * `totalAnomalies` counts every anomaly entry, while each `*Days` field
 * counts the distinct dates on which that anomaly type occurred, so several
 * models tripping the same check on one day count as a single day.
 *
 * @param anomalies - Anomalies to summarize.
 * @returns Total entry count plus distinct-day counts per anomaly type.
 */
function summarizeAnomalies(anomalies: Anomaly[]): AnomalySummary {
  // Distinct dates seen per anomaly type.
  const highInputDates = new Set<string>();
  const highIoRatioDates = new Set<string>();
  const costSpikeDates = new Set<string>();
  const highCacheReadDates = new Set<string>();

  for (const anomaly of anomalies) {
    switch (anomaly.type) {
      case 'high_input':
        highInputDates.add(anomaly.date);
        break;
      case 'high_io_ratio':
        highIoRatioDates.add(anomaly.date);
        break;
      case 'cost_spike':
        costSpikeDates.add(anomaly.date);
        break;
      case 'high_cache_read':
        highCacheReadDates.add(anomaly.date);
        break;
    }
  }

  return {
    totalAnomalies: anomalies.length,
    highInputDays: highInputDates.size,
    highIoRatioDays: highIoRatioDates.size,
    costSpikeDays: costSpikeDates.size,
    highCacheReadDays: highCacheReadDates.size,
  };
}
1143+
/**
 * GET /api/usage/insights
 *
 * Responds with the anomalies detected in the requested date range together
 * with a per-type summary of them.
 * Query: ?since=YYYYMMDD&until=YYYYMMDD
 */
usageRoutes.get(
  '/insights',
  async (req: Request<object, object, object, UsageQuery>, res: Response) => {
    try {
      // Validate both bounds before touching the cached data.
      const rangeStart = validateDate(req.query.since);
      const rangeEnd = validateDate(req.query.until);

      const daysInRange = filterByDateRange(await getCachedDailyData(), rangeStart, rangeEnd);

      const anomalies = detectAnomalies(daysInRange);
      const payload = {
        anomalies,
        summary: summarizeAnomalies(anomalies),
      };

      res.json({ success: true, data: payload });
    } catch (error) {
      // Failures in validation, loading or detection all take this path.
      errorResponse(res, error, 'Failed to fetch usage insights');
    }
  }
);
0 commit comments