@@ -647,6 +647,265 @@ export class QueryUtils {
647647 }
648648 }
649649
/**
 * Summarise token consumption per model across all conversation rounds.
 *
 * Issues a single aggregation-only search (`size: 0`) against the
 * conversations index: rounds are bucketed by
 * `conversation_rounds.model_usage.model` (rounds with no model fall into the
 * `unknown` bucket, at most 50 buckets are requested) and input/output token
 * counts are summed per bucket.
 *
 * @returns One row per model, sorted by `total_tokens` descending.
 *   `avg_tokens_per_round` is rounded to two decimals and uses the bucket's
 *   `doc_count` as the divisor. Returns an empty array when the search
 *   throws; errors other than "index not found" are logged as warnings.
 */
async getTokensByModel(): Promise<
  Array<{
    model: string;
    total_tokens: number;
    avg_tokens_per_round: number;
    sample_count: number;
  }>
> {
  type TokenUsageRow = {
    model: string;
    total_tokens: number;
    avg_tokens_per_round: number;
    sample_count: number;
  };

  try {
    const searchResult = await this.esClient.search({
      index: chatSystemIndex('conversations'),
      size: 0,
      aggs: {
        all_rounds: {
          nested: { path: 'conversation_rounds' },
          aggs: {
            by_model: {
              terms: {
                field: 'conversation_rounds.model_usage.model',
                size: 50,
                missing: 'unknown',
              },
              aggs: {
                input_tokens: {
                  sum: { field: 'conversation_rounds.model_usage.input_tokens' },
                },
                output_tokens: {
                  sum: { field: 'conversation_rounds.model_usage.output_tokens' },
                },
              },
            },
          },
        },
      },
    });

    const modelBuckets = (searchResult.aggregations?.all_rounds as any)?.by_model?.buckets || [];
    const rows: TokenUsageRow[] = [];

    for (const modelBucket of modelBuckets) {
      const roundCount = modelBucket.doc_count || 0;
      const tokenSum =
        (modelBucket.input_tokens?.value || 0) + (modelBucket.output_tokens?.value || 0);

      // Two-decimal average; stays at 0 for an empty bucket to avoid dividing by zero.
      let averagePerRound = 0;
      if (roundCount > 0) {
        averagePerRound = Math.round((tokenSum / roundCount) * 100) / 100;
      }

      rows.push({
        model: modelBucket.key as string,
        total_tokens: Math.round(tokenSum),
        avg_tokens_per_round: averagePerRound,
        sample_count: roundCount,
      });
    }

    // Heaviest consumers first.
    rows.sort((left, right) => right.total_tokens - left.total_tokens);
    return rows;
  } catch (error) {
    // A missing index is not worth a warning; anything else is.
    if (!isIndexNotFoundError(error)) {
      this.logger.warn(`Failed to fetch tokens by model: ${error.message}`);
    }
    return [];
  }
}
731+
/**
 * Report query-to-result latency (time to last token, TTLT) per model.
 *
 * Issues a single aggregation-only search (`size: 0`) against the
 * conversations index: rounds are bucketed by
 * `conversation_rounds.model_usage.model` (rounds with no model fall into the
 * `unknown` bucket, at most 50 buckets are requested) and, per bucket, the
 * 50/75/90/95/99th percentiles, the average and the value count of
 * `conversation_rounds.time_to_last_token` are computed.
 *
 * @returns One row per model, sorted by `sample_count` descending.
 *   Percentiles and `mean` are rounded to the nearest integer and default to
 *   0 when absent. `total_samples` is the number of TTLT values in the
 *   bucket, `sample_count` is the bucket's `doc_count`. Returns an empty
 *   array when the search throws; errors other than "index not found" are
 *   logged as warnings.
 */
async getQueryToResultTimeByModel(): Promise<
  Array<{
    model: string;
    p50: number;
    p75: number;
    p90: number;
    p95: number;
    p99: number;
    mean: number;
    total_samples: number;
    sample_count: number;
  }>
> {
  type LatencyRow = {
    model: string;
    p50: number;
    p75: number;
    p90: number;
    p95: number;
    p99: number;
    mean: number;
    total_samples: number;
    sample_count: number;
  };

  // All three sub-aggregations read the same latency field.
  const latencyField = 'conversation_rounds.time_to_last_token';

  try {
    const searchResult = await this.esClient.search({
      index: chatSystemIndex('conversations'),
      size: 0,
      aggs: {
        all_rounds: {
          nested: { path: 'conversation_rounds' },
          aggs: {
            by_model: {
              terms: {
                field: 'conversation_rounds.model_usage.model',
                size: 50,
                missing: 'unknown',
              },
              aggs: {
                ttl_percentiles: {
                  percentiles: {
                    field: latencyField,
                    percents: [50, 75, 90, 95, 99],
                  },
                },
                ttl_avg: { avg: { field: latencyField } },
                ttl_count: { value_count: { field: latencyField } },
              },
            },
          },
        },
      },
    });

    const modelBuckets = (searchResult.aggregations?.all_rounds as any)?.by_model?.buckets || [];
    const rows: LatencyRow[] = [];

    for (const modelBucket of modelBuckets) {
      const percentileValues = modelBucket.ttl_percentiles?.values || {};
      // Percentile results are looked up by keys such as '50.0'; missing values count as 0.
      const roundedPercentile = (key: string): number => Math.round(percentileValues[key] || 0);

      rows.push({
        model: modelBucket.key as string,
        p50: roundedPercentile('50.0'),
        p75: roundedPercentile('75.0'),
        p90: roundedPercentile('90.0'),
        p95: roundedPercentile('95.0'),
        p99: roundedPercentile('99.0'),
        mean: Math.round(modelBucket.ttl_avg?.value || 0),
        total_samples: modelBucket.ttl_count?.value || 0,
        sample_count: modelBucket.doc_count || 0,
      });
    }

    // Most frequently used models first.
    rows.sort((left, right) => right.sample_count - left.sample_count);
    return rows;
  } catch (error) {
    // A missing index is not worth a warning; anything else is.
    if (!isIndexNotFoundError(error)) {
      this.logger.warn(`Failed to fetch query-to-result time by model: ${error.message}`);
    }
    return [];
  }
}
828+
/**
 * Count tool calls per model, derived from the steps recorded on each
 * conversation round.
 *
 * Uses a `scripted_metric` aggregation that walks
 * `_source.conversation_rounds` of every conversation document. For each
 * round it counts the steps whose `type` is `tool_call` and adds that count
 * to the round's `model_usage.model` (or `unknown` when no model is
 * recorded). Rounds without steps or without tool calls contribute nothing.
 *
 * @returns One `{ model, count }` row per model that made at least one tool
 *   call, sorted by `count` descending. Returns an empty array when the
 *   search throws; errors other than "index not found" are logged as
 *   warnings.
 */
async getToolCallsByModel(): Promise<
  Array<{
    model: string;
    count: number;
  }>
> {
  try {
    const conversationIndexName = chatSystemIndex('conversations');
    const response = await this.esClient.search({
      index: conversationIndexName,
      size: 0,
      aggs: {
        tool_calls_by_model: {
          scripted_metric: {
            // Per-shard state: model name -> running tool call count.
            init_script: 'state.modelCalls = new HashMap();',
            map_script: `
              def rounds = params._source.conversation_rounds;
              if (rounds == null) return;
              for (def round : rounds) {
                def modelUsage = round.model_usage;
                def model = (modelUsage != null && modelUsage.model != null) ? modelUsage.model : 'unknown';
                def steps = round.steps;
                if (steps == null) continue;
                int callCount = 0;
                for (def step : steps) {
                  if (step.type != null && step.type == 'tool_call') {
                    callCount += 1;
                  }
                }
                if (callCount == 0) continue;
                def current = state.modelCalls.get(model);
                if (current == null) {
                  state.modelCalls.put(model, callCount);
                } else {
                  state.modelCalls.put(model, current + callCount);
                }
              }
            `,
            combine_script: 'return state.modelCalls;',
            // A shard that returned an empty aggregation contributes a null
            // entry to "states"; skip it instead of failing the whole reduce.
            reduce_script: `
              Map combined = new HashMap();
              for (state in states) {
                if (state == null) {
                  continue;
                }
                for (entry in state.entrySet()) {
                  def model = entry.getKey();
                  def value = entry.getValue();
                  if (combined.containsKey(model)) {
                    combined.put(model, combined.get(model) + value);
                  } else {
                    combined.put(model, value);
                  }
                }
              }
              return combined;
            `,
          },
        },
      },
    });

    // The reduce script returns a plain model -> count map.
    const aggregated = (response.aggregations as any)?.tool_calls_by_model?.value || {};

    const results = Object.entries(aggregated).map(([model, count]) => ({
      model,
      count: Number(count),
    }));

    // Models with the most tool calls first.
    results.sort((a, b) => b.count - a.count);

    return results;
  } catch (error) {
    // A missing index is not worth a warning; anything else is.
    if (!isIndexNotFoundError(error)) {
      this.logger.warn(`Failed to fetch tool calls by model: ${error.message}`);
    }
    return [];
  }
}
908+
650909 /**
651910 * Calculate percentiles from bucketed time data
652911 * @param buckets - Map of bucket name → count
0 commit comments