Skip to content

Commit aec1686

Browse files
ppisljardevamanv
authored and committed
[agent builder] telemetry: tokens, ttl and tool calls by model (elastic#246688)
1 parent 9bba44a commit aec1686

File tree

4 files changed

+508
-0
lines changed

4 files changed

+508
-0
lines changed

x-pack/platform/plugins/private/telemetry_collection_xpack/schema/xpack_platform.json

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,93 @@
628628
}
629629
}
630630
},
631+
"tokens_by_model": {
632+
"type": "array",
633+
"items": {
634+
"properties": {
635+
"model": {
636+
"type": "keyword",
637+
"_meta": {
638+
"description": "Model identifier for token usage grouping"
639+
}
640+
},
641+
"total_tokens": {
642+
"type": "long",
643+
"_meta": {
644+
"description": "Total tokens (input + output) consumed by this model"
645+
}
646+
},
647+
"avg_tokens_per_round": {
648+
"type": "float",
649+
"_meta": {
650+
"description": "Average tokens per conversation round for this model"
651+
}
652+
},
653+
"sample_count": {
654+
"type": "long",
655+
"_meta": {
656+
"description": "Number of rounds sampled for this model"
657+
}
658+
}
659+
}
660+
}
661+
},
662+
"query_to_result_time_by_model": {
663+
"type": "array",
664+
"items": {
665+
"properties": {
666+
"model": {
667+
"type": "keyword",
668+
"_meta": {
669+
"description": "Model identifier for QTRT grouping"
670+
}
671+
},
672+
"p50": {
673+
"type": "long"
674+
},
675+
"p75": {
676+
"type": "long"
677+
},
678+
"p90": {
679+
"type": "long"
680+
},
681+
"p95": {
682+
"type": "long"
683+
},
684+
"p99": {
685+
"type": "long"
686+
},
687+
"mean": {
688+
"type": "long"
689+
},
690+
"total_samples": {
691+
"type": "long"
692+
},
693+
"sample_count": {
694+
"type": "long"
695+
}
696+
}
697+
}
698+
},
699+
"tool_calls_by_model": {
700+
"type": "array",
701+
"items": {
702+
"properties": {
703+
"model": {
704+
"type": "keyword",
705+
"_meta": {
706+
"description": "Model identifier for tool-call grouping"
707+
}
708+
},
709+
"count": {
710+
"type": "long",
711+
"_meta": {
712+
"description": "Tool calls counted for this model"
713+
}
714+
}
715+
}
716+
}
717+
},
631718
"tool_calls": {
632719
"properties": {
633720
"total": {

x-pack/platform/plugins/shared/agent_builder/server/telemetry/query_utils.ts

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,265 @@ export class QueryUtils {
647647
}
648648
}
649649

650+
/**
651+
* Get token consumption grouped by model
652+
*/
653+
async getTokensByModel(): Promise<
654+
Array<{
655+
model: string;
656+
total_tokens: number;
657+
avg_tokens_per_round: number;
658+
sample_count: number;
659+
}>
660+
> {
661+
try {
662+
const conversationIndexName = chatSystemIndex('conversations');
663+
const response = await this.esClient.search({
664+
index: conversationIndexName,
665+
size: 0,
666+
aggs: {
667+
all_rounds: {
668+
nested: {
669+
path: 'conversation_rounds',
670+
},
671+
aggs: {
672+
by_model: {
673+
terms: {
674+
field: 'conversation_rounds.model_usage.model',
675+
size: 50,
676+
missing: 'unknown',
677+
},
678+
aggs: {
679+
input_tokens: {
680+
sum: {
681+
field: 'conversation_rounds.model_usage.input_tokens',
682+
},
683+
},
684+
output_tokens: {
685+
sum: {
686+
field: 'conversation_rounds.model_usage.output_tokens',
687+
},
688+
},
689+
},
690+
},
691+
},
692+
},
693+
},
694+
});
695+
696+
const buckets = (response.aggregations?.all_rounds as any)?.by_model?.buckets || [];
697+
698+
const results: Array<{
699+
model: string;
700+
total_tokens: number;
701+
avg_tokens_per_round: number;
702+
sample_count: number;
703+
}> = [];
704+
705+
for (const bucket of buckets) {
706+
const inputTokens = bucket.input_tokens?.value || 0;
707+
const outputTokens = bucket.output_tokens?.value || 0;
708+
const totalTokens = inputTokens + outputTokens;
709+
const sampleCount = bucket.doc_count || 0;
710+
const avgTokensPerRound =
711+
sampleCount > 0 ? Math.round((totalTokens / sampleCount) * 100) / 100 : 0;
712+
713+
results.push({
714+
model: bucket.key as string,
715+
total_tokens: Math.round(totalTokens),
716+
avg_tokens_per_round: avgTokensPerRound,
717+
sample_count: sampleCount,
718+
});
719+
}
720+
721+
results.sort((a, b) => b.total_tokens - a.total_tokens);
722+
723+
return results;
724+
} catch (error) {
725+
if (!isIndexNotFoundError(error)) {
726+
this.logger.warn(`Failed to fetch tokens by model: ${error.message}`);
727+
}
728+
return [];
729+
}
730+
}
731+
732+
/**
733+
* Get query-to-result time (TTLT) grouped by model
734+
*/
735+
async getQueryToResultTimeByModel(): Promise<
736+
Array<{
737+
model: string;
738+
p50: number;
739+
p75: number;
740+
p90: number;
741+
p95: number;
742+
p99: number;
743+
mean: number;
744+
total_samples: number;
745+
sample_count: number;
746+
}>
747+
> {
748+
try {
749+
const conversationIndexName = chatSystemIndex('conversations');
750+
const response = await this.esClient.search({
751+
index: conversationIndexName,
752+
size: 0,
753+
aggs: {
754+
all_rounds: {
755+
nested: {
756+
path: 'conversation_rounds',
757+
},
758+
aggs: {
759+
by_model: {
760+
terms: {
761+
field: 'conversation_rounds.model_usage.model',
762+
size: 50,
763+
missing: 'unknown',
764+
},
765+
aggs: {
766+
ttl_percentiles: {
767+
percentiles: {
768+
field: 'conversation_rounds.time_to_last_token',
769+
percents: [50, 75, 90, 95, 99],
770+
},
771+
},
772+
ttl_avg: {
773+
avg: {
774+
field: 'conversation_rounds.time_to_last_token',
775+
},
776+
},
777+
ttl_count: {
778+
value_count: {
779+
field: 'conversation_rounds.time_to_last_token',
780+
},
781+
},
782+
},
783+
},
784+
},
785+
},
786+
},
787+
});
788+
789+
const buckets = (response.aggregations?.all_rounds as any)?.by_model?.buckets || [];
790+
791+
const results: Array<{
792+
model: string;
793+
p50: number;
794+
p75: number;
795+
p90: number;
796+
p95: number;
797+
p99: number;
798+
mean: number;
799+
total_samples: number;
800+
sample_count: number;
801+
}> = [];
802+
803+
for (const bucket of buckets) {
804+
const percentiles = bucket.ttl_percentiles?.values || {};
805+
results.push({
806+
model: bucket.key as string,
807+
p50: Math.round(percentiles['50.0'] || 0),
808+
p75: Math.round(percentiles['75.0'] || 0),
809+
p90: Math.round(percentiles['90.0'] || 0),
810+
p95: Math.round(percentiles['95.0'] || 0),
811+
p99: Math.round(percentiles['99.0'] || 0),
812+
mean: Math.round(bucket.ttl_avg?.value || 0),
813+
total_samples: bucket.ttl_count?.value || 0,
814+
sample_count: bucket.doc_count || 0,
815+
});
816+
}
817+
818+
results.sort((a, b) => b.sample_count - a.sample_count);
819+
820+
return results;
821+
} catch (error) {
822+
if (!isIndexNotFoundError(error)) {
823+
this.logger.warn(`Failed to fetch query-to-result time by model: ${error.message}`);
824+
}
825+
return [];
826+
}
827+
}
828+
829+
/**
830+
* Get tool call counts grouped by model based on round steps
831+
*/
832+
async getToolCallsByModel(): Promise<
833+
Array<{
834+
model: string;
835+
count: number;
836+
}>
837+
> {
838+
try {
839+
const conversationIndexName = chatSystemIndex('conversations');
840+
const response = await this.esClient.search({
841+
index: conversationIndexName,
842+
size: 0,
843+
aggs: {
844+
tool_calls_by_model: {
845+
scripted_metric: {
846+
init_script: 'state.modelCalls = new HashMap();',
847+
map_script: `
848+
def rounds = params._source.conversation_rounds;
849+
if (rounds == null) return;
850+
for (def round : rounds) {
851+
def modelUsage = round.model_usage;
852+
def model = (modelUsage != null && modelUsage.model != null) ? modelUsage.model : 'unknown';
853+
def steps = round.steps;
854+
if (steps == null) continue;
855+
int callCount = 0;
856+
for (def step : steps) {
857+
if (step.type != null && step.type == 'tool_call') {
858+
callCount += 1;
859+
}
860+
}
861+
if (callCount == 0) continue;
862+
def current = state.modelCalls.get(model);
863+
if (current == null) {
864+
state.modelCalls.put(model, callCount);
865+
} else {
866+
state.modelCalls.put(model, current + callCount);
867+
}
868+
}
869+
`,
870+
combine_script: 'return state.modelCalls;',
871+
reduce_script: `
872+
Map combined = new HashMap();
873+
for (state in states) {
874+
for (entry in state.entrySet()) {
875+
def model = entry.getKey();
876+
def value = entry.getValue();
877+
if (combined.containsKey(model)) {
878+
combined.put(model, combined.get(model) + value);
879+
} else {
880+
combined.put(model, value);
881+
}
882+
}
883+
}
884+
return combined;
885+
`,
886+
},
887+
},
888+
},
889+
});
890+
891+
const aggregated = (response.aggregations as any)?.tool_calls_by_model?.value || {};
892+
const results: Array<{ model: string; count: number }> = [];
893+
894+
for (const [model, count] of Object.entries(aggregated)) {
895+
results.push({ model, count: Number(count) });
896+
}
897+
898+
results.sort((a, b) => b.count - a.count);
899+
900+
return results;
901+
} catch (error) {
902+
if (!isIndexNotFoundError(error)) {
903+
this.logger.warn(`Failed to fetch tool calls by model: ${error.message}`);
904+
}
905+
return [];
906+
}
907+
}
908+
650909
/**
651910
* Calculate percentiles from bucketed time data
652911
* @param buckets - Map of bucket name → count

0 commit comments

Comments
 (0)