@@ -317,6 +317,9 @@ private void searchEventGroupedByStackTrace(
317317 GetStackTracesResponseBuilder responseBuilder ,
318318 EventsIndex eventsIndex
319319 ) {
320+ // We have nested aggregations, which in theory might blow up to MAX_TRACE_EVENTS_RESULT_SIZE^2 items
321+ // reported. But we know that the total number of items is limited by our down-sampling to
322+ // a maximum of ~100k (MAX_TRACE_EVENTS_RESULT_SIZE is higher to be on the safe side).
320323 responseBuilder .setSamplingRate (eventsIndex .getSampleRate ());
321324 TermsAggregationBuilder groupByStackTraceId = new TermsAggregationBuilder ("group_by" )
322325 // 'size' should be max 100k, but might be slightly more. Better be on the safe side.
@@ -326,6 +329,14 @@ private void searchEventGroupedByStackTrace(
326329 // Especially with high cardinality fields, this makes aggregations really slow.
327330 .executionHint ("map" )
328331 .subAggregation (new SumAggregationBuilder ("count" ).field ("Stacktrace.count" ));
332+ TermsAggregationBuilder groupByHostId = new TermsAggregationBuilder ("group_by" )
333+ // 'size' specifies the max number of host IDs we support per request.
334+ .size (MAX_TRACE_EVENTS_RESULT_SIZE )
335+ .field ("host.id" )
336+ // 'execution_hint: map' skips the slow building of ordinals that we don't need.
337+ // Especially with high cardinality fields, this makes aggregations really slow.
338+ .executionHint ("map" )
339+ .subAggregation (groupByStackTraceId );
329340 SubGroupCollector subGroups = SubGroupCollector .attach (
330341 groupByStackTraceId ,
331342 request .getAggregationFields (),
@@ -341,62 +352,74 @@ private void searchEventGroupedByStackTrace(
341352 .addAggregation (new MinAggregationBuilder ("min_time" ).field ("@timestamp" ))
342353 .addAggregation (new MaxAggregationBuilder ("max_time" ).field ("@timestamp" ))
343354 .addAggregation (
344- // We have nested aggregations, which in theory might blow up to MAX_TRACE_EVENTS_RESULT_SIZE^2 items
345- // reported. But we know that the total number of items is limited by our down-sampling to
346- // a maximum of ~100k (MAX_TRACE_EVENTS_RESULT_SIZE is higher to be on the safe side).
347355 new TermsAggregationBuilder ("group_by" )
348356 // 'size' specifies the max number of executable names we support per request.
349357 .size (MAX_TRACE_EVENTS_RESULT_SIZE )
350- .field ("host.id" )
358+ .field ("process.executable.name" )
351359 // 'execution_hint: map' skips the slow building of ordinals that we don't need.
352360 // Especially with high cardinality fields, this makes aggregations really slow.
353361 .executionHint ("map" )
354- .subAggregation (groupByStackTraceId )
362+ .subAggregation (groupByHostId )
355363 )
356364 .addAggregation (new SumAggregationBuilder ("total_count" ).field ("Stacktrace.count" ))
357365 .execute (handleEventsGroupedByStackTrace (submitTask , client , responseBuilder , submitListener , searchResponse -> {
358366 long totalCount = getAggValueAsLong (searchResponse , "total_count" );
359367
360368 Resampler resampler = new Resampler (request , responseBuilder .getSamplingRate (), totalCount );
361- Terms hosts = searchResponse .getAggregations ().get ("group_by" );
362369
363370 // Sort items lexicographically to access Lucene's term dictionary more efficiently when issuing an mget request.
364371 // The term dictionary is lexicographically sorted and using the same order reduces the number of page faults
365372 // needed to load it.
366373 long totalFinalCount = 0 ;
367374 List <HostEventCount > hostEventCounts = new ArrayList <>(MAX_TRACE_EVENTS_RESULT_SIZE );
375+ List <ExecutableEventCount > executableEventCounts = new ArrayList <>(MAX_TRACE_EVENTS_RESULT_SIZE );
368376 Map <String , TraceEvent > stackTraceEvents = new TreeMap <>();
369- for (Terms .Bucket hostBucket : hosts .getBuckets ()) {
370- String hostid = hostBucket .getKeyAsString ();
371-
372- Terms stacktraces = hostBucket .getAggregations ().get ("group_by" );
373- for (Terms .Bucket stacktraceBucket : stacktraces .getBuckets ()) {
374- Sum count = stacktraceBucket .getAggregations ().get ("count" );
375- int finalCount = resampler .adjustSampleCount ((int ) count .value ());
376- if (finalCount <= 0 ) {
377- continue ;
378- }
379- totalFinalCount += finalCount ;
380-
381- /*
382- The same stacktraces may come from different hosts (eventually from different datacenters).
383- We make a list of the triples here. As soon as we have the host metadata, we can calculate
384- the CO2 emission and the costs for each TraceEvent.
385- */
386- String stackTraceID = stacktraceBucket .getKeyAsString ();
387- hostEventCounts .add (new HostEventCount (hostid , stackTraceID , finalCount ));
388-
389- TraceEvent event = stackTraceEvents .get (stackTraceID );
390- if (event == null ) {
391- event = new TraceEvent (stackTraceID );
392- stackTraceEvents .put (stackTraceID , event );
377+
378+ Terms executableNames = searchResponse .getAggregations ().get ("group_by" );
379+ for (Terms .Bucket executableBucket : executableNames .getBuckets ()) {
380+ String executableName = executableBucket .getKeyAsString ();
381+
382+ Terms hosts = executableBucket .getAggregations ().get ("group_by" );
383+ for (Terms .Bucket hostBucket : hosts .getBuckets ()) {
384+ String hostid = hostBucket .getKeyAsString ();
385+
386+ Terms stacktraces = hostBucket .getAggregations ().get ("group_by" );
387+ for (Terms .Bucket stacktraceBucket : stacktraces .getBuckets ()) {
388+ Sum count = stacktraceBucket .getAggregations ().get ("count" );
389+ int finalCount = resampler .adjustSampleCount ((int ) count .value ());
390+ if (finalCount <= 0 ) {
391+ continue ;
392+ }
393+ totalFinalCount += finalCount ;
394+
395+ String stackTraceID = stacktraceBucket .getKeyAsString ();
396+
397+ /*
398+ The same stacktraces may come from different executables.
399+ We make a list of the triples here.
400+ */
401+ executableEventCounts .add (new ExecutableEventCount (executableName , stackTraceID , finalCount ));
402+
403+ /*
404+ The same stacktraces may come from different hosts (eventually from different datacenters).
405+ We make a list of the triples here. As soon as we have the host metadata, we can calculate
406+ the CO2 emission and the costs for each TraceEvent.
407+ */
408+ hostEventCounts .add (new HostEventCount (hostid , stackTraceID , finalCount ));
409+
410+ TraceEvent event = stackTraceEvents .get (stackTraceID );
411+ if (event == null ) {
412+ event = new TraceEvent (stackTraceID );
413+ stackTraceEvents .put (stackTraceID , event );
414+ }
415+ event .count += finalCount ;
416+ subGroups .collectResults (stacktraceBucket , event );
393417 }
394- event .count += finalCount ;
395- subGroups .collectResults (stacktraceBucket , event );
396418 }
397419 }
398420 responseBuilder .setTotalSamples (totalFinalCount );
399421 responseBuilder .setHostEventCounts (hostEventCounts );
422+ responseBuilder .setExecutableEventCounts (executableEventCounts );
400423 log .debug (
401424 "Found [{}] stacktrace events, resampled with sample rate [{}] to [{}] events ([{}] unique stack traces)." ,
402425 totalCount ,
@@ -834,4 +857,6 @@ private void mget(Client client, List<Index> indices, List<String> slice, Action
834857 }
835858
836859 record HostEventCount (String hostID , String stacktraceID , int count ) {}
860+
861+ record ExecutableEventCount (String executableName , String stacktraceID , int count ) {}
837862}
0 commit comments