6363import org .elasticsearch .xpack .inference .mapper .SemanticTextFieldMapper ;
6464import org .elasticsearch .xpack .inference .mapper .SemanticTextUtils ;
6565import org .elasticsearch .xpack .inference .registry .ModelRegistry ;
66+ import org .elasticsearch .xpack .inference .telemetry .InferenceStats ;
6667
6768import java .io .IOException ;
6869import java .util .ArrayList ;
7879import static org .elasticsearch .xpack .inference .InferencePlugin .INFERENCE_API_FEATURE ;
7980import static org .elasticsearch .xpack .inference .mapper .SemanticTextField .toSemanticTextFieldChunks ;
8081import static org .elasticsearch .xpack .inference .mapper .SemanticTextField .toSemanticTextFieldChunksLegacy ;
82+ import static org .elasticsearch .xpack .inference .telemetry .InferenceStats .modelAttributes ;
83+ import static org .elasticsearch .xpack .inference .telemetry .InferenceStats .responseAttributes ;
8184
8285/**
8386 * A {@link MappedActionFilter} that intercepts {@link BulkShardRequest} to apply inference on fields specified
@@ -112,20 +115,23 @@ public class ShardBulkInferenceActionFilter implements MappedActionFilter {
112115 private final ModelRegistry modelRegistry ;
113116 private final XPackLicenseState licenseState ;
114117 private final IndexingPressure indexingPressure ;
118+ private final InferenceStats inferenceStats ;
115119 private volatile long batchSizeInBytes ;
116120
/**
 * Creates the filter and stores its collaborators.
 *
 * <p>The inference batch size is read once from the settings currently held by
 * {@code clusterService}, and {@code setBatchSize} is registered as the update consumer for
 * {@code INDICES_INFERENCE_BATCH_SIZE} on the cluster settings.
 *
 * @param clusterService           supplies the current settings and the cluster settings registry
 * @param inferenceServiceRegistry stored for later use by the filter
 * @param modelRegistry            stored for later use by the filter
 * @param licenseState             stored for later use by the filter
 * @param indexingPressure         stored for later use by the filter
 * @param inferenceStats           telemetry sink used when recording request-count metrics
 */
public ShardBulkInferenceActionFilter(
    ClusterService clusterService,
    InferenceServiceRegistry inferenceServiceRegistry,
    ModelRegistry modelRegistry,
    XPackLicenseState licenseState,
    IndexingPressure indexingPressure,
    InferenceStats inferenceStats
) {
    this.clusterService = clusterService;
    this.inferenceServiceRegistry = inferenceServiceRegistry;
    this.modelRegistry = modelRegistry;
    this.licenseState = licenseState;
    this.indexingPressure = indexingPressure;
    this.inferenceStats = inferenceStats;
    // Take the initial value first, then subscribe so later setting changes reach setBatchSize.
    this.batchSizeInBytes = INDICES_INFERENCE_BATCH_SIZE.get(clusterService.getSettings()).getBytes();
    clusterService.getClusterSettings().addSettingsUpdateConsumer(INDICES_INFERENCE_BATCH_SIZE, this::setBatchSize);
}
@@ -386,10 +392,12 @@ public void onFailure(Exception exc) {
386392 public void onResponse (List <ChunkedInference > results ) {
387393 try (onFinish ) {
388394 var requestsIterator = requests .iterator ();
395+ int success = 0 ;
389396 for (ChunkedInference result : results ) {
390397 var request = requestsIterator .next ();
391398 var acc = inferenceResults .get (request .bulkItemIndex );
392399 if (result instanceof ChunkedInferenceError error ) {
400+ recordRequestCountMetrics (inferenceProvider .model , 1 , error .exception ());
393401 acc .addFailure (
394402 new InferenceException (
395403 "Exception when running inference id [{}] on field [{}]" ,
@@ -399,6 +407,7 @@ public void onResponse(List<ChunkedInference> results) {
399407 )
400408 );
401409 } else {
410+ success ++;
402411 acc .addOrUpdateResponse (
403412 new FieldInferenceResponse (
404413 request .field (),
@@ -412,12 +421,16 @@ public void onResponse(List<ChunkedInference> results) {
412421 );
413422 }
414423 }
424+ if (success > 0 ) {
425+ recordRequestCountMetrics (inferenceProvider .model , success , null );
426+ }
415427 }
416428 }
417429
418430 @ Override
419431 public void onFailure (Exception exc ) {
420432 try (onFinish ) {
433+ recordRequestCountMetrics (inferenceProvider .model , requests .size (), exc );
421434 for (FieldInferenceRequest request : requests ) {
422435 addInferenceResponseFailure (
423436 request .bulkItemIndex ,
@@ -444,6 +457,14 @@ public void onFailure(Exception exc) {
444457 );
445458 }
446459
460+ private void recordRequestCountMetrics (Model model , int incrementBy , Throwable throwable ) {
461+ Map <String , Object > requestCountAttributes = new HashMap <>();
462+ requestCountAttributes .putAll (modelAttributes (model ));
463+ requestCountAttributes .putAll (responseAttributes (throwable ));
464+ requestCountAttributes .put ("inference_source" , "semantic_text_bulk" );
465+ inferenceStats .requestCount ().incrementBy (incrementBy , requestCountAttributes );
466+ }
467+
447468 /**
448469 * Adds all inference requests associated with their respective inference IDs to the given {@code requestsMap}
449470 * for the specified {@code item}.
0 commit comments