21
21
import org .elasticsearch .cluster .ClusterState ;
22
22
import org .elasticsearch .cluster .ClusterStateListener ;
23
23
import org .elasticsearch .cluster .metadata .IndexAbstraction ;
24
+ import org .elasticsearch .cluster .metadata .IndexMetadata ;
24
25
import org .elasticsearch .cluster .node .DiscoveryNode ;
25
26
import org .elasticsearch .cluster .service .ClusterService ;
26
27
import org .elasticsearch .common .settings .Setting ;
27
28
import org .elasticsearch .common .settings .Settings ;
29
+ import org .elasticsearch .common .util .Maps ;
28
30
import org .elasticsearch .core .TimeValue ;
29
31
import org .elasticsearch .gateway .GatewayService ;
30
32
import org .elasticsearch .index .Index ;
@@ -247,11 +249,14 @@ static boolean hasAtLeastOneGeoipProcessor(ClusterState clusterState) {
247
249
return false ;
248
250
}
249
251
250
- return clusterState .getMetadata ().indices ().values (). stream (). anyMatch ( indexMetadata -> {
252
+ for ( IndexMetadata indexMetadata : clusterState .getMetadata ().indices ().values ()) {
251
253
String defaultPipeline = IndexSettings .DEFAULT_PIPELINE .get (indexMetadata .getSettings ());
252
254
String finalPipeline = IndexSettings .FINAL_PIPELINE .get (indexMetadata .getSettings ());
253
- return checkReferencedPipelines .contains (defaultPipeline ) || checkReferencedPipelines .contains (finalPipeline );
254
- });
255
+ if (checkReferencedPipelines .contains (defaultPipeline ) || checkReferencedPipelines .contains (finalPipeline )) {
256
+ return true ;
257
+ }
258
+ }
259
+ return false ;
255
260
}
256
261
257
262
/**
@@ -264,12 +269,26 @@ static boolean hasAtLeastOneGeoipProcessor(ClusterState clusterState) {
264
269
@ SuppressWarnings ("unchecked" )
265
270
private static Set <String > pipelinesWithGeoIpProcessor (ClusterState clusterState , boolean downloadDatabaseOnPipelineCreation ) {
266
271
List <PipelineConfiguration > configurations = IngestService .getPipelines (clusterState );
272
+ Map <String , PipelineConfiguration > pipelineConfigById = Maps .newHashMapWithExpectedSize (configurations .size ());
273
+ for (PipelineConfiguration configuration : configurations ) {
274
+ pipelineConfigById .put (configuration .getId (), configuration );
275
+ }
276
+ // this map is used to keep track of pipelines that have already been checked
277
+ Map <String , Boolean > pipelineHasGeoProcessorById = Maps .newHashMapWithExpectedSize (configurations .size ());
267
278
Set <String > ids = new HashSet <>();
268
279
// note: this loop is unrolled rather than streaming-style because it's hot enough to show up in a flamegraph
269
280
for (PipelineConfiguration configuration : configurations ) {
270
281
List <Map <String , Object >> processors = (List <Map <String , Object >>) configuration .getConfig ().get (Pipeline .PROCESSORS_KEY );
271
- if (hasAtLeastOneGeoipProcessor (processors , downloadDatabaseOnPipelineCreation )) {
272
- ids .add (configuration .getId ());
282
+ String pipelineName = configuration .getId ();
283
+ if (pipelineHasGeoProcessorById .containsKey (pipelineName ) == false ) {
284
+ if (hasAtLeastOneGeoipProcessor (
285
+ processors ,
286
+ downloadDatabaseOnPipelineCreation ,
287
+ pipelineConfigById ,
288
+ pipelineHasGeoProcessorById
289
+ )) {
290
+ ids .add (pipelineName );
291
+ }
273
292
}
274
293
}
275
294
return Collections .unmodifiableSet (ids );
@@ -279,13 +298,27 @@ private static Set<String> pipelinesWithGeoIpProcessor(ClusterState clusterState
279
298
* Check if a list of processor contains at least a geoip processor.
280
299
* @param processors List of processors.
281
300
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
301
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
302
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
303
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
304
+ * out (null).
282
305
* @return true if a geoip processor is found in the processor list.
283
306
*/
284
- private static boolean hasAtLeastOneGeoipProcessor (List <Map <String , Object >> processors , boolean downloadDatabaseOnPipelineCreation ) {
307
+ private static boolean hasAtLeastOneGeoipProcessor (
308
+ List <Map <String , Object >> processors ,
309
+ boolean downloadDatabaseOnPipelineCreation ,
310
+ Map <String , PipelineConfiguration > pipelineConfigById ,
311
+ Map <String , Boolean > pipelineHasGeoProcessorById
312
+ ) {
285
313
if (processors != null ) {
286
314
// note: this loop is unrolled rather than streaming-style because it's hot enough to show up in a flamegraph
287
315
for (Map <String , Object > processor : processors ) {
288
- if (hasAtLeastOneGeoipProcessor (processor , downloadDatabaseOnPipelineCreation )) {
316
+ if (hasAtLeastOneGeoipProcessor (
317
+ processor ,
318
+ downloadDatabaseOnPipelineCreation ,
319
+ pipelineConfigById ,
320
+ pipelineHasGeoProcessorById
321
+ )) {
289
322
return true ;
290
323
}
291
324
}
@@ -297,10 +330,19 @@ private static boolean hasAtLeastOneGeoipProcessor(List<Map<String, Object>> pro
297
330
* Check if a processor config is a geoip processor or contains at least a geoip processor.
298
331
* @param processor Processor config.
299
332
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
333
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
334
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
335
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
336
+ * out (null).
300
337
* @return true if a geoip processor is found in the processor list.
301
338
*/
302
339
@ SuppressWarnings ("unchecked" )
303
- private static boolean hasAtLeastOneGeoipProcessor (Map <String , Object > processor , boolean downloadDatabaseOnPipelineCreation ) {
340
+ private static boolean hasAtLeastOneGeoipProcessor (
341
+ Map <String , Object > processor ,
342
+ boolean downloadDatabaseOnPipelineCreation ,
343
+ Map <String , PipelineConfiguration > pipelineConfigById ,
344
+ Map <String , Boolean > pipelineHasGeoProcessorById
345
+ ) {
304
346
if (processor == null ) {
305
347
return false ;
306
348
}
@@ -319,27 +361,51 @@ private static boolean hasAtLeastOneGeoipProcessor(Map<String, Object> processor
319
361
}
320
362
}
321
363
322
- return isProcessorWithOnFailureGeoIpProcessor (processor , downloadDatabaseOnPipelineCreation )
323
- || isForeachProcessorWithGeoipProcessor (processor , downloadDatabaseOnPipelineCreation );
364
+ return isProcessorWithOnFailureGeoIpProcessor (
365
+ processor ,
366
+ downloadDatabaseOnPipelineCreation ,
367
+ pipelineConfigById ,
368
+ pipelineHasGeoProcessorById
369
+ )
370
+ || isForeachProcessorWithGeoipProcessor (
371
+ processor ,
372
+ downloadDatabaseOnPipelineCreation ,
373
+ pipelineConfigById ,
374
+ pipelineHasGeoProcessorById
375
+ )
376
+ || isPipelineProcessorWithGeoIpProcessor (
377
+ processor ,
378
+ downloadDatabaseOnPipelineCreation ,
379
+ pipelineConfigById ,
380
+ pipelineHasGeoProcessorById
381
+ );
324
382
}
325
383
326
384
/**
327
385
* Check if a processor config has an on_failure clause containing at least a geoip processor.
328
386
* @param processor Processor config.
329
387
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
388
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
389
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
390
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
391
+ * out (null).
330
392
* @return true if a geoip processor is found in the processor list.
331
393
*/
332
394
@ SuppressWarnings ("unchecked" )
333
395
private static boolean isProcessorWithOnFailureGeoIpProcessor (
334
396
Map <String , Object > processor ,
335
- boolean downloadDatabaseOnPipelineCreation
397
+ boolean downloadDatabaseOnPipelineCreation ,
398
+ Map <String , PipelineConfiguration > pipelineConfigById ,
399
+ Map <String , Boolean > pipelineHasGeoProcessorById
336
400
) {
337
401
// note: this loop is unrolled rather than streaming-style because it's hot enough to show up in a flamegraph
338
402
for (Object value : processor .values ()) {
339
403
if (value instanceof Map
340
404
&& hasAtLeastOneGeoipProcessor (
341
405
((Map <String , List <Map <String , Object >>>) value ).get ("on_failure" ),
342
- downloadDatabaseOnPipelineCreation
406
+ downloadDatabaseOnPipelineCreation ,
407
+ pipelineConfigById ,
408
+ pipelineHasGeoProcessorById
343
409
)) {
344
410
return true ;
345
411
}
@@ -351,13 +417,84 @@ && hasAtLeastOneGeoipProcessor(
351
417
* Check if a processor is a foreach processor containing at least a geoip processor.
352
418
* @param processor Processor config.
353
419
* @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
420
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
421
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
422
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
423
+ * out (null).
354
424
* @return true if a geoip processor is found in the processor list.
355
425
*/
356
426
@ SuppressWarnings ("unchecked" )
357
- private static boolean isForeachProcessorWithGeoipProcessor (Map <String , Object > processor , boolean downloadDatabaseOnPipelineCreation ) {
427
+ private static boolean isForeachProcessorWithGeoipProcessor (
428
+ Map <String , Object > processor ,
429
+ boolean downloadDatabaseOnPipelineCreation ,
430
+ Map <String , PipelineConfiguration > pipelineConfigById ,
431
+ Map <String , Boolean > pipelineHasGeoProcessorById
432
+ ) {
358
433
final Map <String , Object > processorConfig = (Map <String , Object >) processor .get ("foreach" );
359
434
return processorConfig != null
360
- && hasAtLeastOneGeoipProcessor ((Map <String , Object >) processorConfig .get ("processor" ), downloadDatabaseOnPipelineCreation );
435
+ && hasAtLeastOneGeoipProcessor (
436
+ (Map <String , Object >) processorConfig .get ("processor" ),
437
+ downloadDatabaseOnPipelineCreation ,
438
+ pipelineConfigById ,
439
+ pipelineHasGeoProcessorById
440
+ );
441
+ }
442
+
443
+ /**
444
+ * Check if a processor is a pipeline processor containing at least a geoip processor. This method also updates
445
+ * pipelineHasGeoProcessorById with a result for any pipelines it looks at.
446
+ * @param processor Processor config.
447
+ * @param downloadDatabaseOnPipelineCreation Should the download_database_on_pipeline_creation of the geoip processor be true or false.
448
+ * @param pipelineConfigById A Map of pipeline id to PipelineConfiguration
449
+ * @param pipelineHasGeoProcessorById A Map of pipeline id to Boolean, indicating whether the pipeline references a geoip processor
450
+ * (true), does not reference a geoip processor (false), or we are currently trying to figure that
451
+ * out (null).
452
+ * @return true if a geoip processor is found in the processors of this processor if this processor is a pipeline processor.
453
+ */
454
+ @ SuppressWarnings ("unchecked" )
455
+ private static boolean isPipelineProcessorWithGeoIpProcessor (
456
+ Map <String , Object > processor ,
457
+ boolean downloadDatabaseOnPipelineCreation ,
458
+ Map <String , PipelineConfiguration > pipelineConfigById ,
459
+ Map <String , Boolean > pipelineHasGeoProcessorById
460
+ ) {
461
+ final Map <String , Object > processorConfig = (Map <String , Object >) processor .get ("pipeline" );
462
+ if (processorConfig != null ) {
463
+ String pipelineName = (String ) processorConfig .get ("name" );
464
+ if (pipelineName != null ) {
465
+ if (pipelineHasGeoProcessorById .containsKey (pipelineName )) {
466
+ if (pipelineHasGeoProcessorById .get (pipelineName ) == null ) {
467
+ /*
468
+ * If the value is null here, it indicates that this method has been called recursively with the same pipeline name.
469
+ * This will cause a runtime error when the pipeline is executed, but we're avoiding changing existing behavior at
470
+ * server startup time. Instead, we just bail out as quickly as possible. It is possible that this could lead to a
471
+ * geo database not being downloaded for the pipeline, but it doesn't really matter since the pipeline was going to
472
+ * fail anyway.
473
+ */
474
+ pipelineHasGeoProcessorById .put (pipelineName , false );
475
+ }
476
+ } else {
477
+ List <Map <String , Object >> childProcessors = null ;
478
+ PipelineConfiguration config = pipelineConfigById .get (pipelineName );
479
+ if (config != null ) {
480
+ childProcessors = (List <Map <String , Object >>) config .getConfig ().get (Pipeline .PROCESSORS_KEY );
481
+ }
482
+ // We initialize this to null so that we know it's in progress and can use it to avoid stack overflow errors:
483
+ pipelineHasGeoProcessorById .put (pipelineName , null );
484
+ pipelineHasGeoProcessorById .put (
485
+ pipelineName ,
486
+ hasAtLeastOneGeoipProcessor (
487
+ childProcessors ,
488
+ downloadDatabaseOnPipelineCreation ,
489
+ pipelineConfigById ,
490
+ pipelineHasGeoProcessorById
491
+ )
492
+ );
493
+ }
494
+ return pipelineHasGeoProcessorById .get (pipelineName );
495
+ }
496
+ }
497
+ return false ;
361
498
}
362
499
363
500
private static final TimeValue MASTER_TIMEOUT = TimeValue .MAX_VALUE ;
0 commit comments