5555import java .util .Map ;
5656import java .util .Objects ;
5757import java .util .Set ;
58+ import java .util .concurrent .atomic .AtomicInteger ;
5859import java .util .function .Function ;
5960import java .util .function .Supplier ;
6061import java .util .regex .Pattern ;
@@ -108,7 +109,14 @@ public class EnterpriseGeoIpDownloader extends AllocatedPersistentTask {
108109
109110 // visible for testing
110111 protected volatile EnterpriseGeoIpTaskState state ;
111- private volatile Scheduler .ScheduledCancellable scheduled ;
112+ /**
113+ * The currently scheduled periodic run. Only null before first periodic run.
114+ */
115+ private volatile Scheduler .ScheduledCancellable scheduledPeriodicRun ;
116+ /**
117+ * The number of requested runs. If this is greater than 0, then a run is either in progress or scheduled to run as soon as possible.
118+ */
119+ private final AtomicInteger queuedRuns = new AtomicInteger (0 );
112120 private final Supplier <TimeValue > pollIntervalSupplier ;
113121 private final Function <String , char []> tokenProvider ;
114122
@@ -390,50 +398,120 @@ static byte[] getChunk(InputStream is) throws IOException {
390398 }
391399
392400 /**
393- * Downloads the geoip databases now, and schedules them to be downloaded again after pollInterval.
401+ * Cancels the currently scheduled run (if any) and schedules a new periodic run using the current poll interval, then requests
402+ * that the downloader runs on demand now. The main reason we need that last step is that if this persistent task
403+ * gets reassigned to a different node, we want to run the downloader immediately on that new node, not wait for the next periodic run.
394404 */
395- synchronized void runDownloader () {
396- // by the time we reach here, the state will never be null
397- assert this .state != null : "this.setState() is null. You need to call setState() before calling runDownloader()" ;
405+ public void restartPeriodicRun () {
406+ if (isCancelled () || isCompleted () || threadPool .scheduler ().isShutdown ()) {
407+ logger .debug ("Not restarting periodic run because task is cancelled, completed, or shutting down" );
408+ return ;
409+ }
410+ logger .debug ("Restarting periodic run" );
411+ // We synchronize to ensure we only have one scheduledPeriodicRun at a time.
412+ synchronized (this ) {
413+ if (scheduledPeriodicRun != null ) {
414+ // Technically speaking, there's a chance that the scheduled run is already running, in which case cancelling it here does
415+ // nothing. That means that we might end up with two periodic runs scheduled close together. However, that's unlikely to
416+ // happen and relatively harmless if it does, as we only end up running the downloader more often than strictly necessary.
417+ final boolean cancelSuccessful = scheduledPeriodicRun .cancel ();
418+ logger .debug ("Cancelled scheduled run: [{}]" , cancelSuccessful );
419+ }
420+ // This is based on the premise that the poll interval is sufficiently large that we don't need to worry about
421+ // the scheduled `runPeriodic` running before this method completes.
422+ scheduledPeriodicRun = threadPool .schedule (this ::runPeriodic , pollIntervalSupplier .get (), threadPool .generic ());
423+ }
424+ // Technically, with multiple rapid calls to restartPeriodicRun, we could end up with multiple calls to requestRunOnDemand, but
425+ // that's unlikely to happen and harmless if it does, as we only end up running the downloader more often than strictly necessary.
426+ requestRunOnDemand ();
427+ }
428+
429+ /**
430+ * Runs the downloader now and schedules the next periodic run using the poll interval.
431+ */
432+ private void runPeriodic () {
433+ if (isCancelled () || isCompleted () || threadPool .scheduler ().isShutdown ()) {
434+ logger .debug ("Not running periodic downloader because task is cancelled, completed, or shutting down" );
435+ return ;
436+ }
398437
399- // there's a race condition between here and requestReschedule. originally this scheduleNextRun call was at the end of this
400- // block, but remember that updateDatabases can take seconds to run (it's downloading bytes from the internet), and so during the
401- // very first run there would be no future run scheduled to reschedule in requestReschedule. which meant that if you went from zero
402- // to N(>=2) databases in quick succession, then all but the first database wouldn't necessarily get downloaded, because the
403- // requestReschedule call in the EnterpriseGeoIpDownloaderTaskExecutor's clusterChanged wouldn't have a scheduled future run to
404- // reschedule. scheduling the next run at the beginning of this run means that there's a much smaller window (milliseconds?, rather
405- // than seconds) in which such a race could occur. technically there's a window here, still, but i think it's _greatly_ reduced.
406- scheduleNextRun (pollIntervalSupplier .get ());
407- // TODO regardless of the above comment, i like the idea of checking the lowest last-checked time and then running the math to get
408- // to the next interval from then -- maybe that's a neat future enhancement to add
438+ logger .trace ("Running periodic downloader" );
439+ // There's a chance that an on-demand run is already in progress, in which case this periodic run is redundant.
440+ // However, we don't try to avoid that case here, as it's harmless to run the downloader more than strictly necessary (due to
441+ // the high default poll interval of 3d), and it simplifies the logic considerably.
442+ requestRunOnDemand ();
409443
444+ synchronized (this ) {
445+ scheduledPeriodicRun = threadPool .schedule (this ::runPeriodic , pollIntervalSupplier .get (), threadPool .generic ());
446+ }
447+ }
448+
449+ /**
450+ * This method requests that the downloader runs on the latest cluster state, which likely contains a change in the GeoIP metadata.
451+ * This method does nothing if this task is cancelled or completed.
452+ */
453+ public void requestRunOnDemand () {
410454 if (isCancelled () || isCompleted ()) {
455+ logger .debug ("Not requesting downloader to run on demand because task is cancelled or completed" );
411456 return ;
412457 }
413- try {
414- updateDatabases (); // n.b. this downloads bytes from the internet, it can take a while
415- } catch (Exception e ) {
416- logger .error ("exception during databases update" , e );
458+ logger .trace ("Requesting downloader run on demand" );
459+ // If queuedRuns was greater than 0, then either a run is in progress and it will fire off another run when it finishes,
460+ // or a run is scheduled to run as soon as possible and it will include the latest cluster state.
461+ // If it was 0, we set it to 1 to indicate that a run is scheduled to run as soon as possible and schedule it now.
462+ if (queuedRuns .getAndIncrement () == 0 ) {
463+ logger .trace ("Scheduling downloader run on demand" );
464+ threadPool .generic ().submit (this ::runOnDemand );
465+ }
466+ }
467+
468+ /**
469+ * Runs the downloader on the latest cluster state. {@link #queuedRuns} protects against multiple concurrent runs and ensures that
470+ * if a run is requested while this method is running, then another run will be scheduled to run as soon as this method finishes.
471+ */
472+ private void runOnDemand () {
473+ if (isCancelled () || isCompleted ()) {
474+ logger .debug ("Not running downloader on demand because task is cancelled or completed" );
475+ return ;
417476 }
477+ // Capture the current queue size, so that if another run is requested while we're running, we'll know at the end of this method
478+ // whether we need to run again.
479+ final int currentQueueSize = queuedRuns .get ();
480+ logger .trace ("Running downloader on demand" );
418481 try {
419- cleanDatabases ();
420- } catch (Exception e ) {
421- logger .error ("exception during databases cleanup" , e );
482+ runDownloader ();
483+ logger .trace ("Downloader completed successfully" );
484+ } finally {
485+ // If any exception was thrown during runDownloader, we still want to check queuedRuns.
486+ // Subtract this "batch" of runs from queuedRuns.
487+ // If queuedRuns is still > 0, then a run was requested while we were running, so we need to run again.
488+ if (queuedRuns .addAndGet (-currentQueueSize ) > 0 ) {
489+ logger .debug ("Downloader on demand requested again while running, scheduling another run" );
490+ threadPool .generic ().submit (this ::runOnDemand );
491+ }
422492 }
423493 }
424494
425495 /**
426- * This method requests that the downloader be rescheduled to run immediately (presumably because a dynamic property supplied by
427- * pollIntervalSupplier or eagerDownloadSupplier has changed, or a pipeline with a geoip processor has been added). This method does
428- * nothing if this task is cancelled, completed, or has not yet been scheduled to run for the first time. It cancels any existing
429- * scheduled run.
496+ * Downloads the geoip databases now based on the supplied cluster state.
430497 */
431- public void requestReschedule () {
498+ void runDownloader () {
432499 if (isCancelled () || isCompleted ()) {
500+ logger .debug ("Not running downloader because task is cancelled or completed" );
433501 return ;
434502 }
435- if (scheduled != null && scheduled .cancel ()) {
436- scheduleNextRun (TimeValue .ZERO );
503+ // by the time we reach here, the state will never be null
504+ assert this .state != null : "this.setState() is null. You need to call setState() before calling runDownloader()" ;
505+
506+ try {
507+ updateDatabases (); // n.b. this downloads bytes from the internet, it can take a while
508+ } catch (Exception e ) {
509+ logger .error ("exception during databases update" , e );
510+ }
511+ try {
512+ cleanDatabases ();
513+ } catch (Exception e ) {
514+ logger .error ("exception during databases cleanup" , e );
437515 }
438516 }
439517
@@ -455,18 +533,14 @@ private void cleanDatabases() {
455533
456534 @ Override
457535 protected void onCancelled () {
458- if (scheduled != null ) {
459- scheduled .cancel ();
536+ synchronized (this ) {
537+ if (scheduledPeriodicRun != null ) {
538+ scheduledPeriodicRun .cancel ();
539+ }
460540 }
461541 markAsCompleted ();
462542 }
463543
464- private void scheduleNextRun (TimeValue time ) {
465- if (threadPool .scheduler ().isShutdown () == false ) {
466- scheduled = threadPool .schedule (this ::runDownloader , time , threadPool .generic ());
467- }
468- }
469-
470544 private ProviderDownload downloaderFor (DatabaseConfiguration database ) {
471545 if (database .provider () instanceof DatabaseConfiguration .Maxmind maxmind ) {
472546 return new MaxmindDownload (database .name (), maxmind );
0 commit comments