@@ -9,18 +9,27 @@

 package org.elasticsearch.common.util.concurrent;

+import org.elasticsearch.common.metrics.ExponentialBucketHistogram;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.EsExecutors.TaskTrackingConfig;
+import org.elasticsearch.telemetry.InstrumentType;
+import org.elasticsearch.telemetry.Measurement;
+import org.elasticsearch.telemetry.RecordingMeterRegistry;
 import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.threadpool.ThreadPool;

+import java.util.List;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Function;

 import static org.elasticsearch.common.util.concurrent.EsExecutors.TaskTrackingConfig.DEFAULT_EWMA_ALPHA;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.hasSize;

 /**
  * Tests for the automatic queue resizing of the {@code QueueResizingEsThreadPoolExecutorTests}
@@ -147,6 +156,85 @@ public void testGetOngoingTasks() throws Exception { |
         executor.awaitTermination(10, TimeUnit.SECONDS);
     }

+    public void testQueueLatencyMetrics() {
+        RecordingMeterRegistry meterRegistry = new RecordingMeterRegistry();
+        final var threadPoolName = randomIdentifier();
+        var executor = new TaskExecutionTimeTrackingEsThreadPoolExecutor(
+            threadPoolName,
+            1,
+            1,
+            1000,
+            TimeUnit.MILLISECONDS,
+            ConcurrentCollections.newBlockingQueue(),
+            TimedRunnable::new,
+            EsExecutors.daemonThreadFactory("queuetest"),
+            new EsAbortPolicy(),
+            new ThreadContext(Settings.EMPTY),
+            new TaskTrackingConfig(true, DEFAULT_EWMA_ALPHA)
+        );
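+        // hook the executor's metrics up to the recording registry so the queue latency gauges can be read back below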
+        executor.setupMetrics(meterRegistry, threadPoolName);
+
+        try {
+            final var barrier = new CyclicBarrier(2);
+            final ExponentialBucketHistogram expectedHistogram = new ExponentialBucketHistogram(
+                TaskExecutionTimeTrackingEsThreadPoolExecutor.QUEUE_LATENCY_HISTOGRAM_BUCKETS
+            );
+
+            /*
+             * The thread pool has a single thread, so we submit a task that will occupy that thread
+             * and cause subsequent tasks to be queued
+             */
+            Future<?> runningTask = executor.submit(() -> {
+                safeAwait(barrier);
+                safeAwait(barrier);
+            });
+            safeAwait(barrier); // wait till the first task starts
+            expectedHistogram.addObservation(0L); // the first task should not be delayed
+
+            /*
+             * On each iteration we submit a task - which will be queued because of the
+             * currently running task, pause for some random interval, then unblock the
+             * new task by releasing the currently running task. This gives us a lower
+             * bound for the real delays (the real delays will be greater than or equal
+             * to the synthetic delays we add, i.e. each percentile should be >= our
+             * expected values)
+             */
+            for (int i = 0; i < 10; i++) {
+                Future<?> waitingTask = executor.submit(() -> {
+                    safeAwait(barrier);
+                    safeAwait(barrier);
+                });
+                final long delayTimeMs = randomLongBetween(1, 50);
+                safeSleep(delayTimeMs);
+                safeAwait(barrier); // let the running task complete
+                safeAwait(barrier); // wait for the next task to start
+                safeGet(runningTask); // ensure previous task is complete
+                expectedHistogram.addObservation(delayTimeMs);
+                runningTask = waitingTask;
+            }
+            safeAwait(barrier); // let the last task finish
+            safeGet(runningTask);
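+            // collect() polls the registered gauges so the queue latency percentiles show up as measurements below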
+            meterRegistry.getRecorder().collect();
+
+            List<Measurement> measurements = meterRegistry.getRecorder()
+                .getMeasurements(
+                    InstrumentType.LONG_GAUGE,
+                    ThreadPool.THREAD_POOL_METRIC_PREFIX + threadPoolName + ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE_TIME
+                );
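+            // one gauge measurement per published percentile: 50th, 90th and 99th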
+            assertThat(measurements, hasSize(3));
+            // we have to use greater than or equal to because the actual delay might be higher than what we imposed
+            assertThat(getPercentile(measurements, "99"), greaterThanOrEqualTo(expectedHistogram.getPercentile(0.99f)));
+            assertThat(getPercentile(measurements, "90"), greaterThanOrEqualTo(expectedHistogram.getPercentile(0.9f)));
+            assertThat(getPercentile(measurements, "50"), greaterThanOrEqualTo(expectedHistogram.getPercentile(0.5f)));
+        } finally {
+            ThreadPool.terminate(executor, 10, TimeUnit.SECONDS);
+        }
+    }
+
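+    /**
+     * Returns the value of the gauge measurement whose "percentile" attribute matches the given percentile (e.g. "99").
+     */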
+    private long getPercentile(List<Measurement> measurements, String percentile) {
+        return measurements.stream().filter(m -> m.attributes().get("percentile").equals(percentile)).findFirst().orElseThrow().getLong();
+    }
+
     /**
      * The returned function outputs a WrappedRunnable that simulates the case
      * where {@link TimedRunnable#getTotalExecutionNanos()} always returns {@code timeTakenNanos}.