|
43 | 43 | import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; |
44 | 44 | import org.elasticsearch.common.Randomness; |
45 | 45 | import org.elasticsearch.common.UUIDs; |
| 46 | +import org.elasticsearch.common.settings.ClusterSettings; |
46 | 47 | import org.elasticsearch.common.settings.Settings; |
47 | 48 | import org.elasticsearch.common.time.TimeProvider; |
48 | 49 | import org.elasticsearch.common.time.TimeProviderUtils; |
49 | 50 | import org.elasticsearch.common.unit.ByteSizeValue; |
50 | 51 | import org.elasticsearch.common.util.Maps; |
| 52 | +import org.elasticsearch.core.Strings; |
| 53 | +import org.elasticsearch.core.TimeValue; |
51 | 54 | import org.elasticsearch.index.IndexVersion; |
52 | 55 | import org.elasticsearch.index.shard.ShardId; |
53 | 56 | import org.elasticsearch.repositories.IndexId; |
|
69 | 72 | import java.util.concurrent.atomic.AtomicBoolean; |
70 | 73 | import java.util.concurrent.atomic.AtomicInteger; |
71 | 74 | import java.util.concurrent.atomic.AtomicLong; |
| 75 | +import java.util.concurrent.atomic.AtomicReference; |
| 76 | +import java.util.function.Consumer; |
| 77 | +import java.util.function.Function; |
| 78 | +import java.util.function.Predicate; |
72 | 79 |
|
73 | 80 | import static java.util.stream.Collectors.toMap; |
74 | 81 | import static org.elasticsearch.cluster.ClusterInfo.shardIdentifierFromRouting; |
@@ -1231,7 +1238,7 @@ public void testShouldLogComputationIteration() { |
1231 | 1238 | "Should report long computation based on time", |
1232 | 1239 | DesiredBalanceComputer.class.getCanonicalName(), |
1233 | 1240 | Level.INFO, |
1234 | | - "Desired balance computation for [*] is still not converged after [1m] and [60] iterations" |
| 1241 | + "Desired balance computation for [*] is still not converged after [1m] and [59] iterations" |
1235 | 1242 | ) |
1236 | 1243 | ); |
1237 | 1244 | } |
@@ -1273,28 +1280,174 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing |
1273 | 1280 | } |
1274 | 1281 | }); |
1275 | 1282 |
|
1276 | | - assertThatLogger(() -> { |
| 1283 | + assertLoggerExpectationsFor(() -> { |
1277 | 1284 | var iteration = new AtomicInteger(0); |
1278 | 1285 | desiredBalanceComputer.compute( |
1279 | 1286 | DesiredBalance.BECOME_MASTER_INITIAL, |
1280 | 1287 | createInput(createInitialClusterState(3)), |
1281 | 1288 | queue(), |
1282 | 1289 | input -> iteration.incrementAndGet() < iterations |
1283 | 1290 | ); |
1284 | | - }, DesiredBalanceComputer.class, expectation); |
| 1291 | + }, expectation); |
| 1292 | + } |
| 1293 | + |
| 1294 | + private void assertLoggerExpectationsFor(Runnable action, MockLog.LoggingExpectation... expectations) { |
| 1295 | + assertThatLogger(action, DesiredBalanceComputer.class, expectations); |
| 1296 | + } |
| 1297 | + |
| 1298 | + public void testLoggingOfComputeCallsAndIterationsSinceConvergence() { |
| 1299 | + final var clusterSettings = new ClusterSettings( |
| 1300 | + Settings.builder().put(DesiredBalanceComputer.PROGRESS_LOG_INTERVAL_SETTING.getKey(), TimeValue.timeValueMillis(5L)).build(), |
| 1301 | + ClusterSettings.BUILT_IN_CLUSTER_SETTINGS |
| 1302 | + ); |
| 1303 | + final var timeInMillis = new AtomicLong(-1L); |
| 1304 | + final var iterationCounter = new AtomicInteger(0); |
| 1305 | + final var requiredIterations = new AtomicInteger(2); |
| 1306 | + final var desiredBalance = new AtomicReference<DesiredBalance>(DesiredBalance.BECOME_MASTER_INITIAL); |
| 1307 | + final var indexSequence = new AtomicLong(0); |
| 1308 | + final var clusterState = createInitialClusterState(1, 1, 0); |
| 1309 | + |
| 1310 | + final var computer = new DesiredBalanceComputer( |
| 1311 | + clusterSettings, |
| 1312 | + TimeProviderUtils.create(timeInMillis::incrementAndGet), |
| 1313 | + new BalancedShardsAllocator(Settings.EMPTY) |
| 1314 | + ) { |
| 1315 | + @Override |
| 1316 | + boolean hasEnoughIterations(int currentIteration) { |
| 1317 | + iterationCounter.incrementAndGet(); |
| 1318 | + return currentIteration >= requiredIterations.get(); |
| 1319 | + } |
| 1320 | + }; |
| 1321 | + computer.setConvergenceLogMsgLevel(Level.INFO); |
| 1322 | + |
| 1323 | + record ExpectedLastConvergenceInfo(int numComputeCalls, int numTotalIterations, long timestampMillis) {} |
| 1324 | + |
| 1325 | + Consumer<ExpectedLastConvergenceInfo> assertLastConvergenceInfo = data -> { |
| 1326 | + assertEquals(data.numComputeCalls(), computer.getNumComputeCallsSinceLastConverged()); |
| 1327 | + assertEquals(data.numTotalIterations(), computer.getNumIterationsSinceLastConverged()); |
| 1328 | + assertEquals(data.timestampMillis(), computer.getLastConvergedTimeMillis()); |
| 1329 | + }; |
| 1330 | + |
| 1331 | + final Function<Predicate<DesiredBalanceInput>, Runnable> getComputeRunnableForIsFreshPredicate = isFreshFunc -> { |
| 1332 | + final var input = new DesiredBalanceInput(indexSequence.incrementAndGet(), routingAllocationOf(clusterState), List.of()); |
| 1333 | + return () -> desiredBalance.set(computer.compute(desiredBalance.get(), input, queue(), isFreshFunc)); |
| 1334 | + }; |
| 1335 | + |
| 1336 | + record LogExpectationData( |
| 1337 | + boolean isConverged, |
| 1338 | + String timeSinceConverged, |
| 1339 | + int totalIterations, |
| 1340 | + int totalComputeCalls, |
| 1341 | + int currentIterations, |
| 1342 | + String currentDuration |
| 1343 | + ) { |
| 1344 | + LogExpectationData(boolean isConverged, String timeSinceConverged, int totalIterations) { |
| 1345 | + this(isConverged, timeSinceConverged, totalIterations, 0, 0, ""); |
| 1346 | + } |
| 1347 | + } |
| 1348 | + |
| 1349 | + Function<LogExpectationData, MockLog.SeenEventExpectation> getLogExpectation = data -> { |
| 1350 | + final var singleComputeCallMsg = "Desired balance computation for [%d] " |
| 1351 | + + (data.isConverged ? "" : "is still not ") |
| 1352 | + + "converged after [%s] and [%d] iterations"; |
| 1353 | + return new MockLog.SeenEventExpectation( |
| 1354 | + "expected a " + (data.isConverged ? "converged" : "not converged") + " log message", |
| 1355 | + DesiredBalanceComputer.class.getCanonicalName(), |
| 1356 | + Level.INFO, |
| 1357 | + (data.totalComputeCalls > 1 |
| 1358 | + ? Strings.format( |
| 1359 | + singleComputeCallMsg + ", resumed computation [%d] times with [%d] iterations since the last resumption [%s] ago", |
| 1360 | + indexSequence.get(), |
| 1361 | + data.timeSinceConverged, |
| 1362 | + data.totalIterations, |
| 1363 | + data.totalComputeCalls, |
| 1364 | + data.currentIterations, |
| 1365 | + data.currentDuration |
| 1366 | + ) |
| 1367 | + : Strings.format(singleComputeCallMsg, indexSequence.get(), data.timeSinceConverged, data.totalIterations)) |
| 1368 | + ); |
| 1369 | + }; |
| 1370 | + |
| 1371 | + final Consumer<DesiredBalance.ComputationFinishReason> assertFinishReason = reason -> { |
| 1372 | + assertEquals(reason, desiredBalance.get().finishReason()); |
| 1373 | + if (DesiredBalance.ComputationFinishReason.CONVERGED == reason) { |
| 1374 | + // Verify the number of compute() calls and total iterations have been reset after converging. |
| 1375 | + assertLastConvergenceInfo.accept(new ExpectedLastConvergenceInfo(0, 0, timeInMillis.get())); |
| 1376 | + } |
| 1377 | + }; |
| 1378 | + |
| 1379 | + // No compute() calls yet, last convergence timestamp is the startup time. |
| 1380 | + assertLastConvergenceInfo.accept(new ExpectedLastConvergenceInfo(0, 0, timeInMillis.get())); |
| 1381 | + |
| 1382 | + // Converges right away, verify the debug level convergence message. |
| 1383 | + assertLoggerExpectationsFor( |
| 1384 | + getComputeRunnableForIsFreshPredicate.apply(ignored -> true), |
| 1385 | + getLogExpectation.apply(new LogExpectationData(true, "3ms", 2)) |
| 1386 | + ); |
| 1387 | + assertFinishReason.accept(DesiredBalance.ComputationFinishReason.CONVERGED); |
| 1388 | + final var lastConvergenceTimestampMillis = computer.getLastConvergedTimeMillis(); |
| 1389 | + |
| 1390 | + // Test a series of compute() calls that don't converge. |
| 1391 | + iterationCounter.set(0); |
| 1392 | + requiredIterations.set(10); |
| 1393 | + // This INFO is triggered from the interval since last convergence timestamp. |
| 1394 | + assertLoggerExpectationsFor( |
| 1395 | + getComputeRunnableForIsFreshPredicate.apply(ignored -> iterationCounter.get() < 6), |
| 1396 | + getLogExpectation.apply(new LogExpectationData(false, "5ms", 4)) |
| 1397 | + ); |
| 1398 | + assertFinishReason.accept(DesiredBalance.ComputationFinishReason.YIELD_TO_NEW_INPUT); |
| 1399 | + assertLastConvergenceInfo.accept(new ExpectedLastConvergenceInfo(1, 6, lastConvergenceTimestampMillis)); |
| 1400 | + |
| 1401 | + iterationCounter.set(0); |
| 1402 | + // The next INFO is triggered from the interval since last INFO message logged, and then another after the interval period. |
| 1403 | + assertLoggerExpectationsFor( |
| 1404 | + getComputeRunnableForIsFreshPredicate.apply(ignored -> iterationCounter.get() < 8), |
| 1405 | + getLogExpectation.apply(new LogExpectationData(false, "10ms", 8, 2, 2, "2ms")), |
| 1406 | + getLogExpectation.apply(new LogExpectationData(false, "15ms", 13, 2, 7, "7ms")) |
| 1407 | + ); |
| 1408 | + assertFinishReason.accept(DesiredBalance.ComputationFinishReason.YIELD_TO_NEW_INPUT); |
| 1409 | + assertLastConvergenceInfo.accept(new ExpectedLastConvergenceInfo(2, 14, lastConvergenceTimestampMillis)); |
| 1410 | + |
| 1411 | + assertLoggerExpectationsFor( |
| 1412 | + getComputeRunnableForIsFreshPredicate.apply(ignored -> true), |
| 1413 | + getLogExpectation.apply(new LogExpectationData(false, "20ms", 17, 3, 3, "3ms")), |
| 1414 | + getLogExpectation.apply(new LogExpectationData(false, "25ms", 22, 3, 8, "8ms")), |
| 1415 | + getLogExpectation.apply(new LogExpectationData(true, "27ms", 24, 3, 10, "10ms")) |
| 1416 | + ); |
| 1417 | + assertFinishReason.accept(DesiredBalance.ComputationFinishReason.CONVERGED); |
| 1418 | + |
| 1419 | + // First INFO is triggered from interval since last converged, second is triggered from the inverval since the last INFO log. |
| 1420 | + assertLoggerExpectationsFor( |
| 1421 | + getComputeRunnableForIsFreshPredicate.apply(ignored -> true), |
| 1422 | + getLogExpectation.apply(new LogExpectationData(false, "5ms", 4)), |
| 1423 | + getLogExpectation.apply(new LogExpectationData(false, "10ms", 9)), |
| 1424 | + getLogExpectation.apply(new LogExpectationData(true, "11ms", 10)) |
| 1425 | + ); |
| 1426 | + assertFinishReason.accept(DesiredBalance.ComputationFinishReason.CONVERGED); |
| 1427 | + |
| 1428 | + // Verify the final assignment mappings after converging. |
| 1429 | + final var index = clusterState.metadata().getProject(Metadata.DEFAULT_PROJECT_ID).index(TEST_INDEX).getIndex(); |
| 1430 | + final var expectedAssignmentsMap = Map.of(new ShardId(index, 0), new ShardAssignment(Set.of("node-0"), 1, 0, 0)); |
| 1431 | + assertDesiredAssignments(desiredBalance.get(), expectedAssignmentsMap); |
1285 | 1432 | } |
1286 | 1433 |
|
1287 | 1434 | private static ShardId findShardId(ClusterState clusterState, String name) { |
1288 | 1435 | return clusterState.getRoutingTable().index(name).shard(0).shardId(); |
1289 | 1436 | } |
1290 | 1437 |
|
1291 | 1438 | static ClusterState createInitialClusterState(int dataNodesCount) { |
| 1439 | + return createInitialClusterState(dataNodesCount, 2, 1); |
| 1440 | + } |
| 1441 | + |
| 1442 | + static ClusterState createInitialClusterState(int dataNodesCount, int numShards, int numReplicas) { |
1292 | 1443 | var discoveryNodes = DiscoveryNodes.builder().add(newNode("master", Set.of(DiscoveryNodeRole.MASTER_ROLE))); |
1293 | 1444 | for (int i = 0; i < dataNodesCount; i++) { |
1294 | 1445 | discoveryNodes.add(newNode("node-" + i, Set.of(DiscoveryNodeRole.DATA_ROLE))); |
1295 | 1446 | } |
1296 | 1447 |
|
1297 | | - var indexMetadata = IndexMetadata.builder(TEST_INDEX).settings(indexSettings(IndexVersion.current(), 2, 1)).build(); |
| 1448 | + var indexMetadata = IndexMetadata.builder(TEST_INDEX) |
| 1449 | + .settings(indexSettings(IndexVersion.current(), numShards, numReplicas)) |
| 1450 | + .build(); |
1298 | 1451 |
|
1299 | 1452 | return ClusterState.builder(ClusterName.DEFAULT) |
1300 | 1453 | .nodes(discoveryNodes.masterNodeId("master").localNodeId("master")) |
|
0 commit comments