diff --git a/docs/changelog/134198.yaml b/docs/changelog/134198.yaml new file mode 100644 index 0000000000000..934ff7a8f2664 --- /dev/null +++ b/docs/changelog/134198.yaml @@ -0,0 +1,5 @@ +pr: 134198 +summary: Improve `ShardLockObtainFailedException` message +area: Store +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java index ddc36cc81dda1..893d3b906e148 100644 --- a/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java +++ b/server/src/main/java/org/elasticsearch/env/NodeEnvironment.java @@ -1009,12 +1009,16 @@ void acquire(long timeoutInMillis, final String details) throws ShardLockObtainF setDetails(details); } else { final Tuple lockDetails = this.lockDetails; // single volatile read + final var stateAge = TimeValue.timeValueNanos(System.nanoTime() - lockDetails.v1()); final var message = format( - "obtaining shard lock for [%s] timed out after [%dms], lock already held for [%s] with age [%dms]", + """ + obtaining shard lock for [%s] timed out after [%dms]; \ + this shard lock is still held by a different instance of the shard and has been in state [%s] for [%s/%dms]""", details, timeoutInMillis, lockDetails.v2(), - TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lockDetails.v1()) + stateAge, + stateAge.millis() ); maybeLogThreadDump(shardId, message); throw new ShardLockObtainFailedException(shardId, message); diff --git a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java index 9a7fadad33a53..eac8a2d01e626 100644 --- a/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java +++ b/server/src/test/java/org/elasticsearch/env/NodeEnvironmentTests.java @@ -70,6 +70,7 @@ import static org.hamcrest.Matchers.arrayWithSize; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.matchesPattern; import static org.hamcrest.Matchers.startsWith; @LuceneTestCase.SuppressFileSystems("ExtrasFS") // TODO: fix test to allow extras @@ -134,25 +135,28 @@ public void testShardLock() throws Exception { try (var mockLog = MockLog.capture(NodeEnvironment.class); var lock = env.shardLock(new ShardId(index, 0), "1")) { mockLog.addExpectation( - new MockLog.SeenEventExpectation( - "hot threads logging", - NODE_ENVIRONMENT_LOGGER_NAME, - Level.DEBUG, - "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [2] timed out after *" - ) + new MockLog.SeenEventExpectation("hot threads logging", NODE_ENVIRONMENT_LOGGER_NAME, Level.DEBUG, """ + hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [2] timed out after [*ms]; \ + this shard lock is still held by a different instance of the shard and has been in state [1] for [*/*ms]*""") ); mockLog.addExpectation( new MockLog.UnseenEventExpectation( "second attempt should be suppressed due to throttling", NODE_ENVIRONMENT_LOGGER_NAME, Level.DEBUG, - "hot threads while failing to obtain shard lock for [foo][0]: obtaining shard lock for [3] timed out after *" + "*obtaining shard lock for [3] timed out*" ) ); assertEquals(new ShardId(index, 0), lock.getShardId()); - expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "2")); + assertThat( + expectThrows(ShardLockObtainFailedException.class, () -> env.shardLock(new ShardId(index, 0), "2")).getMessage(), + matchesPattern(""" + \\[foo]\\[0]: obtaining shard lock for \\[2] timed out after \\[0ms]; \ + this shard lock is still held by a different instance of the shard \ + and has been in state \\[1] for \\[.*/[0-9]+ms]""") + ); for (Path path : env.indexPaths(index)) { Files.createDirectories(path.resolve("0"));