Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;

/**
Expand Down Expand Up @@ -49,14 +50,30 @@ public Decision canAllocate(final ShardRouting shardRouting, final RoutingAlloca
return allocation.decision(Decision.YES, NAME, "shard is currently being restored");
}
}

/**
* POST: the RestoreInProgress.ShardRestoreStatus is either failed or succeeded. This section turns a
* turn a shard failure into a NO decision to allocate. See {@link AllocationService.applyFailedShards}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"turns a turn a shard..." typo in javadoc?

* for details on how it updates UnassignedInfo.
*/
UnassignedInfo unassignedInfo = shardRouting.unassignedInfo();
if (unassignedInfo.failedAllocations() > 0) {
Comment on lines +60 to +61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looking good, matches with what David said in the #100233

return allocation.decision(
Decision.NO,
NAME,
"shard has failed to be restored from the snapshot [%s] - manually close or delete the index [%s] in order to retry "
+ "to restore the snapshot again or use the reroute API to force the allocation of an empty primary shard. Check the "
+ "logs for more information about the failure. Details: [%s]",
source.snapshot(),
shardRouting.getIndexName(),
unassignedInfo.details()
);
}

return allocation.decision(
Decision.NO,
Decision.YES,
NAME,
"shard has failed to be restored from the snapshot [%s] - manually close or delete the index [%s] in order to retry "
+ "to restore the snapshot again or use the reroute API to force the allocation of an empty primary shard. Details: [%s]",
source.snapshot(),
shardRouting.getIndexName(),
shardRouting.unassignedInfo().details()
"shard was prevented from being allocated on all nodes because of other allocation deciders"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats a strange way to say YES, and not always true. Looking at the code above it should be "YES/shard successfully restored".

);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,10 @@ public void testCannotAllocatePrimaryMissingInRestoreInProgress() {
assertEquals(RecoverySource.Type.SNAPSHOT, primary.recoverySource().getType());

final Decision decision = executeAllocation(clusterState, primary);
assertEquals(Decision.Type.NO, decision.type());
assertEquals(Decision.Type.YES, decision.type());
assertThat(
decision.getExplanation(),
equalTo(
"shard has failed to be restored from the snapshot [default:_repository:_missing/_uuid] - manually close or "
+ "delete the index [test] in order to retry to restore the snapshot again or use the reroute API "
+ "to force the allocation of an empty primary shard. Details: [restore_source[_repository/_missing]]"
)
equalTo("shard was prevented from being allocated on all nodes because of other allocation deciders")
);
}

Expand All @@ -108,14 +104,25 @@ public void testCanAllocatePrimaryExistingInRestoreInProgress() {
if (randomBoolean()) {
shardState = randomFrom(RestoreInProgress.State.STARTED, RestoreInProgress.State.INIT);
} else {
shardState = RestoreInProgress.State.FAILURE;

UnassignedInfo currentInfo = primary.unassignedInfo();
UnassignedInfo.Reason reason;

final int failureCount;
if (randomBoolean()) {
shardState = RestoreInProgress.State.FAILURE;
failureCount = 1;
reason = UnassignedInfo.Reason.ALLOCATION_FAILED;
} else {
shardState = RestoreInProgress.State.SUCCESS;
failureCount = 0;
reason = currentInfo.reason();
}

UnassignedInfo newInfo = new UnassignedInfo(
currentInfo.reason(),
reason,
currentInfo.message(),
new IOException("i/o failure"),
currentInfo.failedAllocations(),
failureCount,
currentInfo.unassignedTimeNanos(),
currentInfo.unassignedTimeMillis(),
currentInfo.delayed(),
Expand Down Expand Up @@ -171,10 +178,15 @@ public void testCanAllocatePrimaryExistingInRestoreInProgress() {
"shard has failed to be restored from the snapshot [default:_repository:_existing/_uuid]"
+ " - manually close or delete the index "
+ "[test] in order to retry to restore the snapshot again or use the reroute API to force the allocation of "
+ "an empty primary shard. Details: [restore_source[_repository/_existing], failure "
+ "java.io.IOException: i/o failure"
+ "an empty primary shard. Check the logs for more information about the failure. Failure details:"
)
);
} else if (shardState == RestoreInProgress.State.SUCCESS) {
assertEquals(Decision.Type.YES, decision.type());
assertThat(
decision.getExplanation(),
startsWith("shard was prevented from being allocated on all nodes because of other allocation deciders")
);
} else {
assertEquals(Decision.Type.YES, decision.type());
assertEquals("shard is currently being restored", decision.getExplanation());
Expand Down