Merged
Changes from 6 commits
@@ -12,6 +12,8 @@
import org.apache.logging.log4j.Level;
import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.admin.cluster.allocation.ClusterAllocationExplainRequest;
import org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.elasticsearch.action.admin.indices.settings.get.GetSettingsResponse;
import org.elasticsearch.action.admin.indices.template.get.GetIndexTemplatesResponse;
@@ -20,6 +22,8 @@
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata;
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.core.TimeValue;
@@ -41,6 +45,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@@ -1025,4 +1030,64 @@ public void testNoWarningsOnRestoreOverClosedIndex() throws IllegalAccessException
mockLog.assertAllExpectationsMatched();
}
}

public void testExplainUnassignableDuringRestore() {
final String repoName = "repo-" + randomIdentifier();
createRepository(repoName, FsRepository.TYPE);
final String indexName = "index-" + randomIdentifier();
createIndexWithContent(indexName);
final String snapshotName = "snapshot-" + randomIdentifier();
createSnapshot(repoName, snapshotName, List.of(indexName));
assertAcked(indicesAdmin().prepareDelete(indexName));

final RestoreSnapshotResponse restoreSnapshotResponse = clusterAdmin().prepareRestoreSnapshot(
TEST_REQUEST_TIMEOUT,
repoName,
snapshotName
)
.setIndices(indexName)
.setRestoreGlobalState(false)
.setWaitForCompletion(true)
.setIndexSettings(
Settings.builder().put(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + "._name", "not-a-node-" + randomIdentifier())
)
.get();

logger.info("--> restoreSnapshotResponse: {}", Strings.toString(restoreSnapshotResponse, true, true));
assertThat(restoreSnapshotResponse.getRestoreInfo().failedShards(), greaterThan(0));

final var clusterExplainResponse1 = client().execute(
TransportClusterAllocationExplainAction.TYPE,
new ClusterAllocationExplainRequest(TEST_REQUEST_TIMEOUT).setIndex(indexName).setShard(0).setPrimary(true)
).actionGet();

logger.info("--> clusterExplainResponse1: {}", Strings.toString(clusterExplainResponse1, true, true));
for (var nodeDecision : clusterExplainResponse1.getExplanation()
.getShardAllocationDecision()
.getAllocateDecision()
.getNodeDecisions()) {
assertEquals(
Set.of("restore_in_progress", "filter"),
nodeDecision.getCanAllocateDecision().getDecisions().stream().map(Decision::label).collect(Collectors.toSet())
);
}

updateIndexSettings(Settings.builder().putNull(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + "._name"), indexName);

final var clusterExplainResponse2 = client().execute(
TransportClusterAllocationExplainAction.TYPE,
new ClusterAllocationExplainRequest(TEST_REQUEST_TIMEOUT).setIndex(indexName).setShard(0).setPrimary(true)
).actionGet();

logger.info("--> clusterExplainResponse2: {}", Strings.toString(clusterExplainResponse2, true, true));
for (var nodeDecision : clusterExplainResponse2.getExplanation()
.getShardAllocationDecision()
.getAllocateDecision()
.getNodeDecisions()) {
assertEquals(
Set.of("restore_in_progress"),
nodeDecision.getCanAllocateDecision().getDecisions().stream().map(Decision::label).collect(Collectors.toSet())
);
}
}
}
@@ -13,6 +13,7 @@
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;

/**
@@ -49,15 +50,31 @@ public Decision canAllocate(final ShardRouting shardRouting, final RoutingAllocation
return allocation.decision(Decision.YES, NAME, "shard is currently being restored");
}
}
return allocation.decision(
Decision.NO,
NAME,
"shard has failed to be restored from the snapshot [%s] - manually close or delete the index [%s] in order to retry "
+ "to restore the snapshot again or use the reroute API to force the allocation of an empty primary shard. Details: [%s]",
source.snapshot(),
shardRouting.getIndexName(),
shardRouting.unassignedInfo().details()
);

/**
 * POST: the RestoreInProgress.Entry is non-existent. This section differentiates between a restore that failed
 * because of an indexing fault (see {@link AllocationService#applyFailedShards}) and one that failed because of an
 * allocation failure.
 */
UnassignedInfo unassignedInfo = shardRouting.unassignedInfo();
if (unassignedInfo.failedAllocations() > 0) {
Comment on lines +60 to +61

Contributor: looking good, matches with what David said in #100233

return allocation.decision(
Decision.NO,
NAME,
"shard has failed to be restored from the snapshot [%s] - manually close or delete the index [%s] in order to retry "
+ "to restore the snapshot again or use the reroute API to force the allocation of an empty primary shard. Check the "
+ "logs for more information about the failure. Details: [%s]",
source.snapshot(),
shardRouting.getIndexName(),
unassignedInfo.details()
);
} else {
return allocation.decision(
Decision.NO,
NAME,
"shard was prevented from being allocated on all nodes because of other allocation deciders in previous rounds"
Review thread on the new message above:

Contributor: Could this be something like "Restore from snapshot failed because the configured constraints prevented allocation on any of the available nodes. Please check constraints applied in index and cluster settings, then retry the restore"? Just to avoid the talk about other allocation deciders (that seems like an abstract concept, but I might be misunderstanding who the audience is here), and the tip to check the constraints and retry might be helpful. Happy to go with consensus here.

Contributor Author: That sounds like a better message. I'm also wondering if there's something like "go look for this earlier in the logs"?

Contributor: Perhaps we could instruct them to use the allocation explain API (even though the reason for the allocation being prevented might have since disappeared).

Contributor Author: I can reference the allocation explain API... the reason disappearing seems like a lousy way to respond.

Contributor: Yes it does, maybe let's not be explicit about that. The allocation explain API may shed some light in many cases though, I think?

Contributor Author: I'm sure -- maybe we can create a separate task for later to pend the reasoning in RestoreInProgress to somewhere in UnassignedInfo/ClusterState.

);
}
}

@Override
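As a rough sketch of the alternative wording floated in the review thread above, the else branch could carry a message along these lines. The text, and the pointer to the cluster allocation explain API, are assumptions drawn from the discussion rather than from the merged change (the unit tests below assert the shorter "shard was prevented..." wording that was actually kept):

    // Hypothetical wording only, based on the review suggestions above -- not the merged message.
    // This would sit in the else branch of canAllocate, where `source` and `allocation` are already in scope.
    return allocation.decision(
        Decision.NO,
        NAME,
        "restore from snapshot [%s] failed because the configured allocation constraints prevented the shard from being "
            + "allocated on any of the available nodes - check the allocation constraints in the index and cluster "
            + "settings (the cluster allocation explain API may show which constraint applied), then retry the restore",
        source.snapshot()
    );
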
@@ -80,11 +80,7 @@ public void testCannotAllocatePrimaryMissingInRestoreInProgress() {
assertEquals(Decision.Type.NO, decision.type());
assertThat(
decision.getExplanation(),
equalTo(
"shard has failed to be restored from the snapshot [default:_repository:_missing/_uuid] - manually close or "
+ "delete the index [test] in order to retry to restore the snapshot again or use the reroute API "
+ "to force the allocation of an empty primary shard. Details: [restore_source[_repository/_missing]]"
)
equalTo("shard was prevented from being allocated on all nodes because of other allocation deciders in previous rounds")
);
}

@@ -105,17 +101,28 @@ public void testCanAllocatePrimaryExistingInRestoreInProgress() {
routingTable = clusterState.routingTable();

final RestoreInProgress.State shardState;
final int failureCount;
if (randomBoolean()) {
shardState = randomFrom(RestoreInProgress.State.STARTED, RestoreInProgress.State.INIT);
failureCount = 0;
} else {
shardState = RestoreInProgress.State.FAILURE;

UnassignedInfo currentInfo = primary.unassignedInfo();
UnassignedInfo.Reason reason;
if (randomBoolean()) {
failureCount = randomBoolean() ? 0 : 1;
reason = UnassignedInfo.Reason.ALLOCATION_FAILED;
} else {
failureCount = 0;
reason = currentInfo.reason();
}

UnassignedInfo newInfo = new UnassignedInfo(
currentInfo.reason(),
reason,
currentInfo.message(),
new IOException("i/o failure"),
currentInfo.failedAllocations(),
failureCount,
currentInfo.unassignedTimeNanos(),
currentInfo.unassignedTimeMillis(),
currentInfo.delayed(),
@@ -165,16 +172,22 @@ public void testCanAllocatePrimaryExistingInRestoreInProgress() {
Decision decision = executeAllocation(clusterState, primary);
if (shardState == RestoreInProgress.State.FAILURE) {
assertEquals(Decision.Type.NO, decision.type());
assertThat(
decision.getExplanation(),
startsWith(
"shard has failed to be restored from the snapshot [default:_repository:_existing/_uuid]"
+ " - manually close or delete the index "
+ "[test] in order to retry to restore the snapshot again or use the reroute API to force the allocation of "
+ "an empty primary shard. Details: [restore_source[_repository/_existing], failure "
+ "java.io.IOException: i/o failure"
)
);
if (failureCount > 0) {
assertThat(
decision.getExplanation(),
startsWith(
"shard has failed to be restored from the snapshot [default:_repository:_existing/_uuid]"
+ " - manually close or delete the index "
+ "[test] in order to retry to restore the snapshot again or use the reroute API to force the allocation of "
+ "an empty primary shard. Check the logs for more information about the failure. Details:"
)
);
} else {
assertThat(
decision.getExplanation(),
startsWith("shard was prevented from being allocated on all nodes because of other allocation deciders")
);
}
} else {
assertEquals(Decision.Type.YES, decision.type());
assertEquals("shard is currently being restored", decision.getExplanation());