Skip to content

Commit 7dce356

Browse files
authored
Fix privileges for system index migration WRITE block (#122214)
* Fix privileges for system index migration WRITE block (#121327) This PR removes a potential cause of data loss when migrating system indices. It does this by changing the way we set a "write-block" on the system index to migrate - now using a dedicated transport request rather than a settings update. Furthermore, we no longer delete the write-block prior to deleting the index, as this was another source of potential data loss. Additionally, we now remove the block if the migration fails. * Update release notes * Delete docs/changelog/122214.yaml
1 parent 20b720b commit 7dce356

File tree

6 files changed

+178
-37
lines changed

6 files changed

+178
-37
lines changed

docs/changelog/121119.yaml

Lines changed: 0 additions & 5 deletions
This file was deleted.

modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/AbstractFeatureMigrationIntegTest.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@
99

1010
package org.elasticsearch.migration;
1111

12+
import org.elasticsearch.ElasticsearchException;
1213
import org.elasticsearch.Version;
1314
import org.elasticsearch.action.ActionListener;
15+
import org.elasticsearch.action.ActionRequest;
1416
import org.elasticsearch.action.admin.cluster.migration.TransportGetFeatureUpgradeStatusAction;
1517
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
1618
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
1719
import org.elasticsearch.action.admin.indices.stats.IndexStats;
1820
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
1921
import org.elasticsearch.action.index.IndexRequestBuilder;
22+
import org.elasticsearch.action.support.ActionFilter;
2023
import org.elasticsearch.action.support.ActiveShardCount;
2124
import org.elasticsearch.client.internal.Client;
2225
import org.elasticsearch.cluster.ClusterState;
@@ -28,6 +31,7 @@
2831
import org.elasticsearch.index.IndexVersion;
2932
import org.elasticsearch.indices.AssociatedIndexDescriptor;
3033
import org.elasticsearch.indices.SystemIndexDescriptor;
34+
import org.elasticsearch.plugins.ActionPlugin;
3135
import org.elasticsearch.plugins.Plugin;
3236
import org.elasticsearch.plugins.PluginsService;
3337
import org.elasticsearch.plugins.SystemIndexPlugin;
@@ -50,6 +54,10 @@
5054
import java.util.function.BiConsumer;
5155
import java.util.function.Function;
5256

57+
import static java.util.Collections.emptySet;
58+
import static java.util.Collections.singletonList;
59+
import static java.util.Collections.unmodifiableSet;
60+
import static org.elasticsearch.common.util.set.Sets.newHashSet;
5361
import static org.hamcrest.Matchers.containsInAnyOrder;
5462
import static org.hamcrest.Matchers.endsWith;
5563
import static org.hamcrest.Matchers.equalTo;
@@ -255,12 +263,18 @@ protected void assertIndexHasCorrectProperties(
255263
assertThat(thisIndexStats.getTotal().getDocs().getCount(), is((long) INDEX_DOC_COUNT));
256264
}
257265

258-
public static class TestPlugin extends Plugin implements SystemIndexPlugin {
266+
public static class TestPlugin extends Plugin implements SystemIndexPlugin, ActionPlugin {
259267
public final AtomicReference<Function<ClusterState, Map<String, Object>>> preMigrationHook = new AtomicReference<>();
260268
public final AtomicReference<BiConsumer<ClusterState, Map<String, Object>>> postMigrationHook = new AtomicReference<>();
269+
private final BlockingActionFilter blockingActionFilter;
261270

262271
public TestPlugin() {
272+
blockingActionFilter = new BlockingActionFilter();
273+
}
263274

275+
@Override
276+
public List<ActionFilter> getActionFilters() {
277+
return singletonList(blockingActionFilter);
264278
}
265279

266280
@Override
@@ -299,5 +313,26 @@ public void indicesMigrationComplete(
299313
postMigrationHook.get().accept(clusterService.state(), preUpgradeMetadata);
300314
listener.onResponse(true);
301315
}
316+
317+
public static class BlockingActionFilter extends org.elasticsearch.action.support.ActionFilter.Simple {
318+
private Set<String> blockedActions = emptySet();
319+
320+
@Override
321+
protected boolean apply(String action, ActionRequest request, ActionListener<?> listener) {
322+
if (blockedActions.contains(action)) {
323+
throw new ElasticsearchException("force exception on [" + action + "]");
324+
}
325+
return true;
326+
}
327+
328+
@Override
329+
public int order() {
330+
return 0;
331+
}
332+
333+
public void blockActions(String... actions) {
334+
blockedActions = unmodifiableSet(newHashSet(actions));
335+
}
336+
}
302337
}
303338
}

modules/reindex/src/internalClusterTest/java/org/elasticsearch/migration/FeatureMigrationIT.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@
1717
import org.elasticsearch.action.admin.cluster.migration.PostFeatureUpgradeRequest;
1818
import org.elasticsearch.action.admin.cluster.migration.PostFeatureUpgradeResponse;
1919
import org.elasticsearch.action.admin.indices.alias.Alias;
20+
import org.elasticsearch.action.admin.indices.alias.TransportIndicesAliasesAction;
2021
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
2122
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
2223
import org.elasticsearch.action.admin.indices.template.put.PutComponentTemplateAction;
2324
import org.elasticsearch.action.admin.indices.template.put.TransportPutComposableIndexTemplateAction;
2425
import org.elasticsearch.action.search.SearchRequestBuilder;
26+
import org.elasticsearch.action.support.ActionFilter;
27+
import org.elasticsearch.action.support.ActionFilters;
2528
import org.elasticsearch.action.support.ActiveShardCount;
2629
import org.elasticsearch.cluster.ClusterState;
2730
import org.elasticsearch.cluster.ClusterStateUpdateTask;
@@ -36,10 +39,12 @@
3639
import org.elasticsearch.common.settings.Settings;
3740
import org.elasticsearch.index.query.QueryBuilders;
3841
import org.elasticsearch.indices.SystemIndexDescriptor;
42+
import org.elasticsearch.migration.AbstractFeatureMigrationIntegTest.TestPlugin.BlockingActionFilter;
3943
import org.elasticsearch.painless.PainlessPlugin;
4044
import org.elasticsearch.plugins.Plugin;
4145
import org.elasticsearch.plugins.SystemIndexPlugin;
4246
import org.elasticsearch.reindex.ReindexPlugin;
47+
import org.elasticsearch.test.InternalTestCluster;
4348
import org.elasticsearch.upgrades.FeatureMigrationResults;
4449
import org.elasticsearch.upgrades.SingleFeatureMigrationResult;
4550

@@ -272,6 +277,60 @@ public void testMigrateIndexWithWriteBlock() throws Exception {
272277
});
273278
}
274279

280+
@AwaitsFix(bugUrl = "ES-10666") // This test uncovered an existing issue
281+
public void testIndexBlockIsRemovedWhenAliasRequestFails() throws Exception {
282+
createSystemIndexForDescriptor(INTERNAL_UNMANAGED);
283+
ensureGreen();
284+
285+
// Block the alias request to simulate a failure
286+
InternalTestCluster internalTestCluster = internalCluster();
287+
ActionFilters actionFilters = internalTestCluster.getInstance(ActionFilters.class, internalTestCluster.getMasterName());
288+
BlockingActionFilter blockingActionFilter = null;
289+
for (ActionFilter filter : actionFilters.filters()) {
290+
if (filter instanceof BlockingActionFilter) {
291+
blockingActionFilter = (BlockingActionFilter) filter;
292+
break;
293+
}
294+
}
295+
assertNotNull("BlockingActionFilter should exist", blockingActionFilter);
296+
blockingActionFilter.blockActions(TransportIndicesAliasesAction.NAME);
297+
298+
// Start the migration
299+
client().execute(PostFeatureUpgradeAction.INSTANCE, new PostFeatureUpgradeRequest(TEST_REQUEST_TIMEOUT)).get();
300+
301+
// Wait till the migration fails
302+
assertBusy(() -> {
303+
GetFeatureUpgradeStatusResponse statusResp = client().execute(
304+
GetFeatureUpgradeStatusAction.INSTANCE,
305+
new GetFeatureUpgradeStatusRequest(TEST_REQUEST_TIMEOUT)
306+
).get();
307+
logger.info(Strings.toString(statusResp));
308+
assertThat(statusResp.getUpgradeStatus(), equalTo(GetFeatureUpgradeStatusResponse.UpgradeStatus.ERROR));
309+
});
310+
311+
// Get the settings to see if the write block was removed
312+
var allsettings = client().admin().indices().prepareGetSettings(INTERNAL_UNMANAGED.getIndexPattern()).get().getIndexToSettings();
313+
var internalUnmanagedOldIndexSettings = allsettings.get(".int-unman-old");
314+
var writeBlock = internalUnmanagedOldIndexSettings.get(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey());
315+
assertThat("Write block on old index should be removed on migration ERROR status", writeBlock, equalTo("false"));
316+
317+
// Unblock the alias request
318+
blockingActionFilter.blockActions();
319+
320+
// Retry the migration
321+
client().execute(PostFeatureUpgradeAction.INSTANCE, new PostFeatureUpgradeRequest(TEST_REQUEST_TIMEOUT)).get();
322+
323+
// Ensure that the migration is successful after the alias request is unblocked
324+
assertBusy(() -> {
325+
GetFeatureUpgradeStatusResponse statusResp = client().execute(
326+
GetFeatureUpgradeStatusAction.INSTANCE,
327+
new GetFeatureUpgradeStatusRequest(TEST_REQUEST_TIMEOUT)
328+
).get();
329+
logger.info(Strings.toString(statusResp));
330+
assertThat(statusResp.getUpgradeStatus(), equalTo(GetFeatureUpgradeStatusResponse.UpgradeStatus.NO_MIGRATION_NEEDED));
331+
});
332+
}
333+
275334
public void testMigrationWillRunAfterError() throws Exception {
276335
createSystemIndexForDescriptor(INTERNAL_MANAGED);
277336

server/src/main/java/org/elasticsearch/action/admin/indices/alias/IndicesAliasesResponse.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,17 @@ public boolean hasErrors() {
7777
return errors;
7878
}
7979

80+
/**
81+
* Get a list of all errors from the response. If there are no errors, an empty list is returned.
82+
*/
83+
public List<ElasticsearchException> getErrors() {
84+
if (errors == false) {
85+
return List.of();
86+
} else {
87+
return actionResults.stream().filter(a -> a.getError() != null).map(AliasActionResult::getError).toList();
88+
}
89+
}
90+
8091
/**
8192
* Build a response from a list of action results. Sets the errors boolean based
8293
* on whether an of the individual results contain an error.
@@ -165,6 +176,13 @@ public static AliasActionResult buildSuccess(List<String> indices, AliasActions
165176
return new AliasActionResult(indices, action, null);
166177
}
167178

179+
/**
180+
* The error result if the action failed, null if the action succeeded.
181+
*/
182+
public ElasticsearchException getError() {
183+
return error;
184+
}
185+
168186
private int getStatus() {
169187
return error == null ? 200 : error.status().getStatus();
170188
}

server/src/main/java/org/elasticsearch/upgrades/SystemIndexMigrator.java

Lines changed: 62 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
import org.elasticsearch.action.ActionListener;
1616
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
1717
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequestBuilder;
18+
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
1819
import org.elasticsearch.action.admin.indices.create.CreateIndexClusterStateUpdateRequest;
20+
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockRequest;
1921
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsClusterStateUpdateRequest;
2022
import org.elasticsearch.action.support.ActiveShardCount;
2123
import org.elasticsearch.action.support.master.AcknowledgedResponse;
@@ -32,7 +34,6 @@
3234
import org.elasticsearch.cluster.metadata.MetadataIndexTemplateService;
3335
import org.elasticsearch.cluster.metadata.MetadataUpdateSettingsService;
3436
import org.elasticsearch.cluster.service.ClusterService;
35-
import org.elasticsearch.common.CheckedBiConsumer;
3637
import org.elasticsearch.common.Strings;
3738
import org.elasticsearch.common.settings.IndexScopedSettings;
3839
import org.elasticsearch.common.settings.Settings;
@@ -59,6 +60,7 @@
5960
import java.util.stream.Collectors;
6061

6162
import static org.elasticsearch.action.admin.cluster.migration.TransportGetFeatureUpgradeStatusAction.NO_UPGRADE_REQUIRED_INDEX_VERSION;
63+
import static org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock.WRITE;
6264
import static org.elasticsearch.cluster.metadata.IndexMetadata.State.CLOSE;
6365
import static org.elasticsearch.core.Strings.format;
6466

@@ -448,12 +450,33 @@ private void migrateSingleIndex(ClusterState clusterState, Consumer<BulkByScroll
448450
logAndThrowExceptionForFailures(bulkByScrollResponse)
449451
);
450452
} else {
451-
// Successful completion of reindexing - remove read only and delete old index
452-
setWriteBlock(
453-
oldIndex,
454-
false,
455-
delegate2.delegateFailureAndWrap(setAliasAndRemoveOldIndex(migrationInfo, bulkByScrollResponse))
456-
);
453+
// Successful completion of reindexing. Now we need to set the alias and remove the old index.
454+
setAliasAndRemoveOldIndex(migrationInfo, ActionListener.wrap(aliasesResponse -> {
455+
if (aliasesResponse.hasErrors()) {
456+
var e = new ElasticsearchException("Aliases request had errors");
457+
for (var error : aliasesResponse.getErrors()) {
458+
e.addSuppressed(error);
459+
}
460+
throw e;
461+
}
462+
logger.info(
463+
"Successfully migrated old index [{}] to new index [{}] from feature [{}]",
464+
oldIndexName,
465+
migrationInfo.getNextIndexName(),
466+
migrationInfo.getFeatureName()
467+
);
468+
delegate2.onResponse(bulkByScrollResponse);
469+
}, e -> {
470+
logger.error(
471+
() -> format(
472+
"An error occurred while changing aliases and removing the old index [%s] from feature [%s]",
473+
oldIndexName,
474+
migrationInfo.getFeatureName()
475+
),
476+
e
477+
);
478+
removeReadOnlyBlockOnReindexFailure(oldIndex, delegate2, e);
479+
}));
457480
}
458481
}, e -> {
459482
logger.error(
@@ -511,10 +534,7 @@ private void createIndex(SystemIndexMigrationInfo migrationInfo, ActionListener<
511534
);
512535
}
513536

514-
private CheckedBiConsumer<ActionListener<BulkByScrollResponse>, AcknowledgedResponse, Exception> setAliasAndRemoveOldIndex(
515-
SystemIndexMigrationInfo migrationInfo,
516-
BulkByScrollResponse bulkByScrollResponse
517-
) {
537+
private void setAliasAndRemoveOldIndex(SystemIndexMigrationInfo migrationInfo, ActionListener<IndicesAliasesResponse> listener) {
518538
final IndicesAliasesRequestBuilder aliasesRequest = migrationInfo.createClient(baseClient).admin().indices().prepareAliases();
519539
aliasesRequest.removeIndex(migrationInfo.getCurrentIndexName());
520540
aliasesRequest.addAlias(migrationInfo.getNextIndexName(), migrationInfo.getCurrentIndexName());
@@ -533,30 +553,42 @@ private CheckedBiConsumer<ActionListener<BulkByScrollResponse>, AcknowledgedResp
533553
);
534554
});
535555

536-
// Technically this callback might have a different cluster state, but it shouldn't matter - these indices shouldn't be changing
537-
// while we're trying to migrate them.
538-
return (listener, unsetReadOnlyResponse) -> aliasesRequest.execute(
539-
listener.delegateFailureAndWrap((l, deleteIndexResponse) -> l.onResponse(bulkByScrollResponse))
540-
);
556+
aliasesRequest.execute(listener);
541557
}
542558

543559
/**
544-
* Makes the index readonly if it's not set as a readonly yet
560+
* Sets the write block on the index to the given value.
545561
*/
546562
private void setWriteBlock(Index index, boolean readOnlyValue, ActionListener<AcknowledgedResponse> listener) {
547-
final Settings readOnlySettings = Settings.builder().put(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey(), readOnlyValue).build();
548-
549-
metadataUpdateSettingsService.updateSettings(
550-
new UpdateSettingsClusterStateUpdateRequest(
551-
MasterNodeRequest.INFINITE_MASTER_NODE_TIMEOUT,
552-
TimeValue.ZERO,
553-
readOnlySettings,
554-
UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE,
555-
UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT,
556-
index
557-
),
558-
listener
559-
);
563+
if (readOnlyValue) {
564+
// Setting the Block with an AddIndexBlockRequest ensures all shards have accounted for the block and all
565+
// in-flight writes are completed before returning.
566+
baseClient.admin()
567+
.indices()
568+
.addBlock(
569+
new AddIndexBlockRequest(WRITE, index.getName()).masterNodeTimeout(MasterNodeRequest.INFINITE_MASTER_NODE_TIMEOUT),
570+
listener.delegateFailureAndWrap((l, response) -> {
571+
if (response.isAcknowledged() == false) {
572+
throw new ElasticsearchException("Failed to acknowledge read-only block index request");
573+
}
574+
l.onResponse(response);
575+
})
576+
);
577+
} else {
578+
// The only way to remove a Block is via a settings update.
579+
final Settings readOnlySettings = Settings.builder().put(IndexMetadata.INDEX_BLOCKS_WRITE_SETTING.getKey(), false).build();
580+
metadataUpdateSettingsService.updateSettings(
581+
new UpdateSettingsClusterStateUpdateRequest(
582+
MasterNodeRequest.INFINITE_MASTER_NODE_TIMEOUT,
583+
TimeValue.ZERO,
584+
readOnlySettings,
585+
UpdateSettingsClusterStateUpdateRequest.OnExisting.OVERWRITE,
586+
UpdateSettingsClusterStateUpdateRequest.OnStaticSetting.REJECT,
587+
index
588+
),
589+
listener
590+
);
591+
}
560592
}
561593

562594
private void reindex(SystemIndexMigrationInfo migrationInfo, ActionListener<BulkByScrollResponse> listener) {

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege/SystemPrivilege.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77
package org.elasticsearch.xpack.core.security.authz.privilege;
88

9+
import org.elasticsearch.action.admin.indices.readonly.TransportAddIndexBlockAction;
910
import org.elasticsearch.action.search.TransportSearchShardsAction;
1011
import org.elasticsearch.index.seqno.RetentionLeaseActions;
1112
import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction;
@@ -38,12 +39,13 @@ public final class SystemPrivilege extends Privilege {
3839
RetentionLeaseActions.ADD.name() + "*", // needed for CCR to add retention leases
3940
RetentionLeaseActions.REMOVE.name() + "*", // needed for CCR to remove retention leases
4041
RetentionLeaseActions.RENEW.name() + "*", // needed for CCR to renew retention leases
41-
"indices:admin/settings/update", // needed for DiskThresholdMonitor.markIndicesReadOnly
42+
"indices:admin/settings/update", // needed for: DiskThresholdMonitor.markIndicesReadOnly, SystemIndexMigrator
4243
CompletionPersistentTaskAction.INSTANCE.name(), // needed for ShardFollowTaskCleaner
4344
"indices:data/write/*", // needed for SystemIndexMigrator
4445
"indices:data/read/*", // needed for SystemIndexMigrator
4546
"indices:admin/refresh", // needed for SystemIndexMigrator
4647
"indices:admin/aliases", // needed for SystemIndexMigrator
48+
TransportAddIndexBlockAction.TYPE.name() + "*", // needed for SystemIndexMigrator
4749
TransportSearchShardsAction.TYPE.name(), // added so this API can be called with the system user by other APIs
4850
ActionTypes.RELOAD_REMOTE_CLUSTER_CREDENTIALS_ACTION.name() // needed for Security plugin reload of remote cluster credentials
4951
);

0 commit comments

Comments
 (0)