Skip to content

Commit ff3ec37

Browse files
committed
Make IndexLifecycleService project-aware
Updates the background service of ILM to process all projects in the cluster.
1 parent c3255ad commit ff3ec37

File tree

3 files changed

+97
-105
lines changed

3 files changed

+97
-105
lines changed

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleService.java

Lines changed: 67 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import org.elasticsearch.cluster.ProjectState;
1919
import org.elasticsearch.cluster.metadata.IndexMetadata;
2020
import org.elasticsearch.cluster.metadata.LifecycleExecutionState;
21-
import org.elasticsearch.cluster.metadata.Metadata;
2221
import org.elasticsearch.cluster.metadata.ProjectMetadata;
2322
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
2423
import org.elasticsearch.cluster.service.ClusterService;
@@ -28,7 +27,6 @@
2827
import org.elasticsearch.common.scheduler.TimeValueSchedule;
2928
import org.elasticsearch.common.settings.Settings;
3029
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
31-
import org.elasticsearch.core.FixForMultiProject;
3230
import org.elasticsearch.core.Nullable;
3331
import org.elasticsearch.core.SuppressForbidden;
3432
import org.elasticsearch.core.TimeValue;
@@ -183,9 +181,12 @@ public ProjectMetadata moveIndicesToPreviouslyFailedStep(ProjectMetadata current
183181
void onMaster(ClusterState clusterState) {
184182
maybeScheduleJob();
185183

186-
// TODO multi-project: this probably needs a per-project iteration
187-
@FixForMultiProject
188-
final ProjectState state = clusterState.projectState(Metadata.DEFAULT_PROJECT_ID);
184+
for (var projectId : clusterState.metadata().projects().keySet()) {
185+
onMaster(clusterState.projectState(projectId));
186+
}
187+
}
188+
189+
void onMaster(ProjectState state) {
189190
final ProjectMetadata projectMetadata = state.metadata();
190191
final IndexLifecycleMetadata currentMetadata = projectMetadata.custom(IndexLifecycleMetadata.TYPE);
191192
if (currentMetadata != null) {
@@ -416,20 +417,21 @@ public void applyClusterState(ClusterChangedEvent event) {
416417
return;
417418
}
418419

419-
@FixForMultiProject
420-
final IndexLifecycleMetadata ilmMetadata = event.state()
421-
.metadata()
422-
.getProject(Metadata.DEFAULT_PROJECT_ID)
423-
.custom(IndexLifecycleMetadata.TYPE);
424-
if (ilmMetadata == null) {
425-
return;
426-
}
427-
final IndexLifecycleMetadata previousIlmMetadata = event.previousState()
428-
.metadata()
429-
.getProject(Metadata.DEFAULT_PROJECT_ID)
430-
.custom(IndexLifecycleMetadata.TYPE);
431-
if (event.previousState().nodes().isLocalNodeElectedMaster() == false || ilmMetadata != previousIlmMetadata) {
432-
policyRegistry.update(ilmMetadata);
420+
// We're updating the policy registry cache here, which doesn't actually work with multiple projects because the policies from one
421+
// project would overwrite the polices from another project. However, since we're not planning on running ILM in a multi-project
422+
// cluster, we can ignore this.
423+
for (var project : event.state().metadata().projects().values()) {
424+
final IndexLifecycleMetadata ilmMetadata = project.custom(IndexLifecycleMetadata.TYPE);
425+
if (ilmMetadata == null) {
426+
continue;
427+
}
428+
final var previousProject = event.previousState().metadata().projects().get(project.id());
429+
final IndexLifecycleMetadata previousIlmMetadata = previousProject == null
430+
? null
431+
: previousProject.custom(IndexLifecycleMetadata.TYPE);
432+
if (event.previousState().nodes().isLocalNodeElectedMaster() == false || ilmMetadata != previousIlmMetadata) {
433+
policyRegistry.update(ilmMetadata);
434+
}
433435
}
434436
}
435437

@@ -461,10 +463,13 @@ public boolean policyExists(String policyId) {
461463
* @param clusterState the current cluster state
462464
* @param fromClusterStateChange whether things are triggered from the cluster-state-listener or the scheduler
463465
*/
464-
@FixForMultiProject
465466
void triggerPolicies(ClusterState clusterState, boolean fromClusterStateChange) {
466-
@FixForMultiProject
467-
final var state = clusterState.projectState(Metadata.DEFAULT_PROJECT_ID);
467+
for (var projectId : clusterState.metadata().projects().keySet()) {
468+
triggerPolicies(clusterState.projectState(projectId), fromClusterStateChange);
469+
}
470+
}
471+
472+
void triggerPolicies(ProjectState state, boolean fromClusterStateChange) {
468473
final var projectMetadata = state.metadata();
469474
IndexLifecycleMetadata currentMetadata = projectMetadata.custom(IndexLifecycleMetadata.TYPE);
470475

@@ -585,51 +590,54 @@ PolicyStepsRegistry getPolicyRegistry() {
585590
return policyRegistry;
586591
}
587592

588-
static Set<String> indicesOnShuttingDownNodesInDangerousStep(ClusterState state, String nodeId) {
593+
static boolean indicesOnShuttingDownNodesInDangerousStep(ClusterState state, String nodeId) {
589594
final Set<String> shutdownNodes = PluginShutdownService.shutdownTypeNodes(
590595
state,
591596
SingleNodeShutdownMetadata.Type.REMOVE,
592597
SingleNodeShutdownMetadata.Type.SIGTERM,
593598
SingleNodeShutdownMetadata.Type.REPLACE
594599
);
595600
if (shutdownNodes.isEmpty()) {
596-
return Set.of();
601+
return true;
597602
}
598603

599-
// Returning a set of strings will cause weird behavior with multiple projects
600-
@FixForMultiProject
601-
Set<String> indicesPreventingShutdown = state.metadata()
602-
.projects()
603-
.values()
604-
.stream()
605-
.flatMap(project -> project.indices().entrySet().stream())
606-
// Filter out to only consider managed indices
607-
.filter(indexToMetadata -> Strings.hasText(indexToMetadata.getValue().getLifecyclePolicyName()))
608-
// Only look at indices in the shrink action
609-
.filter(indexToMetadata -> ShrinkAction.NAME.equals(indexToMetadata.getValue().getLifecycleExecutionState().action()))
610-
// Only look at indices on a step that may potentially be dangerous if we removed the node
611-
.filter(indexToMetadata -> {
612-
String step = indexToMetadata.getValue().getLifecycleExecutionState().step();
613-
return SetSingleNodeAllocateStep.NAME.equals(step)
614-
|| CheckShrinkReadyStep.NAME.equals(step)
615-
|| ShrinkStep.NAME.equals(step)
616-
|| ShrunkShardsAllocatedStep.NAME.equals(step);
617-
})
618-
// Only look at indices where the node picked for the shrink is the node marked as shutting down
619-
.filter(indexToMetadata -> {
620-
String nodePicked = indexToMetadata.getValue()
621-
.getSettings()
622-
.get(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id");
623-
return nodeId.equals(nodePicked);
624-
})
625-
.map(Map.Entry::getKey)
626-
.collect(Collectors.toSet());
627-
logger.trace(
628-
"with nodes marked as shutdown for removal {}, indices {} are preventing shutdown",
629-
shutdownNodes,
630-
indicesPreventingShutdown
631-
);
632-
return indicesPreventingShutdown;
604+
boolean result = true;
605+
for (var project : state.metadata().projects().values()) {
606+
Set<String> indicesPreventingShutdown = project.indices()
607+
.entrySet()
608+
.stream()
609+
// Filter out to only consider managed indices
610+
.filter(indexToMetadata -> Strings.hasText(indexToMetadata.getValue().getLifecyclePolicyName()))
611+
// Only look at indices in the shrink action
612+
.filter(indexToMetadata -> ShrinkAction.NAME.equals(indexToMetadata.getValue().getLifecycleExecutionState().action()))
613+
// Only look at indices on a step that may potentially be dangerous if we removed the node
614+
.filter(indexToMetadata -> {
615+
String step = indexToMetadata.getValue().getLifecycleExecutionState().step();
616+
return SetSingleNodeAllocateStep.NAME.equals(step)
617+
|| CheckShrinkReadyStep.NAME.equals(step)
618+
|| ShrinkStep.NAME.equals(step)
619+
|| ShrunkShardsAllocatedStep.NAME.equals(step);
620+
})
621+
// Only look at indices where the node picked for the shrink is the node marked as shutting down
622+
.filter(indexToMetadata -> {
623+
String nodePicked = indexToMetadata.getValue()
624+
.getSettings()
625+
.get(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_SETTING.getKey() + "_id");
626+
return nodeId.equals(nodePicked);
627+
})
628+
.map(Map.Entry::getKey)
629+
.collect(Collectors.toSet());
630+
logger.trace(
631+
"with nodes marked as shutdown for removal {}, indices {} in project {} are preventing shutdown",
632+
shutdownNodes,
633+
indicesPreventingShutdown,
634+
project.id()
635+
);
636+
if (indicesPreventingShutdown.isEmpty() == false) {
637+
result = false;
638+
}
639+
}
640+
return result;
633641
}
634642

635643
@Override
@@ -641,8 +649,7 @@ public boolean safeToShutdown(String nodeId, SingleNodeShutdownMetadata.Type shu
641649
case REPLACE:
642650
case REMOVE:
643651
case SIGTERM:
644-
Set<String> indices = indicesOnShuttingDownNodesInDangerousStep(clusterService.state(), nodeId);
645-
return indices.isEmpty();
652+
return indicesOnShuttingDownNodesInDangerousStep(clusterService.state(), nodeId);
646653
default:
647654
throw new IllegalArgumentException("unknown shutdown type: " + shutdownType);
648655
}

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleServiceTests.java

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.elasticsearch.cluster.metadata.LifecycleExecutionState;
1919
import org.elasticsearch.cluster.metadata.Metadata;
2020
import org.elasticsearch.cluster.metadata.NodesShutdownMetadata;
21+
import org.elasticsearch.cluster.metadata.ProjectMetadata;
2122
import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata;
2223
import org.elasticsearch.cluster.node.DiscoveryNode;
2324
import org.elasticsearch.cluster.node.DiscoveryNodeUtils;
@@ -169,13 +170,11 @@ public void testStoppedModeSkip() {
169170
.numberOfReplicas(randomIntBetween(0, 5))
170171
.build();
171172
Map<String, IndexMetadata> indices = Map.of(index.getName(), indexMetadata);
172-
Metadata metadata = Metadata.builder()
173+
var project = ProjectMetadata.builder(randomProjectIdOrDefault())
173174
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPED))
174-
.indices(indices)
175-
.persistentSettings(settings(IndexVersion.current()).build())
176-
.build();
175+
.indices(indices);
177176
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
178-
.metadata(metadata)
177+
.putProjectMetadata(project)
179178
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
180179
.build();
181180
ClusterChangedEvent event = new ClusterChangedEvent("_source", currentState, ClusterState.EMPTY_STATE);
@@ -208,13 +207,11 @@ public void testRequestedStopOnShrink() {
208207
.numberOfReplicas(randomIntBetween(0, 5))
209208
.build();
210209
Map<String, IndexMetadata> indices = Map.of(index.getName(), indexMetadata);
211-
Metadata metadata = Metadata.builder()
210+
var project = ProjectMetadata.builder(randomProjectIdOrDefault())
212211
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPING))
213-
.indices(indices)
214-
.persistentSettings(settings(IndexVersion.current()).build())
215-
.build();
212+
.indices(indices);
216213
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
217-
.metadata(metadata)
214+
.putProjectMetadata(project)
218215
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
219216
.build();
220217

@@ -264,13 +261,11 @@ private void verifyCanStopWithStep(String stoppableStep) {
264261
.numberOfReplicas(randomIntBetween(0, 5))
265262
.build();
266263
Map<String, IndexMetadata> indices = Map.of(index.getName(), indexMetadata);
267-
Metadata metadata = Metadata.builder()
264+
var project = ProjectMetadata.builder(randomProjectIdOrDefault())
268265
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPING))
269-
.indices(indices)
270-
.persistentSettings(settings(IndexVersion.current()).build())
271-
.build();
266+
.indices(indices);
272267
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
273-
.metadata(metadata)
268+
.putProjectMetadata(project)
274269
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
275270
.build();
276271

@@ -312,13 +307,11 @@ public void testRequestedStopOnSafeAction() {
312307
.numberOfReplicas(randomIntBetween(0, 5))
313308
.build();
314309
Map<String, IndexMetadata> indices = Map.of(index.getName(), indexMetadata);
315-
Metadata metadata = Metadata.builder()
310+
var project = ProjectMetadata.builder(randomProjectIdOrDefault())
316311
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.STOPPING))
317-
.indices(indices)
318-
.persistentSettings(settings(IndexVersion.current()).build())
319-
.build();
312+
.indices(indices);
320313
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
321-
.metadata(metadata)
314+
.putProjectMetadata(project)
322315
.nodes(DiscoveryNodes.builder().localNodeId(nodeId).masterNodeId(nodeId).add(masterNode).build())
323316
.build();
324317

@@ -429,11 +422,9 @@ public void doTestExceptionStillProcessesOtherIndices(boolean useOnMaster) {
429422
.build();
430423
Map<String, IndexMetadata> indices = Map.of(index1.getName(), i1indexMetadata, index2.getName(), i2indexMetadata);
431424

432-
Metadata metadata = Metadata.builder()
425+
var project = ProjectMetadata.builder(randomProjectIdOrDefault())
433426
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(policyMap, OperationMode.RUNNING))
434-
.indices(indices)
435-
.persistentSettings(settings(IndexVersion.current()).build())
436-
.build();
427+
.indices(indices);
437428

438429
Settings settings = Settings.builder().put(LifecycleSettings.LIFECYCLE_POLL_INTERVAL, "1s").build();
439430
var clusterSettings = new ClusterSettings(
@@ -443,7 +434,7 @@ public void doTestExceptionStillProcessesOtherIndices(boolean useOnMaster) {
443434
ClusterService clusterService = ClusterServiceUtils.createClusterService(threadPool, clusterSettings);
444435
DiscoveryNode node = clusterService.localNode();
445436
ClusterState currentState = ClusterState.builder(ClusterName.DEFAULT)
446-
.metadata(metadata)
437+
.putProjectMetadata(project)
447438
.nodes(DiscoveryNodes.builder().add(node).masterNodeId(node.getId()).localNodeId(node.getId()))
448439
.build();
449440
ClusterServiceUtils.setState(clusterService, currentState);
@@ -539,9 +530,10 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
539530
SingleNodeShutdownMetadata.Type.SIGTERM,
540531
SingleNodeShutdownMetadata.Type.REPLACE
541532
)) {
542-
ClusterState state = ClusterState.builder(ClusterName.DEFAULT).build();
543-
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "regular_node"), equalTo(Set.of()));
544-
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"), equalTo(Set.of()));
533+
final var project = ProjectMetadata.builder(randomProjectIdOrDefault()).build();
534+
ClusterState state = ClusterState.builder(ClusterName.DEFAULT).putProjectMetadata(project).build();
535+
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "regular_node"), equalTo(true));
536+
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"), equalTo(true));
545537

546538
IndexMetadata nonDangerousIndex = IndexMetadata.builder("no_danger")
547539
.settings(settings(IndexVersion.current()).put(LifecycleSettings.LIFECYCLE_NAME, "mypolicy"))
@@ -583,14 +575,12 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
583575
.build();
584576
Map<String, IndexMetadata> indices = Map.of("no_danger", nonDangerousIndex, "danger", dangerousIndex);
585577

586-
Metadata metadata = Metadata.builder()
587-
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(Map.of(), OperationMode.RUNNING))
588-
.indices(indices)
589-
.persistentSettings(settings(IndexVersion.current()).build())
590-
.build();
591-
592578
state = ClusterState.builder(ClusterName.DEFAULT)
593-
.metadata(metadata)
579+
.putProjectMetadata(
580+
ProjectMetadata.builder(project)
581+
.putCustom(IndexLifecycleMetadata.TYPE, new IndexLifecycleMetadata(Map.of(), OperationMode.RUNNING))
582+
.indices(indices)
583+
)
594584
.nodes(
595585
DiscoveryNodes.builder()
596586
.localNodeId(nodeId)
@@ -613,8 +603,8 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
613603
.build();
614604

615605
// No danger yet, because no node is shutting down
616-
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "regular_node"), equalTo(Set.of()));
617-
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"), equalTo(Set.of()));
606+
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "regular_node"), equalTo(true));
607+
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"), equalTo(true));
618608

619609
state = ClusterState.builder(state)
620610
.metadata(
@@ -638,12 +628,12 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
638628
)
639629
.build();
640630

641-
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "regular_node"), equalTo(Set.of()));
631+
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "regular_node"), equalTo(true));
642632
// No danger, because this is a "RESTART" type shutdown
643633
assertThat(
644634
"restart type shutdowns are not considered dangerous",
645635
IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"),
646-
equalTo(Set.of())
636+
equalTo(true)
647637
);
648638

649639
final String targetNodeName = type == SingleNodeShutdownMetadata.Type.REPLACE ? randomAlphaOfLengthBetween(10, 20) : null;
@@ -673,7 +663,7 @@ public void testIndicesOnShuttingDownNodesInDangerousStep() {
673663
.build();
674664

675665
// The dangerous index should be calculated as being in danger now
676-
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"), equalTo(Set.of("danger")));
666+
assertThat(IndexLifecycleService.indicesOnShuttingDownNodesInDangerousStep(state, "shutdown_node"), equalTo(false));
677667
}
678668
}
679669
}

x-pack/qa/multi-project/xpack-rest-tests-with-multiple-projects/build.gradle

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,7 @@ tasks.named("yamlRestTest").configure {
4848
'^esql/191_lookup_join_text/*',
4949
'^esql/192_lookup_join_on_aliases/*',
5050
'^health/10_usage/*',
51-
'^ilm/10_basic/Test Undeletable Policy In Use',
52-
'^ilm/20_move_to_step/*',
53-
'^ilm/30_retry/*',
5451
'^ilm/60_operation_mode/*',
55-
'^ilm/60_remove_policy_for_index/*',
56-
'^ilm/70_downsampling/*',
5752
'^ilm/80_health/*',
5853
'^logsdb/10_usage/*',
5954
'^migrate/10_reindex/*',

0 commit comments

Comments
 (0)