Skip to content

Commit dc726e6

Browse files
authored
Allow ILM to transition to implicit cached steps (#91779)
ILM tries to honour the cached phase however, some steps are implicit (e.g. injected actions or the terminal policy/phase step) Currently, ILM would throw an exception if the currently cached phase was removed: ``` step [{"phase":"warm","action":"complete","name":"complete"}] for index [index] with policy [my-policy] does not exist ``` ILM would currently also throw an exception if the next step was an implicit one and the phase was removed from the underlying policy e.g. if the index is on the `migrate` step and looking to transition to the `check-migration` step, whilt the `warm` phase doesn't exist in the policy anymore ``` step [{"phase":"warm","action":"migrate","name":"check-migration"}] for index [index] with policy [my-policy] does not exist ``` This fixes these scenarios by enhancing the `PolicyStepsRegistry#parseStepKeysFromPhase` method to compute all the steps in the phase (including all implicit steps)
1 parent 7166af0 commit dc726e6

File tree

6 files changed

+168
-13
lines changed

6 files changed

+168
-13
lines changed

docs/changelog/91779.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 91779
2+
summary: Allow ILM to transition to implicit cached steps
3+
area: ILM+SLM
4+
type: bug
5+
issues:
6+
- 91749

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/PhaseCacheManagement.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ public static boolean isIndexPhaseDefinitionUpdatable(
263263
* information, returns null.
264264
*/
265265
@Nullable
266-
public static Set<Step.StepKey> readStepKeys(
266+
static Set<Step.StepKey> readStepKeys(
267267
final NamedXContentRegistry xContentRegistry,
268268
final Client client,
269269
final String phaseDef,

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@ public static void validateTransition(
8585
}
8686

8787
final Set<Step.StepKey> cachedStepKeys = stepRegistry.parseStepKeysFromPhase(
88-
lifecycleState.phaseDefinition(),
89-
lifecycleState.phase()
88+
policyName,
89+
lifecycleState.phase(),
90+
lifecycleState.phaseDefinition()
9091
);
9192
boolean isNewStepCached = cachedStepKeys != null && cachedStepKeys.contains(newStepKey);
9293

x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import org.elasticsearch.xpack.core.ilm.LifecyclePolicy;
3434
import org.elasticsearch.xpack.core.ilm.LifecyclePolicyMetadata;
3535
import org.elasticsearch.xpack.core.ilm.Phase;
36-
import org.elasticsearch.xpack.core.ilm.PhaseCacheManagement;
3736
import org.elasticsearch.xpack.core.ilm.PhaseExecutionInfo;
3837
import org.elasticsearch.xpack.core.ilm.Step;
3938
import org.elasticsearch.xpack.core.ilm.TerminalPolicyStep;
@@ -42,12 +41,14 @@
4241
import java.util.HashMap;
4342
import java.util.LinkedHashMap;
4443
import java.util.List;
44+
import java.util.Locale;
4545
import java.util.Map;
46-
import java.util.Optional;
46+
import java.util.Objects;
4747
import java.util.Set;
4848
import java.util.SortedMap;
4949
import java.util.TreeMap;
5050
import java.util.concurrent.ConcurrentHashMap;
51+
import java.util.stream.Collectors;
5152

5253
public class PolicyStepsRegistry {
5354
private static final Logger logger = LogManager.getLogger(PolicyStepsRegistry.class);
@@ -245,13 +246,50 @@ public Step.StepKey getFirstStepForPhaseAndAction(ClusterState state, Index inde
245246

246247
/*
247248
* Parses the step keys from the {@code phaseDef} for the given phase.
249+
* ILM makes use of some implicit steps that belong to actions that we automatically inject
250+
* (eg. unfollow and migrate) or special purpose steps like the phase `complete` step.
251+
*
252+
* The {@code phaseDef} is *mostly* a valid json we store in the lifecycle execution state. However,
253+
* we have a few of exceptional cases:
254+
* - null is treated as the `new` phase (see {@code InitializePolicyContextStep})
255+
* - the `new` phase is not stored as json but ... "new"
256+
* - there's a legacy step, the {@code TerminalPolicyStep} which is also not stored as json but as "completed"
257+
* (note: this step exists only for BWC reasons as these days we move to the {@code PhaseCompleteStep} when reaching
258+
* the end of the phase)
259+
*
260+
* This method returns **all** the steps that are part of the phase definition including the implicit steps.
261+
*
248262
* Returns null if there's a parsing error.
249263
*/
250264
@Nullable
251-
public Set<Step.StepKey> parseStepKeysFromPhase(String phaseDef, String currentPhase) {
252-
return PhaseCacheManagement.readStepKeys(xContentRegistry, client, phaseDef, currentPhase, licenseState);
265+
public Set<Step.StepKey> parseStepKeysFromPhase(String policy, String currentPhase, String phaseDef) {
266+
try {
267+
String phaseDefNonNull = Objects.requireNonNullElse(phaseDef, InitializePolicyContextStep.INITIALIZATION_PHASE);
268+
return parseStepsFromPhase(policy, currentPhase, phaseDefNonNull).stream().map(Step::getKey).collect(Collectors.toSet());
269+
} catch (IOException e) {
270+
logger.trace(
271+
() -> String.format(
272+
Locale.ROOT,
273+
"unable to parse steps for policy [{}], phase [{}], and phase definition [{}]",
274+
policy,
275+
currentPhase,
276+
phaseDef
277+
),
278+
e
279+
);
280+
return null;
281+
}
253282
}
254283

284+
/**
285+
* The {@code phaseDef} is *mostly* a valid json we store in the lifecycle execution state. However,
286+
* we have a few of exceptional cases:
287+
* - null is treated as the `new` phase (see {@code InitializePolicyContextStep})
288+
* - the `new` phase is not stored as json but ... "new"
289+
* - there's a legacy step, the {@code TerminalPolicyStep} which is also not stored as json but as "completed"
290+
* (note: this step exists only for BWC reasons as these days we move to the {@code PhaseCompleteStep} when reaching
291+
* the end of the phase)
292+
*/
255293
private List<Step> parseStepsFromPhase(String policy, String currentPhase, String phaseDef) throws IOException {
256294
final PhaseExecutionInfo phaseExecutionInfo;
257295
LifecyclePolicyMetadata policyMetadata = lifecyclePolicyMap.get(policy);
@@ -341,8 +379,10 @@ public Step getStep(final IndexMetadata indexMetadata, final Step.StepKey stepKe
341379
}
342380

343381
// parse phase steps from the phase definition in the index settings
344-
final String phaseJson = Optional.ofNullable(indexMetadata.getLifecycleExecutionState().phaseDefinition())
345-
.orElse(InitializePolicyContextStep.INITIALIZATION_PHASE);
382+
final String phaseJson = Objects.requireNonNullElse(
383+
indexMetadata.getLifecycleExecutionState().phaseDefinition(),
384+
InitializePolicyContextStep.INITIALIZATION_PHASE
385+
);
346386

347387
final List<Step> phaseSteps;
348388
try {

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,66 @@ public void testValidateTransitionToCachedStepWhenMissingPhaseFromPolicy() {
684684
}
685685
}
686686

687+
public void testValidateTransitionToInjectedMissingStep() {
688+
// we'll test the case when the warm phase was deleted and the next step is an injected one
689+
690+
LifecycleExecutionState.Builder executionState = LifecycleExecutionState.builder()
691+
.setPhase("warm")
692+
.setAction("migrate")
693+
.setStep("migrate")
694+
.setPhaseDefinition("""
695+
{
696+
"policy" : "my-policy",
697+
"phase_definition" : {
698+
"min_age" : "20m",
699+
"actions" : {
700+
"set_priority" : {
701+
"priority" : 150
702+
}
703+
}
704+
},
705+
"version" : 1,
706+
"modified_date_in_millis" : 1578521007076
707+
}""");
708+
709+
IndexMetadata meta = buildIndexMetadata("my-policy", executionState);
710+
711+
try (Client client = new NoOpClient(getTestName())) {
712+
Step.StepKey currentStepKey = new Step.StepKey("warm", MigrateAction.NAME, MigrateAction.NAME);
713+
Step.StepKey nextStepKey = new Step.StepKey("warm", MigrateAction.NAME, DataTierMigrationRoutedStep.NAME);
714+
715+
Step.StepKey waitForRolloverStepKey = new Step.StepKey("hot", RolloverAction.NAME, WaitForRolloverReadyStep.NAME);
716+
Step.StepKey rolloverStepKey = new Step.StepKey("hot", RolloverAction.NAME, RolloverStep.NAME);
717+
Step waitForRolloverReadyStep = new WaitForRolloverReadyStep(
718+
waitForRolloverStepKey,
719+
rolloverStepKey,
720+
client,
721+
null,
722+
null,
723+
null,
724+
1L,
725+
null,
726+
null,
727+
null,
728+
null,
729+
null,
730+
null
731+
);
732+
733+
try {
734+
IndexLifecycleTransition.validateTransition(
735+
meta,
736+
currentStepKey,
737+
nextStepKey,
738+
createOneStepPolicyStepRegistry("my-policy", waitForRolloverReadyStep)
739+
);
740+
} catch (Exception e) {
741+
logger.error(e.getMessage(), e);
742+
fail("validateTransition should not throw exception on valid transitions");
743+
}
744+
}
745+
}
746+
687747
public void testMoveClusterStateToFailedStep() {
688748
String indexName = "my_index";
689749
String policyName = "my_policy";

x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/MoveToNextStepUpdateTaskTests.java

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import org.apache.lucene.util.SetOnce;
1010
import org.elasticsearch.Version;
11+
import org.elasticsearch.client.internal.Client;
1112
import org.elasticsearch.cluster.ClusterName;
1213
import org.elasticsearch.cluster.ClusterState;
1314
import org.elasticsearch.cluster.metadata.IndexMetadata;
@@ -27,20 +28,34 @@
2728
import org.elasticsearch.xpack.core.ilm.Step.StepKey;
2829
import org.junit.Before;
2930

31+
import java.util.ArrayList;
3032
import java.util.Collections;
3133
import java.util.List;
34+
import java.util.Locale;
35+
import java.util.Map;
3236
import java.util.concurrent.atomic.AtomicBoolean;
3337

3438
import static org.elasticsearch.cluster.metadata.LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY;
3539
import static org.hamcrest.Matchers.equalTo;
40+
import static org.mockito.Mockito.mock;
41+
import static org.mockito.Mockito.when;
3642

3743
public class MoveToNextStepUpdateTaskTests extends ESTestCase {
3844

45+
private static final NamedXContentRegistry REGISTRY;
46+
3947
String policy;
4048
ClusterState clusterState;
4149
Index index;
4250
LifecyclePolicy lifecyclePolicy;
4351

52+
static {
53+
try (IndexLifecycle indexLifecycle = new IndexLifecycle(Settings.EMPTY)) {
54+
List<NamedXContentRegistry.Entry> entries = new ArrayList<>(indexLifecycle.getNamedXContent());
55+
REGISTRY = new NamedXContentRegistry(entries);
56+
}
57+
}
58+
4459
@Before
4560
public void setupClusterState() {
4661
policy = randomAlphaOfLength(10);
@@ -75,13 +90,22 @@ public void testExecuteSuccessfullyMoved() throws Exception {
7590
setStateToKey(currentStepKey, now);
7691

7792
AtomicBoolean changed = new AtomicBoolean(false);
93+
Client client = mock(Client.class);
94+
when(client.settings()).thenReturn(Settings.EMPTY);
95+
AlwaysExistingStepRegistry stepRegistry = new AlwaysExistingStepRegistry(client);
96+
stepRegistry.update(
97+
new IndexLifecycleMetadata(
98+
Map.of(policy, new LifecyclePolicyMetadata(lifecyclePolicy, Collections.emptyMap(), 2L, 2L)),
99+
OperationMode.RUNNING
100+
)
101+
);
78102
MoveToNextStepUpdateTask task = new MoveToNextStepUpdateTask(
79103
index,
80104
policy,
81105
currentStepKey,
82106
nextStepKey,
83107
() -> now,
84-
new AlwaysExistingStepRegistry(),
108+
stepRegistry,
85109
state -> changed.set(true)
86110
);
87111
ClusterState newState = task.execute(clusterState);
@@ -140,13 +164,22 @@ public void testExecuteSuccessfulMoveWithInvalidNextStep() throws Exception {
140164
setStateToKey(currentStepKey, now);
141165

142166
SetOnce<Boolean> changed = new SetOnce<>();
167+
Client client = mock(Client.class);
168+
when(client.settings()).thenReturn(Settings.EMPTY);
169+
AlwaysExistingStepRegistry stepRegistry = new AlwaysExistingStepRegistry(client);
170+
stepRegistry.update(
171+
new IndexLifecycleMetadata(
172+
Map.of(policy, new LifecyclePolicyMetadata(lifecyclePolicy, Collections.emptyMap(), 2L, 2L)),
173+
OperationMode.RUNNING
174+
)
175+
);
143176
MoveToNextStepUpdateTask task = new MoveToNextStepUpdateTask(
144177
index,
145178
policy,
146179
currentStepKey,
147180
invalidNextStep,
148181
() -> now,
149-
new AlwaysExistingStepRegistry(),
182+
stepRegistry,
150183
s -> changed.set(true)
151184
);
152185
ClusterState newState = task.execute(clusterState);
@@ -186,7 +219,11 @@ public void testOnFailure() {
186219
private static class AlwaysExistingStepRegistry extends PolicyStepsRegistry {
187220

188221
AlwaysExistingStepRegistry() {
189-
super(new NamedXContentRegistry(Collections.emptyList()), null, null);
222+
this(null);
223+
}
224+
225+
AlwaysExistingStepRegistry(Client client) {
226+
super(REGISTRY, client, null);
190227
}
191228

192229
@Override
@@ -215,7 +252,18 @@ private void setStateToKey(StepKey stepKey, long now) {
215252
lifecycleState.setActionTime(now);
216253
lifecycleState.setStep(stepKey.name());
217254
lifecycleState.setStepTime(now);
218-
lifecycleState.setPhaseDefinition("{\"actions\":{\"TEST_ACTION\":{}}}");
255+
256+
lifecycleState.setPhaseDefinition(String.format(Locale.ROOT, """
257+
{
258+
"policy" : "%s",
259+
"phase_definition" : {
260+
"min_age" : "20m",
261+
"actions" : {
262+
}
263+
},
264+
"version" : 1,
265+
"modified_date_in_millis" : 1578521007076
266+
}""", policy));
219267
clusterState = ClusterState.builder(clusterState)
220268
.metadata(
221269
Metadata.builder(clusterState.getMetadata())

0 commit comments

Comments
 (0)