@@ -145,7 +145,7 @@ public void testIsGreenWhenPoliciesHaveFailedForLessThanWarningThreshold() {
145
145
var clusterState = createClusterStateWith (
146
146
new SnapshotLifecycleMetadata (
147
147
createSlmPolicyWithInvocations (
148
- snapshotInvocation (execTime , execTime + 1000L ),
148
+ snapshotInvocation (randomBoolean () ? null : execTime , execTime + 1000L ),
149
149
snapshotInvocation (null , execTime + window + 1000L ),
150
150
randomLongBetween (0 , 4 )
151
151
),
@@ -173,13 +173,50 @@ public void testIsGreenWhenPoliciesHaveFailedForLessThanWarningThreshold() {
173
173
public void testIsYellowWhenPoliciesHaveFailedForMoreThanWarningThreshold () {
174
174
long execTime = System .currentTimeMillis ();
175
175
long window = TimeUnit .HOURS .toMillis (24 ) + 5000L ; // 24 hours and some extra room.
176
- long failedInvocations = randomLongBetween (5L , Long .MAX_VALUE );
176
+ long failedInvocations1 = randomLongBetween (5L , Long .MAX_VALUE );
177
+ long failedInvocations2 = randomLongBetween (5L , Long .MAX_VALUE );
178
+ long failedInvocations3 = randomLongBetween (5L , Long .MAX_VALUE );
177
179
var clusterState = createClusterStateWith (
178
180
new SnapshotLifecycleMetadata (
179
- createSlmPolicyWithInvocations (
180
- snapshotInvocation (execTime , execTime + 1000L ),
181
- snapshotInvocation (null , execTime + window + 1000L ),
182
- failedInvocations
181
+ Map .of (
182
+ "test-policy" ,
183
+ SnapshotLifecyclePolicyMetadata .builder ()
184
+ .setPolicy (new SnapshotLifecyclePolicy ("policy-id-1" , "test-policy" , "" , "test-repository" , null , null ))
185
+ .setVersion (1L )
186
+ .setModifiedDate (System .currentTimeMillis ())
187
+ .setLastSuccess (snapshotInvocation (execTime , execTime + 1000L ))
188
+ .setLastFailure (snapshotInvocation (null , execTime + window + 1000L ))
189
+ .setInvocationsSinceLastSuccess (failedInvocations1 )
190
+ .build (),
191
+ "test-policy-without-any-success" ,
192
+ SnapshotLifecyclePolicyMetadata .builder ()
193
+ .setPolicy (
194
+ new SnapshotLifecyclePolicy ("policy-id-2" , "test-policy-without-any-success" , "" , "test-repository" , null , null )
195
+ )
196
+ .setVersion (1L )
197
+ .setModifiedDate (System .currentTimeMillis ())
198
+ .setLastSuccess (null )
199
+ .setLastFailure (snapshotInvocation (null , execTime + window + 1000L ))
200
+ .setInvocationsSinceLastSuccess (failedInvocations2 )
201
+ .build (),
202
+ "test-policy-without-success-start-time" ,
203
+ SnapshotLifecyclePolicyMetadata .builder ()
204
+ .setPolicy (
205
+ new SnapshotLifecyclePolicy (
206
+ "policy-id-3" ,
207
+ "test-policy-without-success-start-time" ,
208
+ "" ,
209
+ "test-repository" ,
210
+ null ,
211
+ null
212
+ )
213
+ )
214
+ .setVersion (1L )
215
+ .setModifiedDate (System .currentTimeMillis ())
216
+ .setLastSuccess (snapshotInvocation (null , execTime ))
217
+ .setLastFailure (snapshotInvocation (null , execTime + window + 1000L ))
218
+ .setInvocationsSinceLastSuccess (failedInvocations3 )
219
+ .build ()
183
220
),
184
221
RUNNING ,
185
222
null
@@ -194,15 +231,27 @@ public void testIsYellowWhenPoliciesHaveFailedForMoreThanWarningThreshold() {
194
231
new HealthIndicatorResult (
195
232
NAME ,
196
233
YELLOW ,
197
- "Encountered [1 ] unhealthy snapshot lifecycle management policies." ,
234
+ "Encountered [3 ] unhealthy snapshot lifecycle management policies." ,
198
235
new SimpleHealthIndicatorDetails (
199
236
Map .of (
200
237
"slm_status" ,
201
238
RUNNING ,
202
239
"policies" ,
203
- 1 ,
240
+ 3 ,
204
241
"unhealthy_policies" ,
205
- Map .of ("count" , 1 , "invocations_since_last_success" , Map .of ("test-policy" , failedInvocations ))
242
+ Map .of (
243
+ "count" ,
244
+ 3 ,
245
+ "invocations_since_last_success" ,
246
+ Map .of (
247
+ "test-policy" ,
248
+ failedInvocations1 ,
249
+ "test-policy-without-any-success" ,
250
+ failedInvocations2 ,
251
+ "test-policy-without-success-start-time" ,
252
+ failedInvocations3
253
+ )
254
+ )
206
255
)
207
256
),
208
257
Collections .singletonList (
@@ -218,15 +267,30 @@ public void testIsYellowWhenPoliciesHaveFailedForMoreThanWarningThreshold() {
218
267
List .of (
219
268
new Diagnosis (
220
269
SlmHealthIndicatorService .checkRecentlyFailedSnapshots (
221
- "An automated snapshot policy is unhealthy:\n "
270
+ "Several automated snapshot policies are unhealthy:\n "
222
271
+ "- [test-policy] had ["
223
- + failedInvocations
272
+ + failedInvocations1
224
273
+ "] repeated failures without successful execution since ["
225
274
+ FORMATTER .formatMillis (execTime )
226
- + "]" ,
227
- "Check the snapshot lifecycle policy for detailed failure info:\n - GET /_slm/policy/policy-id?human"
275
+ + "]\n "
276
+ + "- [test-policy-without-any-success] had ["
277
+ + failedInvocations2
278
+ + "] repeated failures without successful execution\n "
279
+ + "- [test-policy-without-success-start-time] had ["
280
+ + failedInvocations3
281
+ + "] repeated failures without successful execution" ,
282
+ "Check the snapshot lifecycle policies for detailed failure info:\n "
283
+ + "- GET /_slm/policy/policy-id-1?human\n "
284
+ + "- GET /_slm/policy/policy-id-2?human\n "
285
+ + "- GET /_slm/policy/policy-id-3?human"
286
+
228
287
),
229
- List .of (new Diagnosis .Resource (Type .SLM_POLICY , List .of ("test-policy" )))
288
+ List .of (
289
+ new Diagnosis .Resource (
290
+ Type .SLM_POLICY ,
291
+ List .of ("test-policy" , "test-policy-without-any-success" , "test-policy-without-success-start-time" )
292
+ )
293
+ )
230
294
)
231
295
)
232
296
)
0 commit comments