Skip to content

Commit 5693e42

Browse files
kibanamachineszaffaranoelasticmachine
authored
[8.18] [Security Solution][Diagnostic Queries] Improve ILM checks for serverless (elastic#236166) (elastic#236179)
# Backport This will backport the following commits from `main` to `8.18`: - [[Security Solutio][Diagnostic Queries] Improve ILM checks for serverless (elastic#236166)](elastic#236166) <!--- Backport version: 9.6.6 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sorenlouv/backport) <!--BACKPORT [{"author":{"name":"Sebastián Zaffarano","email":"[email protected]"},"sourceCommit":{"committedDate":"2025-09-23T19:33:02Z","message":"[Security Solutio][Diagnostic Queries] Improve ILM checks for serverless (elastic#236166)\n\n## Summary\n\nImprove error handling when querying non-existent ILM APIs in\nserverless.\n\n### Checklist\n\nCheck the PR satisfies following conditions. \n\nReviewers should verify this PR satisfies this list as well.\n\n- [ ] Any text added follows [EUI's writing\nguidelines](https://elastic.github.io/eui/#/guidelines/writing), uses\nsentence case text and includes [i18n\nsupport](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)\n- [ ]\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\nwas added for features that require explanation or tutorials\n- [x] [Unit or functional\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\nwere updated or added to match the most common scenarios\n- [ ] If a plugin configuration key changed, check if it needs to be\nallowlisted in the cloud and added to the [docker\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\n- [ ] This was checked for breaking HTTP API changes, and any breaking\nchanges have been approved by the breaking-change committee. 
The\n`release_note:breaking` label should be applied in these situations.\n- [ ] [Flaky Test\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\nused on any tests changed\n- [ ] The PR description includes the appropriate Release Notes section,\nand the correct `release_note:*` label is applied per the\n[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)\n- [ ] Review the [backport\nguidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)\nand apply applicable `backport:*` labels.\n\n---------\n\nCo-authored-by: kibanamachine <[email protected]>\nCo-authored-by: Elastic Machine <[email protected]>","sha":"3b22aaf18c96b804b42c016a018de81c2da261b5","branchLabelMapping":{"^v9.2.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Team: SecuritySolution","backport:all-open","v9.2.0"],"title":"[Security Solutio][Diagnostic Queries] Improve ILM checks for serverless","number":236166,"url":"https://github.com/elastic/kibana/pull/236166","mergeCommit":{"message":"[Security Solutio][Diagnostic Queries] Improve ILM checks for serverless (elastic#236166)\n\n## Summary\n\nImprove error handling when querying non-existent ILM APIs in\nserverless.\n\n### Checklist\n\nCheck the PR satisfies following conditions. 
\n\nReviewers should verify this PR satisfies this list as well.\n\n- [ ] Any text added follows [EUI's writing\nguidelines](https://elastic.github.io/eui/#/guidelines/writing), uses\nsentence case text and includes [i18n\nsupport](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)\n- [ ]\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\nwas added for features that require explanation or tutorials\n- [x] [Unit or functional\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\nwere updated or added to match the most common scenarios\n- [ ] If a plugin configuration key changed, check if it needs to be\nallowlisted in the cloud and added to the [docker\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\n- [ ] This was checked for breaking HTTP API changes, and any breaking\nchanges have been approved by the breaking-change committee. 
The\n`release_note:breaking` label should be applied in these situations.\n- [ ] [Flaky Test\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\nused on any tests changed\n- [ ] The PR description includes the appropriate Release Notes section,\nand the correct `release_note:*` label is applied per the\n[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)\n- [ ] Review the [backport\nguidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)\nand apply applicable `backport:*` labels.\n\n---------\n\nCo-authored-by: kibanamachine <[email protected]>\nCo-authored-by: Elastic Machine <[email protected]>","sha":"3b22aaf18c96b804b42c016a018de81c2da261b5"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.2.0","branchLabelMappingKey":"^v9.2.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/236166","number":236166,"mergeCommit":{"message":"[Security Solutio][Diagnostic Queries] Improve ILM checks for serverless (elastic#236166)\n\n## Summary\n\nImprove error handling when querying non-existent ILM APIs in\nserverless.\n\n### Checklist\n\nCheck the PR satisfies following conditions. 
\n\nReviewers should verify this PR satisfies this list as well.\n\n- [ ] Any text added follows [EUI's writing\nguidelines](https://elastic.github.io/eui/#/guidelines/writing), uses\nsentence case text and includes [i18n\nsupport](https://github.com/elastic/kibana/blob/main/src/platform/packages/shared/kbn-i18n/README.md)\n- [ ]\n[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)\nwas added for features that require explanation or tutorials\n- [x] [Unit or functional\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\nwere updated or added to match the most common scenarios\n- [ ] If a plugin configuration key changed, check if it needs to be\nallowlisted in the cloud and added to the [docker\nlist](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)\n- [ ] This was checked for breaking HTTP API changes, and any breaking\nchanges have been approved by the breaking-change committee. The\n`release_note:breaking` label should be applied in these situations.\n- [ ] [Flaky Test\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was\nused on any tests changed\n- [ ] The PR description includes the appropriate Release Notes section,\nand the correct `release_note:*` label is applied per the\n[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)\n- [ ] Review the [backport\nguidelines](https://docs.google.com/document/d/1VyN5k91e5OVumlc0Gb9RPa3h1ewuPE705nRtioPiTvY/edit?usp=sharing)\nand apply applicable `backport:*` labels.\n\n---------\n\nCo-authored-by: kibanamachine <[email protected]>\nCo-authored-by: Elastic Machine <[email protected]>","sha":"3b22aaf18c96b804b42c016a018de81c2da261b5"}}]}] BACKPORT--> Co-authored-by: Sebastián Zaffarano <[email protected]> Co-authored-by: Elastic Machine <[email protected]>
1 parent 73c0f97 commit 5693e42

File tree

2 files changed

+233
-41
lines changed

2 files changed

+233
-41
lines changed

x-pack/solutions/security/plugins/security_solution/server/lib/telemetry/diagnostic/health_diagnostic_receiver.test.ts

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,5 +362,192 @@ describe('Security Solution - Health Diagnostic Queries - CircuitBreakingQueryEx
362362
done
363363
);
364364
});
365+
366+
test('should handle ILM API errors and assume serverless', (done) => {
367+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot', 'warm'] });
368+
const circuitBreaker = createMockCircuitBreaker(true);
369+
370+
const ilmError = new Error(
371+
'no handler found for uri [/.alerts-security.alerts*/_ilm/explain?only_managed=false&filter_path=indices.*.phase] and method [GET]'
372+
);
373+
mockEsClient.ilm.explainLifecycle.mockRejectedValue(ilmError);
374+
setupPointInTime(mockEsClient);
375+
mockEsClient.search.mockResolvedValue(createMockSearchResponse([]));
376+
377+
executeObservableTest(
378+
queryExecutor.search({ query, circuitBreakers: [circuitBreaker] }),
379+
() => {
380+
expect(mockEsClient.ilm.explainLifecycle).toHaveBeenCalledWith({
381+
index: 'test-index',
382+
only_managed: false,
383+
filter_path: ['indices.*.phase'],
384+
});
385+
expect(mockEsClient.openPointInTime).toHaveBeenCalledWith({
386+
index: ['test-index'],
387+
keep_alive: '1m',
388+
});
389+
done();
390+
},
391+
done
392+
);
393+
});
394+
395+
test('should handle network errors during ILM checks', (done) => {
396+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
397+
const circuitBreaker = createMockCircuitBreaker(true);
398+
399+
const networkError = new Error('ECONNREFUSED');
400+
mockEsClient.ilm.explainLifecycle.mockRejectedValue(networkError);
401+
setupPointInTime(mockEsClient);
402+
mockEsClient.search.mockResolvedValue(createMockSearchResponse([]));
403+
404+
executeObservableTest(
405+
queryExecutor.search({ query, circuitBreakers: [circuitBreaker] }),
406+
() => {
407+
expect(mockEsClient.openPointInTime).toHaveBeenCalledWith({
408+
index: ['test-index'],
409+
keep_alive: '1m',
410+
});
411+
done();
412+
},
413+
done
414+
);
415+
});
416+
417+
test('should handle malformed ILM responses', (done) => {
418+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
419+
const circuitBreaker = createMockCircuitBreaker(true);
420+
421+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({});
422+
setupPointInTime(mockEsClient);
423+
mockEsClient.search.mockResolvedValue(createMockSearchResponse([]));
424+
425+
executeObservableTest(
426+
queryExecutor.search({ query, circuitBreakers: [circuitBreaker] }),
427+
() => {
428+
expect(mockEsClient.openPointInTime).toHaveBeenCalledWith({
429+
index: ['test-index'],
430+
keep_alive: '1m',
431+
});
432+
done();
433+
},
434+
done
435+
);
436+
});
437+
});
438+
439+
describe('indicesFor method', () => {
440+
test('should return original index when no tiers are specified', async () => {
441+
const query = createMockQuery(QueryType.DSL);
442+
const result = await queryExecutor.indicesFor(query);
443+
expect(result).toEqual(['test-index']);
444+
expect(mockEsClient.ilm.explainLifecycle).not.toHaveBeenCalled();
445+
});
446+
447+
test('should filter indices by tiers when ILM is available', async () => {
448+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot', 'warm'] });
449+
450+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({
451+
indices: {
452+
'test-index-000001': { phase: 'hot' },
453+
'test-index-000002': { phase: 'warm' },
454+
'test-index-000003': { phase: 'cold' },
455+
'test-index-000004': { phase: 'hot' },
456+
},
457+
});
458+
459+
const result = await queryExecutor.indicesFor(query);
460+
expect(result).toEqual(['test-index-000001', 'test-index-000002', 'test-index-000004']);
461+
expect(mockEsClient.ilm.explainLifecycle).toHaveBeenCalledWith({
462+
index: 'test-index',
463+
only_managed: false,
464+
filter_path: ['indices.*.phase'],
465+
});
466+
});
467+
468+
test('should handle serverless environment (undefined indices)', async () => {
469+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
470+
471+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({ indices: undefined });
472+
473+
const result = await queryExecutor.indicesFor(query);
474+
expect(result).toEqual(['test-index']);
475+
});
476+
477+
test('should handle empty ILM response', async () => {
478+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
479+
480+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({});
481+
482+
const result = await queryExecutor.indicesFor(query);
483+
expect(result).toEqual(['test-index']);
484+
});
485+
486+
test('should handle indices without phase information', async () => {
487+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
488+
489+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({
490+
indices: {
491+
'test-index-000001': { phase: 'hot' },
492+
'test-index-000002': {},
493+
'test-index-000003': { other_field: 'value' },
494+
},
495+
});
496+
497+
const result = await queryExecutor.indicesFor(query);
498+
expect(result).toEqual(['test-index-000001']);
499+
});
500+
501+
test('should filter out indices not in specified tiers', async () => {
502+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
503+
504+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({
505+
indices: {
506+
'test-index-000001': { phase: 'hot' },
507+
'test-index-000002': { phase: 'warm' },
508+
'test-index-000003': { phase: 'cold' },
509+
},
510+
});
511+
512+
const result = await queryExecutor.indicesFor(query);
513+
expect(result).toEqual(['test-index-000001']);
514+
});
515+
516+
test('should handle ILM API errors by falling back to original index', async () => {
517+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
518+
519+
const serverlessError = new Error(
520+
'no handler found for uri [/.alerts-security.alerts*/_ilm/explain?only_managed=false&filter_path=indices.*.phase] and method [GET]'
521+
);
522+
mockEsClient.ilm.explainLifecycle.mockRejectedValue(serverlessError);
523+
524+
const result = await queryExecutor.indicesFor(query);
525+
expect(result).toEqual(['test-index']);
526+
});
527+
528+
test('should handle authorization errors gracefully', async () => {
529+
const query = createMockQuery(QueryType.DSL, { tiers: ['hot'] });
530+
531+
const authError = new Error('security_exception');
532+
mockEsClient.ilm.explainLifecycle.mockRejectedValue(authError);
533+
534+
const result = await queryExecutor.indicesFor(query);
535+
expect(result).toEqual(['test-index']);
536+
});
537+
538+
test('should return empty array when no indices match tiers', async () => {
539+
const query = createMockQuery(QueryType.DSL, { tiers: ['frozen'] });
540+
541+
mockEsClient.ilm.explainLifecycle.mockResolvedValue({
542+
indices: {
543+
'test-index-000001': { phase: 'hot' },
544+
'test-index-000002': { phase: 'warm' },
545+
'test-index-000003': { phase: 'cold' },
546+
},
547+
});
548+
549+
const result = await queryExecutor.indicesFor(query);
550+
expect(result).toEqual([]);
551+
});
365552
});
366553
});

x-pack/solutions/security/plugins/security_solution/server/lib/telemetry/diagnostic/health_diagnostic_receiver.ts

Lines changed: 46 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -203,52 +203,57 @@ export class CircuitBreakingQueryExecutorImpl implements CircuitBreakingQueryExe
203203
}
204204
const tiers = query.tiers;
205205

206-
return (
207-
await this.client.ilm
208-
.explainLifecycle({
209-
index: query.index,
210-
only_managed: false,
211-
filter_path: ['indices.*.phase'],
212-
})
213-
.then((response) => {
214-
if (response.indices === undefined) {
215-
this.logger.debug(
216-
'Got an empty response while explaining lifecycle. Asumming serverless.',
217-
{
218-
index: query.index,
219-
} as LogMeta
220-
);
221-
return [query.index];
222-
} else {
223-
const indices = Object.entries(response.indices).map(([indexName, stats]) => {
224-
if ('phase' in stats && stats.phase) {
225-
if (tiers.includes(stats.phase)) {
226-
return indexName;
227-
} else {
228-
this.logger.debug('Index is not in the expected phases', {
229-
phase: stats.phase,
230-
index: indexName,
231-
tiers,
232-
} as LogMeta);
233-
return '';
234-
}
206+
return this.client.ilm
207+
.explainLifecycle({
208+
index: query.index,
209+
only_managed: false,
210+
filter_path: ['indices.*.phase'],
211+
})
212+
.then((response) => {
213+
if (response.indices === undefined) {
214+
this.logger.debug(
215+
'Got an empty response while explaining lifecycle. Asumming serverless.',
216+
{
217+
index: query.index,
218+
} as LogMeta
219+
);
220+
return [query.index];
221+
} else {
222+
const indices = Object.entries(response.indices).map(([indexName, stats]) => {
223+
if ('phase' in stats && stats.phase) {
224+
if (tiers.includes(stats.phase)) {
225+
return indexName;
235226
} else {
236-
// should not happen, but just in case
237-
this.logger.debug('Index is not managed by an ILM', {
227+
this.logger.debug('Index is not in the expected phases', {
228+
phase: stats.phase,
238229
index: indexName,
239230
tiers,
240231
} as LogMeta);
241232
return '';
242233
}
243-
});
244-
this.logger.debug('Indices managed by ILM', {
245-
queryName: query.name,
246-
tiers: query.tiers,
247-
indices,
248-
} as LogMeta);
249-
return indices;
250-
}
251-
})
252-
).filter((indexName) => indexName !== '');
234+
} else {
235+
// should not happen, but just in case
236+
this.logger.debug('Index is not managed by an ILM', {
237+
index: indexName,
238+
tiers,
239+
} as LogMeta);
240+
return '';
241+
}
242+
});
243+
this.logger.debug('Indices managed by ILM', {
244+
queryName: query.name,
245+
tiers: query.tiers,
246+
indices,
247+
} as LogMeta);
248+
return indices;
249+
}
250+
})
251+
.then((indices) => {
252+
return indices.filter((indexName) => indexName !== '');
253+
})
254+
.catch((error) => {
255+
this.logger.info('Error while checking ILM status, assuming serverless', { error });
256+
return [query.index];
257+
});
253258
}
254259
}

0 commit comments

Comments
 (0)