Skip to content

Commit fa1998c

Browse files
dgieselaarkibanamachinemaryam-saeidibenakansara
authored
[RCA] AI-assisted root cause analysis (#197200)
Implements an LLM-based root cause analysis process. At a high level, it works by investigating entities - which means pulling in alerts, SLOs, and log patterns. From there, it can inspect related entities to get to the root cause. The backend implementation lives in `x-pack/packages/observability_utils-*` (`service_rca`). It can be imported into any server-side plugin and executed from there. The UI changes are mostly contained to `x-pack/plugins/observability_solution/observabillity_ai_assistant_app`. This plugin now exports a `RootCauseAnalysisContainer` which takes a stream of data that is returned by the root cause analysis process. The current implementation lives in the Investigate app. There, it calls its own endpoint that kicks off the RCA process, and feeds it into the `RootCauseAnalysisContainer` exposed by the Observability AI Assistant app plugin. I've left it in a route there so the investigation itself can be updated as the process runs - this would allow the user to close the browser and come back later, and see a full investigation. > [!NOTE] > Notes for reviewing teams > > @kbn/es-types: > - support both types and typesWithBodyKey > - simplify KeysOfSources type > > @kbn/server-route-repository: > - abortable streamed responses > > @kbn/sse-utils*: > - abortable streamed responses > - serialize errors in specific format for more reliable re-hydration of errors > - keep connection open with SSE comments > > @kbn/inference-*: > - export *Of variants of types, for easier manual inference > - add automated retries for `output` API > - add `name` to tool responses for type inference (get type of tool response via tool name) > - add `data` to tool responses for transporting internal data (not sent to the LLM) > - simplify `chunksIntoMessage` > - allow consumers of nlToEsql task to add to `system` prompt > - add toolCallId to validation error message > > @kbn/aiops*: > - export `categorizationAnalyzer` for use in observability-ai* > > @kbn/observability-ai-assistant* > - configurable limit (tokens or doc count) for knowledge base recall > > @kbn/slo*: > - export client that returns summary indices --------- Co-authored-by: kibanamachine <[email protected]> Co-authored-by: Maryam Saeidi <[email protected]> Co-authored-by: Bena Kansara <[email protected]>
1 parent 64e9728 commit fa1998c

File tree

144 files changed

+27293
-364
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

144 files changed

+27293
-364
lines changed

.eslintrc.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,7 @@ module.exports = {
919919
'x-pack/plugins/observability_solution/exploratory_view/**/*.{js,mjs,ts,tsx}',
920920
'x-pack/plugins/observability_solution/ux/**/*.{js,mjs,ts,tsx}',
921921
'x-pack/plugins/observability_solution/slo/**/*.{js,mjs,ts,tsx}',
922+
'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
922923
],
923924
rules: {
924925
'no-console': ['warn', { allow: ['error'] }],
@@ -938,6 +939,7 @@ module.exports = {
938939
'x-pack/plugins/observability_solution/observability/**/*.stories.*',
939940
'x-pack/plugins/observability_solution/exploratory_view/**/*.stories.*',
940941
'x-pack/plugins/observability_solution/slo/**/*.stories.*',
942+
'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}',
941943
],
942944
rules: {
943945
'react/function-component-definition': [

.github/CODEOWNERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,8 @@ x-pack/packages/observability/alerting_rule_utils @elastic/obs-ux-management-tea
800800
x-pack/packages/observability/alerting_test_data @elastic/obs-ux-management-team
801801
x-pack/packages/observability/get_padded_alert_time_range_util @elastic/obs-ux-management-team
802802
x-pack/packages/observability/logs_overview @elastic/obs-ux-logs-team
803+
x-pack/packages/observability/observability_ai/observability_ai_common @elastic/obs-ai-assistant
804+
x-pack/packages/observability/observability_ai/observability_ai_server @elastic/obs-ai-assistant
803805
x-pack/packages/observability/observability_utils/observability_utils_browser @elastic/observability-ui
804806
x-pack/packages/observability/observability_utils/observability_utils_common @elastic/observability-ui
805807
x-pack/packages/observability/observability_utils/observability_utils_server @elastic/observability-ui

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,8 @@
695695
"@kbn/observability-ai-assistant-app-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_app",
696696
"@kbn/observability-ai-assistant-management-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_management",
697697
"@kbn/observability-ai-assistant-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant",
698+
"@kbn/observability-ai-common": "link:x-pack/packages/observability/observability_ai/observability_ai_common",
699+
"@kbn/observability-ai-server": "link:x-pack/packages/observability/observability_ai/observability_ai_server",
698700
"@kbn/observability-alert-details": "link:x-pack/packages/observability/alert_details",
699701
"@kbn/observability-alerting-rule-utils": "link:x-pack/packages/observability/alerting_rule_utils",
700702
"@kbn/observability-alerting-test-data": "link:x-pack/packages/observability/alerting_test_data",
@@ -1145,6 +1147,7 @@
11451147
"fnv-plus": "^1.3.1",
11461148
"formik": "^2.4.6",
11471149
"fp-ts": "^2.3.1",
1150+
"fuse.js": "^7.0.0",
11481151
"get-port": "^5.0.0",
11491152
"getopts": "^2.2.5",
11501153
"getos": "^3.1.0",

packages/kbn-es-types/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export type {
1313
SearchHit,
1414
ESSearchResponse,
1515
ESSearchRequest,
16+
ESSearchRequestWithoutBody,
1617
ESSourceOptions,
1718
InferSearchResponseOf,
1819
AggregationResultOf,

packages/kbn-es-types/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99

1010
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
11+
import * as estypesWithoutBody from '@elastic/elasticsearch/lib/api/types';
1112
import type {
1213
Field,
1314
QueryDslFieldAndFormat,
@@ -26,6 +27,7 @@ import {
2627

2728
export type ESFilter = estypes.QueryDslQueryContainer;
2829
export type ESSearchRequest = estypes.SearchRequest;
30+
export type ESSearchRequestWithoutBody = estypesWithoutBody.SearchRequest;
2931
export type AggregationOptionsByType = Required<estypes.AggregationsAggregationContainer>;
3032

3133
// Typings for Elasticsearch queries and aggregations. These are intended to be

packages/kbn-es-types/src/search.ts

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,15 @@ type InvalidAggregationRequest = unknown;
2323
// Union keys are not included in keyof, but extends iterates over the types in a union.
2424
type ValidAggregationKeysOf<T extends Record<string, any>> = T extends T ? keyof T : never;
2525

26-
type KeyOfSource<T> = Record<
27-
keyof T,
28-
(T extends Record<string, { terms: { missing_bucket: true } }> ? null : never) | string | number
29-
>;
26+
type KeyOfSource<T> = {
27+
[key in keyof T]:
28+
| (T[key] extends Record<string, { terms: { missing_bucket: true } }> ? null : never)
29+
| string
30+
| number;
31+
};
3032

31-
type KeysOfSources<T extends any[]> = T extends [any]
32-
? KeyOfSource<T[0]>
33-
: T extends [any, any]
34-
? KeyOfSource<T[0]> & KeyOfSource<T[1]>
35-
: T extends [any, any, any]
36-
? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]>
37-
: T extends [any, any, any, any]
38-
? KeyOfSource<T[0]> & KeyOfSource<T[1]> & KeyOfSource<T[2]> & KeyOfSource<T[3]>
39-
: Record<string, null | string | number>;
33+
// convert to intersection to be able to get all the keys
34+
type KeysOfSources<T extends any[]> = UnionToIntersection<KeyOfSource<ValuesType<Pick<T, number>>>>;
4035

4136
type CompositeKeysOf<TAggregationContainer extends AggregationsAggregationContainer> =
4237
TAggregationContainer extends {

packages/kbn-investigation-shared/src/rest_specs/update.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ const updateInvestigationParamsSchema = z.object({
2424
}),
2525
tags: z.array(z.string()),
2626
externalIncidentUrl: z.string().nullable(),
27+
rootCauseAnalysis: z.object({
28+
events: z.array(z.any()),
29+
}),
2730
})
2831
.partial(),
2932
});

packages/kbn-investigation-shared/src/schema/investigation.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ const investigationSchema = z.object({
3535
notes: z.array(investigationNoteSchema),
3636
items: z.array(investigationItemSchema),
3737
externalIncidentUrl: z.string().nullable(),
38+
rootCauseAnalysis: z
39+
.object({
40+
events: z.array(z.any()),
41+
})
42+
.optional(),
3843
});
3944

4045
type Status = z.infer<typeof statusSchema>;

packages/kbn-server-route-repository/src/register_routes.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,15 @@ export function registerRoutes<TDependencies extends Record<string, any>>({
9898
if (isKibanaResponse(result)) {
9999
return result;
100100
} else if (isObservable(result)) {
101+
const controller = new AbortController();
102+
request.events.aborted$.subscribe(() => {
103+
controller.abort();
104+
});
101105
return response.ok({
102-
body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>),
106+
body: observableIntoEventSourceStream(result as Observable<ServerSentEvent>, {
107+
logger,
108+
signal: controller.signal,
109+
}),
103110
});
104111
} else {
105112
const body = result || {};
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
import { Logger } from '@kbn/logging';
11+
import { observableIntoEventSourceStream } from './observable_into_event_source_stream';
12+
import { PassThrough } from 'node:stream';
13+
import { Subject } from 'rxjs';
14+
import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events';
15+
import {
16+
ServerSentEventErrorCode,
17+
createSSEInternalError,
18+
createSSERequestError,
19+
} from '@kbn/sse-utils/src/errors';
20+
21+
describe('observableIntoEventSourceStream', () => {
22+
let logger: jest.Mocked<Logger>;
23+
24+
let controller: AbortController;
25+
26+
let stream: PassThrough;
27+
let source$: Subject<ServerSentEvent>;
28+
29+
let data: string[];
30+
31+
beforeEach(() => {
32+
jest.useFakeTimers();
33+
logger = {
34+
debug: jest.fn(),
35+
error: jest.fn(),
36+
} as unknown as jest.Mocked<Logger>;
37+
38+
controller = new AbortController();
39+
source$ = new Subject();
40+
data = [];
41+
42+
stream = observableIntoEventSourceStream(source$, { logger, signal: controller.signal });
43+
stream.on('data', (chunk) => {
44+
data.push(chunk.toString());
45+
});
46+
});
47+
48+
afterEach(() => {
49+
jest.clearAllTimers();
50+
});
51+
52+
it('writes events into the stream in SSE format', () => {
53+
source$.next({ type: ServerSentEventType.data, data: { foo: 'bar' } });
54+
source$.complete();
55+
56+
jest.runAllTimers();
57+
58+
expect(data).toEqual(['event: data\ndata: {"data":{"foo":"bar"}}\n\n']);
59+
});
60+
61+
it('handles SSE errors', () => {
62+
const sseError = createSSEInternalError('Invalid input');
63+
64+
source$.error(sseError);
65+
66+
jest.runAllTimers();
67+
68+
expect(logger.error).toHaveBeenCalledWith(sseError);
69+
expect(logger.debug).toHaveBeenCalled();
70+
const debugFn = logger.debug.mock.calls[0][0] as () => string;
71+
const loggedError = JSON.parse(debugFn());
72+
expect(loggedError).toEqual({
73+
type: 'error',
74+
error: {
75+
code: ServerSentEventErrorCode.internalError,
76+
message: 'Invalid input',
77+
meta: {},
78+
},
79+
});
80+
81+
expect(data).toEqual([
82+
`event: error\ndata: ${JSON.stringify({
83+
error: {
84+
code: ServerSentEventErrorCode.internalError,
85+
message: 'Invalid input',
86+
meta: {},
87+
},
88+
})}\n\n`,
89+
]);
90+
});
91+
92+
it('handles SSE errors with metadata', () => {
93+
const sseError = createSSERequestError('Invalid request', 400);
94+
95+
source$.error(sseError);
96+
97+
jest.runAllTimers();
98+
99+
expect(logger.error).toHaveBeenCalledWith(sseError);
100+
expect(logger.debug).toHaveBeenCalled();
101+
const debugFn = logger.debug.mock.calls[0][0] as () => string;
102+
const loggedError = JSON.parse(debugFn());
103+
expect(loggedError).toEqual({
104+
type: 'error',
105+
error: {
106+
code: ServerSentEventErrorCode.requestError,
107+
message: 'Invalid request',
108+
meta: {
109+
status: 400,
110+
},
111+
},
112+
});
113+
114+
expect(data).toEqual([
115+
`event: error\ndata: ${JSON.stringify({
116+
error: {
117+
code: ServerSentEventErrorCode.requestError,
118+
message: 'Invalid request',
119+
meta: {
120+
status: 400,
121+
},
122+
},
123+
})}\n\n`,
124+
]);
125+
});
126+
127+
it('handles non-SSE errors', () => {
128+
const error = new Error('Non-SSE Error');
129+
130+
source$.error(error);
131+
132+
jest.runAllTimers();
133+
134+
expect(logger.error).toHaveBeenCalledWith(error);
135+
expect(data).toEqual([
136+
`event: error\ndata: ${JSON.stringify({
137+
error: {
138+
code: ServerSentEventErrorCode.internalError,
139+
message: 'Non-SSE Error',
140+
},
141+
})}\n\n`,
142+
]);
143+
});
144+
145+
it('should send keep-alive comments every 10 seconds', () => {
146+
jest.advanceTimersByTime(10000);
147+
expect(data).toContain(': keep-alive');
148+
149+
jest.advanceTimersByTime(10000);
150+
expect(data.filter((d) => d === ': keep-alive')).toHaveLength(2);
151+
});
152+
153+
describe('without fake timers', () => {
154+
beforeEach(() => {
155+
jest.useFakeTimers({ doNotFake: ['nextTick'] });
156+
});
157+
158+
it('should end the stream when the observable completes', async () => {
159+
jest.useFakeTimers({ doNotFake: ['nextTick'] });
160+
161+
const endSpy = jest.fn();
162+
stream.on('end', endSpy);
163+
164+
source$.complete();
165+
166+
await new Promise((resolve) => process.nextTick(resolve));
167+
168+
expect(endSpy).toHaveBeenCalled();
169+
});
170+
171+
it('should end stream when signal is aborted', async () => {
172+
const endSpy = jest.fn();
173+
stream.on('end', endSpy);
174+
175+
// Emit some data
176+
source$.next({ type: ServerSentEventType.data, data: { initial: 'data' } });
177+
178+
// Abort the signal
179+
controller.abort();
180+
181+
// Emit more data after abort
182+
source$.next({ type: ServerSentEventType.data, data: { after: 'abort' } });
183+
184+
await new Promise((resolve) => process.nextTick(resolve));
185+
186+
expect(endSpy).toHaveBeenCalled();
187+
188+
// Data after abort should not be received
189+
expect(data).toEqual([
190+
`event: data\ndata: ${JSON.stringify({ data: { initial: 'data' } })}\n\n`,
191+
]);
192+
});
193+
194+
afterEach(() => {
195+
jest.useFakeTimers();
196+
});
197+
});
198+
});

0 commit comments

Comments
 (0)