elastic · reakaleek · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025
@@ -246,7 +246,9 @@ dotnet_diagnostic.IDE0305.severity = none
 # CS8509 already warns
 dotnet_diagnostic.IDE0072.severity = none
 
-
+[src/api/Elastic.Documentation.Api.Lambda/**.cs]
+dotnet_diagnostic.IL3050.severity = none
+dotnet_diagnostic.IL2026.severity = none
 
 [DocumentationWebHost.cs]
 dotnet_diagnostic.IL3050.severity = none

@@ -0,0 +1,41 @@
+---
+# This workflow builds the bootstrap binary for the API Lambda function
+# so that it can be deployed to AWS Lambda.
+name: Build API Lambda
+
+on:
+  workflow_dispatch: 
+  workflow_call: 
+    inputs:
+      ref:
+        required: false
+        type: string
+        default: ${{ github.ref }}
+
+jobs: 
+  build:
+    runs-on: ubuntu-latest
+    env:
+      BINARY_PATH: .artifacts/Elastic.Documentation.Api.Lambda/release_linux-x64/bootstrap
+    steps:
+      - uses: actions/checkout@v4
+        with: 
+          ref: ${{ inputs.ref }}
+      - name: Amazon Linux 2023 build
+        run: |
+          docker build . -t api-lambda:latest -f src/api/Elastic.Documentation.Api.Lambda/Dockerfile
+      - name: Get bootstrap binary
+        run: |
+          docker cp $(docker create --name tc api-lambda:latest):/app/.artifacts/publish ./.artifacts && docker rm tc
+      - name: Inspect bootstrap binary
+        run: |
+          tree .artifacts
+          stat "${BINARY_PATH}"
+      - name: Archive artifact
+        id: upload-artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: api-lambda-binary
+          retention-days: 1
+          if-no-files-found: error
+          path: ${{ env.BINARY_PATH }}
@@ -32,9 +32,12 @@ jobs:
       - name: Validate Content Sources
         run: dotnet run --project src/tooling/docs-assembler -c release -- content-source validate
 
-  build-lambda:
+  build-link-index-updater-lambda:
     uses: ./.github/workflows/build-link-index-updater-lambda.yml
-
+
+  build-api-lambda:
+    uses: ./.github/workflows/build-api-lambda.yml
+
   npm:
     runs-on: ubuntu-latest
     defaults: 

@@ -8,6 +8,7 @@
   </ItemGroup>
   <!-- AWS -->
   <ItemGroup>
+    <PackageVersion Include="Amazon.Lambda.AspNetCoreServer.Hosting" Version="1.9.0" />
     <PackageVersion Include="Amazon.Lambda.RuntimeSupport" Version="1.13.0" />
     <PackageVersion Include="Amazon.Lambda.Core" Version="2.5.1" />
     <PackageVersion Include="Amazon.Lambda.S3Events" Version="3.1.0" />
@@ -18,6 +19,7 @@
     <PackageVersion Include="AWSSDK.S3" Version="4.0.0.1" />
     <PackageVersion Include="FakeItEasy" Version="8.3.0" />
     <PackageVersion Include="Elastic.Ingest.Elasticsearch" Version="0.11.3" />
+    <PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.4" />
     <PackageVersion Include="Microsoft.OpenApi" Version="2.0.0-preview9" />
     <PackageVersion Include="System.Text.Json" Version="9.0.5" />
   </ItemGroup>
@@ -70,4 +72,4 @@
     </PackageVersion>
     <PackageVersion Include="xunit.v3" Version="2.0.2" />
   </ItemGroup>
-</Project>
+</Project>
@@ -119,6 +119,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "config", "config", "{6FAB56
 		config\navigation.yml = config\navigation.yml
 	EndProjectSection
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "api", "api", "{B042CC78-5060-4091-B95A-79C71BA3908A}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Api.Core", "src\api\Elastic.Documentation.Api.Core\Elastic.Documentation.Api.Core.csproj", "{F30B90AD-1A01-4A6F-9699-809FA6875B22}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Api.Infrastructure", "src\api\Elastic.Documentation.Api.Infrastructure\Elastic.Documentation.Api.Infrastructure.csproj", "{AE3FC78E-167F-4B6E-88EC-84743EB748B7}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Api.Lambda", "src\api\Elastic.Documentation.Api.Lambda\Elastic.Documentation.Api.Lambda.csproj", "{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -204,6 +212,18 @@ Global
 		{164F55EC-9412-4CD4-81AD-3598B57632A6}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{164F55EC-9412-4CD4-81AD-3598B57632A6}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{164F55EC-9412-4CD4-81AD-3598B57632A6}.Release|Any CPU.Build.0 = Release|Any CPU
+		{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Release|Any CPU.Build.0 = Release|Any CPU
+		{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Release|Any CPU.Build.0 = Release|Any CPU
+		{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(NestedProjects) = preSolution
 		{4D198E25-C211-41DC-9E84-B15E89BD7048} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
@@ -234,5 +254,9 @@ Global
 		{89B83007-71E6-4B57-BA78-2544BFA476DB} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
 		{111E7029-BB29-4039-9B45-04776798A8DD} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
 		{164F55EC-9412-4CD4-81AD-3598B57632A6} = {67B576EE-02FA-4F9B-94BC-3630BC09ECE5}
+		{B042CC78-5060-4091-B95A-79C71BA3908A} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
+		{F30B90AD-1A01-4A6F-9699-809FA6875B22} = {B042CC78-5060-4091-B95A-79C71BA3908A}
+		{AE3FC78E-167F-4B6E-88EC-84743EB748B7} = {B042CC78-5060-4091-B95A-79C71BA3908A}
+		{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE} = {B042CC78-5060-4091-B95A-79C71BA3908A}
 	EndGlobalSection
 EndGlobal
@@ -4,25 +4,12 @@ import { EventSourceMessage } from '@microsoft/fetch-event-source'
 import { useEffect, useState, useRef, useCallback } from 'react'
 import * as z from 'zod'
 
-export const LlmGatewayRequestSchema = z.object({
-    userContext: z.object({
-        userEmail: z.string(),
-    }),
-    platformContext: z.object({
-        origin: z.literal('support_portal'),
-        useCase: z.literal('support_assistant'),
-        metadata: z.any(),
-    }),
-    input: z.array(
-        z.object({
-            role: z.string(),
-            message: z.string(),
-        })
-    ),
-    threadId: z.string(),
+export const AskAiRequestSchema = z.object({
+    message: z.string(),
+    threadId: z.string().optional(),
 })
 
-export type LlmGatewayRequest = z.infer<typeof LlmGatewayRequestSchema>
+export type AskAiRequest = z.infer<typeof AskAiRequestSchema>
 
 const sharedAttributes = {
     timestamp: z.number(),
@@ -154,8 +141,8 @@ export const useLlmGateway = (props: Props): UseLlmGatewayResponse => {
         [processMessage]
     )
 
-    const { sendMessage, abort } = useFetchEventSource<LlmGatewayRequest>({
-        apiEndpoint: '/chat',
+    const { sendMessage, abort } = useFetchEventSource<AskAiRequest>({
+        apiEndpoint: '/_api/v1/ask-ai/stream',
         onMessage,
         onError: (error) => {
             setError(error)
@@ -221,64 +208,16 @@ export const useLlmGateway = (props: Props): UseLlmGatewayResponse => {
     }
 }
 
-function createLlmGatewayRequest(question: string, threadId?: string) {
-    // TODO: we should move this to the backend so that the use cannot change this
-    // Right now, the backend is a pure proxy to the LLM gateway
-    return LlmGatewayRequestSchema.parse({
-        userContext: {
-            userEmail: `elastic-docs-v3@invalid`, // Random email (will be optional in the future)
-        },
-        platformContext: {
-            origin: 'support_portal',
-            useCase: 'support_assistant',
-            metadata: {},
-        },
-        input: [
-            {
-                role: 'user',
-                message: `
-                    # ROLE AND GOAL
-                    You are an expert AI assistant for the Elastic Stack (Elasticsearch, Kibana, Beats, Logstash, etc.). Your sole purpose is to answer user questions based *exclusively* on the provided context from the official Elastic Documentation.
-
-                    # CRITICAL INSTRUCTION: SINGLE-SHOT INTERACTION
-                    This is a single-turn interaction. The user cannot reply to your answer for clarification. Therefore, your response MUST be final, self-contained, and as comprehensive as possible based on the provided context.
-                    Also, keep the response as short as possible, but do not truncate the context.
-
-                    # RULES
-                    1.  **Facts** Always do RAG search to find the relevant Elastic documentation.
-                    2.  **Strictly Grounded Answers:** You MUST base your answer 100% on the information from the search results. Do not use any of your pre-trained knowledge or any information outside of this context.
-                    3.  **Handle Ambiguity Gracefully:** Since you cannot ask clarifying questions, if the question is broad or ambiguous (e.g., "how to improve performance"), structure your answer to cover the different interpretations supported by the context.
-                        * Acknowledge the ambiguity. For example: "Your question about 'performance' can cover several areas. Based on the documentation, here are the key aspects:"
-                        * Organize the answer with clear headings for each aspect (e.g., "Indexing Performance," "Query Performance").
-                        * But if there is a similar or related topic in the docs you can mention it and link to it.
-                    4.  **Direct Answer First:** If the context directly and sufficiently answers a specific question, provide a clear, comprehensive, and well-structured answer.
-                        * Use Markdown for formatting (e.g., code blocks for configurations, bullet points for lists).
-                        * Use LaTeX for mathematical or scientific notations where appropriate (e.g., \`$E = mc^2$\`).
-                        * Make the answer as complete as possible, as this is the user's only response.
-                        * Keep the answer short and concise. We want to link users to the Elastic Documentation to find more information.
-                    5.  **Handling Incomplete Answers:** If the context contains relevant information but does not fully answer the question, you MUST follow this procedure:
-                        * Start by explicitly stating that you could not find a complete answer.
-                        * Then, summarize the related information you *did* find in the context, explaining how it might be helpful.
-                    6.  **Handling No Answer:** If the context is empty or completely irrelevant to the question, you MUST respond with the following, and nothing else:
-                        I was unable to find an answer to your question in the Elastic Documentation.
-
-                        For further assistance, you may want to:
-                        * Ask the community of experts at **discuss.elastic.co**.
-                        * If you have an Elastic subscription, contact our support engineers at **support.elastic.co**."
-                    7.  If you are 100% sure that something is not supported by Elastic, then say so.
-                    8.  **Tone:** Your tone should be helpful, professional, and confident. It is better to provide no answer (Rule #5) than an incorrect one.
-                        * Assume that the user is using Elastic for the first time.
-                        * Assume that the user is a beginner.
-                        * Assume that the user has a limited knowledge of Elastic
-                        * Explain unusual terminology, abbreviations, or acronyms.
-                        * Always try to cite relevant Elastic documentation.
-                `,
-            },
-            {
-                role: 'user',
-                message: question,
-            },
-        ],
+/**
+ * Builds the ask-ai request body from the user's message and an optional thread id.
+ * The object is run through AskAiRequestSchema.parse, which validates it at runtime.
+ */
+function createLlmGatewayRequest(
+    message: string,
+    threadId?: string
+): AskAiRequest {
+    return AskAiRequestSchema.parse({
+        message,
         threadId,
     })
 }
@@ -0,0 +1,27 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using Microsoft.Extensions.Logging;
+
+namespace Elastic.Documentation.Api.Core.AskAi;
+
+/// <summary>
+/// Use case that logs an incoming <see cref="AskAiRequest"/> and hands it to the injected gateway.
+/// </summary>
+public class AskAiUsecase(IAskAiGateway<Stream> askAiGateway, ILogger<AskAiUsecase> logger)
+{
+	/// <summary>Forwards the request to the gateway and returns the stream it produces.</summary>
+	/// <param name="askAiRequest">The request to log (at debug level) and forward.</param>
+	/// <param name="ctx">Passed through unchanged to the gateway call.</param>
+	/// <returns>The stream returned by the gateway.</returns>
+	public async Task<Stream> AskAi(AskAiRequest askAiRequest, Cancel ctx)
+	{
+		logger.LogDebug("Processing AskAiRequest: {Request}", askAiRequest);
+		var gatewayStream = await askAiGateway.AskAi(askAiRequest, ctx);
+		return gatewayStream;
+	}
+}
+
+/// <summary>Ask AI request payload: the message text plus an optional thread identifier.</summary>
+public record AskAiRequest(string Message, string? ThreadId);
@@ -0,0 +1,15 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+namespace Elastic.Documentation.Api.Core.AskAi;
+
+/// <summary>Contract for a component that takes an <see cref="AskAiRequest"/> and produces a result.</summary>
+/// <typeparam name="TResponse">Type of the result produced by the gateway.</typeparam>
+public interface IAskAiGateway<TResponse>
+{
+	/// <summary>Submits the request and asynchronously yields the gateway's result.</summary>
+	/// <param name="askAiRequest">The request to submit.</param>
+	/// <param name="ctx">Optional; takes its <c>default</c> value when omitted.</param>
+	Task<TResponse> AskAi(AskAiRequest askAiRequest, Cancel ctx = default);
+}
@@ -0,0 +1,15 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+    <PropertyGroup>
+        <TargetFramework>net9.0</TargetFramework>
+        <ImplicitUsings>enable</ImplicitUsings>
+        <Nullable>enable</Nullable>
+        <AssemblyName>Elastic.Documentation.Api.Core</AssemblyName>
+        <RootNamespace>Elastic.Documentation.Api.Core</RootNamespace>
+    </PropertyGroup>
+
+    <ItemGroup>
+      <PackageReference Include="Microsoft.Extensions.Logging" />
+    </ItemGroup>
+
+</Project>
@@ -0,0 +1,16 @@
+// Licensed to Elasticsearch B.V under one or more agreements.
+// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
+// See the LICENSE file in the project root for more information
+
+using System.Text.Json.Serialization;
+using Elastic.Documentation.Api.Core.AskAi;
+
+namespace Elastic.Documentation.Api.Core;
+
+/// <summary>
+/// Source-generated <see cref="JsonSerializerContext"/> covering <see cref="AskAiRequest"/>,
+/// configured to use camelCase property names.
+/// </summary>
+[JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
+[JsonSerializable(typeof(AskAiRequest))]
+public partial class ApiJsonContext : JsonSerializerContext;