Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ dotnet_diagnostic.IDE0305.severity = none
# CS8509 already warns
dotnet_diagnostic.IDE0072.severity = none


[src/api/Elastic.Documentation.Api.Lambda/**.cs]
dotnet_diagnostic.IL3050.severity = none
dotnet_diagnostic.IL2026.severity = none

[DocumentationWebHost.cs]
dotnet_diagnostic.IL3050.severity = none
Expand Down
41 changes: 41 additions & 0 deletions .github/workflows/build-api-lambda.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
---
# Builds the API lambda function's bootstrap binary so it can be
# deployed to AWS Lambda.
name: Build API Lambda

on:
  workflow_dispatch:
  workflow_call:
    inputs:
      ref:
        required: false
        type: string
        default: ${{ github.ref }}

jobs:
  build:
    runs-on: ubuntu-latest
    env:
      BINARY_PATH: .artifacts/Elastic.Documentation.Api.Lambda/release_linux-x64/bootstrap
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.ref }}
      - name: Amazon Linux 2023 build
        run: |
          docker build . -t api-lambda:latest -f src/api/Elastic.Documentation.Api.Lambda/Dockerfile
      - name: Get bootstrap binary
        run: |
          docker cp $(docker create --name tc api-lambda:latest):/app/.artifacts/publish ./.artifacts && docker rm tc
      - name: Inspect bootstrap binary
        run: |
          tree .artifacts
          stat "${BINARY_PATH}"
      - name: Archive artifact
        id: upload-artifact
        uses: actions/upload-artifact@v4
        with:
          name: api-lambda-binary
          retention-days: 1
          if-no-files-found: error
          path: ${{ env.BINARY_PATH }}
7 changes: 5 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,12 @@ jobs:
- name: Validate Content Sources
run: dotnet run --project src/tooling/docs-assembler -c release -- content-source validate

build-lambda:
build-link-index-updater-lambda:
uses: ./.github/workflows/build-link-index-updater-lambda.yml


build-api-lambda:
uses: ./.github/workflows/build-api-lambda.yml

npm:
runs-on: ubuntu-latest
defaults:
Expand Down
4 changes: 3 additions & 1 deletion Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
</ItemGroup>
<!-- AWS -->
<ItemGroup>
<PackageVersion Include="Amazon.Lambda.AspNetCoreServer.Hosting" Version="1.9.0" />
<PackageVersion Include="Amazon.Lambda.RuntimeSupport" Version="1.13.0" />
<PackageVersion Include="Amazon.Lambda.Core" Version="2.5.1" />
<PackageVersion Include="Amazon.Lambda.S3Events" Version="3.1.0" />
Expand All @@ -18,6 +19,7 @@
<PackageVersion Include="AWSSDK.S3" Version="4.0.0.1" />
<PackageVersion Include="FakeItEasy" Version="8.3.0" />
<PackageVersion Include="Elastic.Ingest.Elasticsearch" Version="0.11.3" />
<PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.4" />
<PackageVersion Include="Microsoft.OpenApi" Version="2.0.0-preview9" />
<PackageVersion Include="System.Text.Json" Version="9.0.5" />
</ItemGroup>
Expand Down Expand Up @@ -70,4 +72,4 @@
</PackageVersion>
<PackageVersion Include="xunit.v3" Version="2.0.2" />
</ItemGroup>
</Project>
</Project>
24 changes: 24 additions & 0 deletions docs-builder.sln
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "config", "config", "{6FAB56
config\navigation.yml = config\navigation.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "api", "api", "{B042CC78-5060-4091-B95A-79C71BA3908A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Api.Core", "src\api\Elastic.Documentation.Api.Core\Elastic.Documentation.Api.Core.csproj", "{F30B90AD-1A01-4A6F-9699-809FA6875B22}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Api.Infrastructure", "src\api\Elastic.Documentation.Api.Infrastructure\Elastic.Documentation.Api.Infrastructure.csproj", "{AE3FC78E-167F-4B6E-88EC-84743EB748B7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Elastic.Documentation.Api.Lambda", "src\api\Elastic.Documentation.Api.Lambda\Elastic.Documentation.Api.Lambda.csproj", "{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -204,6 +212,18 @@ Global
{164F55EC-9412-4CD4-81AD-3598B57632A6}.Debug|Any CPU.Build.0 = Debug|Any CPU
{164F55EC-9412-4CD4-81AD-3598B57632A6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{164F55EC-9412-4CD4-81AD-3598B57632A6}.Release|Any CPU.Build.0 = Release|Any CPU
{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F30B90AD-1A01-4A6F-9699-809FA6875B22}.Release|Any CPU.Build.0 = Release|Any CPU
{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AE3FC78E-167F-4B6E-88EC-84743EB748B7}.Release|Any CPU.Build.0 = Release|Any CPU
{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{4D198E25-C211-41DC-9E84-B15E89BD7048} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
Expand Down Expand Up @@ -234,5 +254,9 @@ Global
{89B83007-71E6-4B57-BA78-2544BFA476DB} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
{111E7029-BB29-4039-9B45-04776798A8DD} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
{164F55EC-9412-4CD4-81AD-3598B57632A6} = {67B576EE-02FA-4F9B-94BC-3630BC09ECE5}
{B042CC78-5060-4091-B95A-79C71BA3908A} = {BE6011CC-1200-4957-B01F-FCCA10C5CF5A}
{F30B90AD-1A01-4A6F-9699-809FA6875B22} = {B042CC78-5060-4091-B95A-79C71BA3908A}
{AE3FC78E-167F-4B6E-88EC-84743EB748B7} = {B042CC78-5060-4091-B95A-79C71BA3908A}
{C6A121C5-DEB1-4FCE-9140-AF144EA98EEE} = {B042CC78-5060-4091-B95A-79C71BA3908A}
EndGlobalSection
EndGlobal
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,12 @@ import { EventSourceMessage } from '@microsoft/fetch-event-source'
import { useEffect, useState, useRef, useCallback } from 'react'
import * as z from 'zod'

export const LlmGatewayRequestSchema = z.object({
userContext: z.object({
userEmail: z.string(),
}),
platformContext: z.object({
origin: z.literal('support_portal'),
useCase: z.literal('support_assistant'),
metadata: z.any(),
}),
input: z.array(
z.object({
role: z.string(),
message: z.string(),
})
),
threadId: z.string(),
// Request payload sent to the docs Ask AI streaming endpoint.
// threadId is optional: omitted on the first question of a conversation.
export const AskAiRequestSchema = z.object({
    message: z.string(),
    threadId: z.string().optional(),
})

export type LlmGatewayRequest = z.infer<typeof LlmGatewayRequestSchema>
export type AskAiRequest = z.infer<typeof AskAiRequestSchema>

const sharedAttributes = {
timestamp: z.number(),
Expand Down Expand Up @@ -154,8 +141,8 @@ export const useLlmGateway = (props: Props): UseLlmGatewayResponse => {
[processMessage]
)

const { sendMessage, abort } = useFetchEventSource<LlmGatewayRequest>({
apiEndpoint: '/chat',
const { sendMessage, abort } = useFetchEventSource<AskAiRequest>({
apiEndpoint: '/_api/v1/ask-ai/stream',
onMessage,
onError: (error) => {
setError(error)
Expand Down Expand Up @@ -221,64 +208,12 @@ export const useLlmGateway = (props: Props): UseLlmGatewayResponse => {
}
}

function createLlmGatewayRequest(question: string, threadId?: string) {
// TODO: we should move this to the backend so that the use cannot change this
// Right now, the backend is a pure proxy to the LLM gateway
return LlmGatewayRequestSchema.parse({
userContext: {
userEmail: `elastic-docs-v3@invalid`, // Random email (will be optional in the future)
},
platformContext: {
origin: 'support_portal',
useCase: 'support_assistant',
metadata: {},
},
input: [
{
role: 'user',
message: `
# ROLE AND GOAL
You are an expert AI assistant for the Elastic Stack (Elasticsearch, Kibana, Beats, Logstash, etc.). Your sole purpose is to answer user questions based *exclusively* on the provided context from the official Elastic Documentation.

# CRITICAL INSTRUCTION: SINGLE-SHOT INTERACTION
This is a single-turn interaction. The user cannot reply to your answer for clarification. Therefore, your response MUST be final, self-contained, and as comprehensive as possible based on the provided context.
Also, keep the response as short as possible, but do not truncate the context.

# RULES
1. **Facts** Always do RAG search to find the relevant Elastic documentation.
2. **Strictly Grounded Answers:** You MUST base your answer 100% on the information from the search results. Do not use any of your pre-trained knowledge or any information outside of this context.
3. **Handle Ambiguity Gracefully:** Since you cannot ask clarifying questions, if the question is broad or ambiguous (e.g., "how to improve performance"), structure your answer to cover the different interpretations supported by the context.
* Acknowledge the ambiguity. For example: "Your question about 'performance' can cover several areas. Based on the documentation, here are the key aspects:"
* Organize the answer with clear headings for each aspect (e.g., "Indexing Performance," "Query Performance").
* But if there is a similar or related topic in the docs you can mention it and link to it.
4. **Direct Answer First:** If the context directly and sufficiently answers a specific question, provide a clear, comprehensive, and well-structured answer.
* Use Markdown for formatting (e.g., code blocks for configurations, bullet points for lists).
* Use LaTeX for mathematical or scientific notations where appropriate (e.g., \`$E = mc^2$\`).
* Make the answer as complete as possible, as this is the user's only response.
* Keep the answer short and concise. We want to link users to the Elastic Documentation to find more information.
5. **Handling Incomplete Answers:** If the context contains relevant information but does not fully answer the question, you MUST follow this procedure:
* Start by explicitly stating that you could not find a complete answer.
* Then, summarize the related information you *did* find in the context, explaining how it might be helpful.
6. **Handling No Answer:** If the context is empty or completely irrelevant to the question, you MUST respond with the following, and nothing else:
I was unable to find an answer to your question in the Elastic Documentation.

For further assistance, you may want to:
* Ask the community of experts at **discuss.elastic.co**.
* If you have an Elastic subscription, contact our support engineers at **support.elastic.co**."
7. If you are 100% sure that something is not supported by Elastic, then say so.
8. **Tone:** Your tone should be helpful, professional, and confident. It is better to provide no answer (Rule #5) than an incorrect one.
* Assume that the user is using Elastic for the first time.
* Assume that the user is a beginner.
* Assume that the user has a limited knowledge of Elastic
* Explain unusual terminology, abbreviations, or acronyms.
* Always try to cite relevant Elastic documentation.
`,
},
{
role: 'user',
message: question,
},
],
/**
 * Builds the Ask AI request body and validates it against
 * AskAiRequestSchema (throws a ZodError on shape mismatch).
 *
 * @param message  The user's question.
 * @param threadId Optional conversation thread id.
 */
function createLlmGatewayRequest(
    message: string,
    threadId?: string
): AskAiRequest {
    return AskAiRequestSchema.parse({
        message,
        threadId,
    })
}
38 changes: 38 additions & 0 deletions src/api/Elastic.Documentation.Api.Core/AskAi/AskAiUsecase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using Microsoft.Extensions.Logging;

namespace Elastic.Documentation.Api.Core.AskAi;

/// <summary>
/// Application use case for the Ask AI feature: logs the incoming request and
/// delegates to the configured <see cref="IAskAiGateway{T}"/> implementation.
/// </summary>
public class AskAiUsecase(IAskAiGateway<Stream> askAiGateway, ILogger<AskAiUsecase> logger)
{
	/// <summary>
	/// Forwards <paramref name="askAiRequest"/> to the gateway and returns its response stream.
	/// </summary>
	public async Task<Stream> AskAi(AskAiRequest askAiRequest, Cancel ctx)
	{
		logger.LogDebug("Processing AskAiRequest: {Request}", askAiRequest);
		var responseStream = await askAiGateway.AskAi(askAiRequest, ctx);
		return responseStream;
	}
}

/// <summary>
/// Public request payload for the Ask AI endpoint.
/// </summary>
/// <param name="Message">The user's question in plain text.</param>
/// <param name="ThreadId">Optional conversation id; null for the first message of a thread.</param>
public record AskAiRequest(string Message, string? ThreadId)
{
	// Get-only auto-property: the literal is evaluated once instead of on every access.
	// The prompt instructs the downstream LLM to answer strictly from retrieved docs.
	public static string SystemPrompt { get; } =
		"""
		Role: You are a specialized AI assistant designed to answer user questions exclusively from a set of provided documentation. Your primary purpose is to retrieve, synthesize, and present information directly from these documents.

		## Core Directives:

		- Source of Truth: Your only source of information is the document content provided to you for each user query. You must not use any pre-trained knowledge or external information.
		- Answering Style: Answer the user's question directly and comprehensively. As the user cannot ask follow-up questions, your response must be a complete, self-contained answer to their query. Do not start with phrases like "Based on the documents..."—simply provide the answer.
		- Handling Unknowns: If the information required to answer the question is not present in the provided documents, you must explicitly state that the answer cannot be found. Do not attempt to guess, infer, or provide a general response.
		- Helpful Fallback: If you cannot find a direct answer, you may suggest and link to a few related or similar topics that are present in the documentation. This provides value even when a direct answer is unavailable.
		- Output Format: Your final response should be a single, coherent block of text.

		## Negative Constraints:

		- Do not mention that you are a language model or AI.
		- Do not provide answers based on your general knowledge.
		- Do not ask the user for clarification.
		""";
}
10 changes: 10 additions & 0 deletions src/api/Elastic.Documentation.Api.Core/AskAi/IAskAiGateway.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

namespace Elastic.Documentation.Api.Core.AskAi;

/// <summary>
/// Abstraction over an Ask AI backend that answers documentation questions.
/// </summary>
/// <typeparam name="T">
/// Response representation returned by the backend (e.g. a <see cref="Stream"/> of server-sent events).
/// </typeparam>
public interface IAskAiGateway<T>
{
	// NOTE(review): `Cancel` is presumably a project-wide alias for CancellationToken — confirm.
	Task<T> AskAi(AskAiRequest askAiRequest, Cancel ctx = default);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>net9.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <AssemblyName>Elastic.Documentation.Api.Core</AssemblyName>
    <RootNamespace>Elastic.Documentation.Api.Core</RootNamespace>
  </PropertyGroup>

  <ItemGroup>
    <!-- Version is resolved centrally via Directory.Packages.props -->
    <PackageReference Include="Microsoft.Extensions.Logging" />
  </ItemGroup>

</Project>
13 changes: 13 additions & 0 deletions src/api/Elastic.Documentation.Api.Core/SerializationContext.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Text.Json.Serialization;
using Elastic.Documentation.Api.Core.AskAi;

namespace Elastic.Documentation.Api.Core;


// Source-generated System.Text.Json context for the public API types.
// CamelCase naming matches the JSON produced/consumed by the web frontend.
[JsonSerializable(typeof(AskAiRequest))]
[JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
public partial class ApiJsonContext : JsonSerializerContext;
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Elastic.Documentation.Api.Core.AskAi;
using Elastic.Documentation.Api.Infrastructure.Gcp;
using Microsoft.Extensions.Options;

namespace Elastic.Documentation.Api.Infrastructure.Adapters.AskAi;

/// <summary>
/// <see cref="IAskAiGateway{T}"/> implementation that proxies Ask AI requests to the
/// GCP-hosted LLM gateway and returns the raw server-sent-events response stream.
/// </summary>
public class LlmGatewayAskAiGateway(HttpClient httpClient, GcpIdTokenProvider tokenProvider, IOptionsSnapshot<LlmGatewayOptions> options) : IAskAiGateway<Stream>
{
	/// <summary>
	/// Serializes <paramref name="askAiRequest"/> into the gateway wire format, POSTs it with a
	/// GCP id token, and returns the response body stream for the caller to consume.
	/// </summary>
	/// <exception cref="HttpRequestException">Thrown when the gateway responds with a non-success status code.</exception>
	public async Task<Stream> AskAi(AskAiRequest askAiRequest, Cancel ctx = default)
	{
		var llmGatewayRequest = LlmGatewayRequest.CreateFromRequest(askAiRequest);
		var requestBody = JsonSerializer.Serialize(llmGatewayRequest, LlmGatewayContext.Default.LlmGatewayRequest);
		// Dispose the request message (and its StringContent) once the response stream is handed off.
		using var request = new HttpRequestMessage(HttpMethod.Post, options.Value.FunctionUrl)
		{
			Content = new StringContent(requestBody, Encoding.UTF8, "application/json")
		};
		var authToken = await tokenProvider.GenerateIdTokenAsync(ctx);
		request.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", authToken);
		request.Headers.Add("User-Agent", "elastic-docs-proxy/1.0");
		request.Headers.Add("Accept", "text/event-stream");
		// Overwrite the Content-Type set by StringContent to drop the "; charset=utf-8" parameter.
		request.Content.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/json");
		// ResponseHeadersRead lets streaming begin before the full body has arrived.
		var response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, ctx);
		// Fail fast on gateway errors instead of silently streaming an error body to the caller.
		response.EnsureSuccessStatusCode();
		return await response.Content.ReadAsStreamAsync(ctx);
	}
}

/// <summary>
/// Wire-format request expected by the LLM gateway.
/// </summary>
public record LlmGatewayRequest(
	UserContext UserContext,
	PlatformContext PlatformContext,
	ChatInput[] Input,
	string ThreadId
)
{
	/// <summary>Maps the public <c>AskAiRequest</c> onto the gateway's wire format.</summary>
	public static LlmGatewayRequest CreateFromRequest(AskAiRequest request)
	{
		// The system prompt precedes the user's question so the gateway applies it first.
		ChatInput[] input =
		[
			new ChatInput("system", AskAiRequest.SystemPrompt),
			new ChatInput("user", request.Message)
		];
		// Fall back to a fresh, prefixed thread id when the client did not supply one.
		var threadId = request.ThreadId ?? "elastic-docs-" + Guid.NewGuid();
		return new LlmGatewayRequest(
			new UserContext("elastic-docs-v3@invalid"),
			new PlatformContext("support_portal", "support_assistant", []),
			input,
			threadId);
	}
}

// Identifies the calling user to the gateway; docs traffic uses a fixed placeholder email.
public record UserContext(string UserEmail);

// Tells the gateway which platform and use case the request originates from.
public record PlatformContext(
	string Origin,
	string UseCase,
	Dictionary<string, object>? Metadata = null
);

// A single conversation message; Role is e.g. "system" or "user".
public record ChatInput(string Role, string Message);

// Source-generated serializer context for the gateway wire format (camelCase JSON),
// required for AOT/trimming-safe serialization in the Lambda host.
[JsonSerializable(typeof(LlmGatewayRequest))]
[JsonSourceGenerationOptions(PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
internal sealed partial class LlmGatewayContext : JsonSerializerContext;
Loading
Loading