From e52188d73c3a38cdcec92f4890ce180f3f51b944 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Fri, 3 Jan 2025 12:26:00 +0100
Subject: [PATCH 1/9] wip

---
 .../images/search/rag-venn-diagram.svg        | 19 ++++++++++
 .../retrieval-augmented-generation.asciidoc   | 36 +++++++++++++++++++
 .../search-your-data.asciidoc                 |  1 +
 3 files changed, 56 insertions(+)
 create mode 100644 docs/reference/images/search/rag-venn-diagram.svg
 create mode 100644 docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
diff --git a/docs/reference/images/search/rag-venn-diagram.svg b/docs/reference/images/search/rag-venn-diagram.svg
new file mode 100644
index 0000000000000..cba6c0e6adf6b
--- /dev/null
+++ b/docs/reference/images/search/rag-venn-diagram.svg
@@ -0,0 +1,19 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 400">
+  <!-- Left circle (Information Retrieval) -->
+  <circle cx="220" cy="200" r="150" fill="#4A90E2" opacity="0.6"/>
+  
+  <!-- Right circle (Generative AI) -->
+  <circle cx="380" cy="200" r="150" fill="#50C878" opacity="0.6"/>
+  
+  <!-- Text labels -->
+  <text x="160" y="200" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Information
+    <tspan x="160" y="225">Retrieval</tspan>
+  </text>
+  
+  <text x="440" y="200" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Generative
+    <tspan x="440" y="225">AI</tspan>
+  </text>
+  
+  <!-- RAG label in intersection -->
+  <text x="300" y="200" font-family="Arial" font-size="28" font-weight="bold" fill="#2C3E50" text-anchor="middle">RAG</text>
+</svg>
\ No newline at end of file
diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
new file mode 100644
index 0000000000000..30b6df5e72579
--- /dev/null
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -0,0 +1,36 @@
+[rag-elasticsearch]
+== Retrieval augmented generation
+
+Retrieval augmented generation (RAG) is a technique that retrieves additional context  from an external datastore before prompting an LLM.
+This grounds the LLM with in-context learning.
+Compared to finetuning or continuous pretraining, RAG can be implemented faster and cheaper, and it has several advantages.
+
+image::images/search/rag-venn-diagram.svg[RAG sits at the intersection of information retrieval and generative AI, align=center, width=500]
+
+RAG sits at the intersection of information retrieval and generative AI.
+{es} is an excellent tool for implementing RAG, because it offers various retrieval capabilities, such as full-text search, vector search, and hybrid search.
+
+[discrete]
+[[rag-elasticsearch-advantages]]
+=== Advantages of RAG
+
+RAG has several advantages:
+
+* It enables grounding the LLM with additional, up-to-date and/or private data.
+* It is much cheaper and easier to maintain compared to finetuning or continuously pretraining a model.
+* It ensures data privacy and security because you control what data the model sees. Different indices have different access controls.
+* You can rely on the language model to parse and format the retrieved context in a style or format of your choice.
+* You can start with a simple BM25-based full-text search system and gradually improve it by adding more advanced semantic and hybrid search capabilities.
+
+[discrete]
+[[rag-elasticsearch-example]]
+=== Example
+
+Here's a simple example of a RAG system using {es}, where a user has a question about the company travel policy:
+
+1. User makes natural language queries about company travel policy
+2. System retrieves relevant documents from {es}
+3. LLM generates response using retrieved context
+
+The result is accurate, up-to-date answers based on company documents.
+
diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc
index 9ef1ae0ebc59b..b38af1fffca25 100644
--- a/docs/reference/search/search-your-data/search-your-data.asciidoc
+++ b/docs/reference/search/search-your-data/search-your-data.asciidoc
@@ -48,6 +48,7 @@ include::../../how-to/recipes.asciidoc[]
 include::retrievers-overview.asciidoc[]
 include::knn-search.asciidoc[]
 include::semantic-search.asciidoc[]
+include::retrieval-augmented-generation.asciidoc[]
 include::search-across-clusters.asciidoc[]
 include::search-with-synonyms.asciidoc[]
 include::search-application-overview.asciidoc[]

From da060e9632164ca3413312a521a5647e52cedfbc Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 6 Jan 2025 16:57:35 +0100
Subject: [PATCH 2/9] Refactor, add workflow diagram, links

---
 docs/reference/images/search/rag-schema.svg   | 85 +++++++++++++++++++
 .../retrieval-augmented-generation.asciidoc   | 71 ++++++++++++----
 2 files changed, 141 insertions(+), 15 deletions(-)
 create mode 100644 docs/reference/images/search/rag-schema.svg

diff --git a/docs/reference/images/search/rag-schema.svg b/docs/reference/images/search/rag-schema.svg
new file mode 100644
index 0000000000000..657c3bf541ef3
--- /dev/null
+++ b/docs/reference/images/search/rag-schema.svg
@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg viewBox="-20 -80 840 380" xmlns="http://www.w3.org/2000/svg">
+    <!-- Title -->
+    <text x="50" y="-55" text-anchor="start" font-family="Arial" font-size="16" font-weight="bold" fill="#333">
+        Retrieval Augmented Generation with Elasticsearch
+    </text>
+
+    <!-- Arrow markers definition -->
+    <defs>
+        <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+            <polygon points="0 0, 10 3.5, 0 7" fill="#666"/>
+        </marker>
+        <marker id="arrowhead-blue" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+            <polygon points="0 0, 10 3.5, 0 7" fill="#0066cc"/>
+        </marker>
+    </defs>
+    
+    <!-- Custom Instructions component -->
+    <g>
+        <text x="510" y="-15" text-anchor="middle" font-family="Arial" font-size="8" fill="#666">Define how the model should</text>
+        <text x="510" y="-5" text-anchor="middle" font-family="Arial" font-size="8" fill="#666">parse and render information</text>
+        <rect x="450" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
+        <text x="510" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Custom instructions</text>
+        <line x1="510" y1="40" x2="510" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
+    </g>
+
+    <!-- Search Strategy component -->
+    <g>
+        <text x="310" y="-15" text-anchor="middle" font-family="Arial" font-size="8" fill="#666">Use full-text, semantic, or hybrid search</text>
+        <rect x="250" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
+        <text x="310" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Search strategy</text>
+        <line x1="310" y1="40" x2="310" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
+    </g>
+
+    <!-- Number circles - positioned uniformly -->
+    <g>
+        <circle cx="40" cy="85" r="10" fill="#333"/>
+        <text x="40" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">1</text>
+        
+        <circle cx="240" cy="85" r="10" fill="#333"/>
+        <text x="240" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">2</text>
+        
+        <circle cx="440" cy="85" r="10" fill="#333"/>
+        <text x="440" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">3</text>
+        
+        <circle cx="640" cy="85" r="10" fill="#333"/>
+        <text x="640" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">4</text>
+    </g>
+
+    <!-- Main flow components -->
+    <g>
+        <!-- Input component -->
+        <path d="M50,100 h90 a30,30 0 0 1 0,60 h-90 a30,30 0 0 1 0,-60" fill="#edf7ec" stroke="#006400"/>
+        <text x="95" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">User query</text>
+    </g>
+
+    <g>
+        <!-- Search component -->
+        <rect x="250" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
+        <text x="310" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Elasticsearch</text>
+        <text x="310" y="180" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">
+            <tspan font-weight="bold">Retrieves</tspan> relevant</text>
+        <text x="310" y="195" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">documents</text>
+    </g>
+
+    <g>
+        <!-- Processing component -->
+        <rect x="450" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
+        <text x="510" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Language model</text>
+        <text x="510" y="180" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">Processes context &amp;</text>
+        <text x="510" y="195" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">
+            <tspan font-weight="bold">generates</tspan> answer</text>
+    </g>
+
+    <g>
+        <!-- Output component -->
+        <path d="M710,100 l60,30 l-60,30 l-60,-30 z" fill="#edf7ec" stroke="#006400"/>
+        <text x="710" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">Response</text>
+    </g>
+    
+    <!-- Flow connections -->
+    <line x1="170" y1="130" x2="240" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <line x1="370" y1="130" x2="440" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <line x1="570" y1="130" x2="640" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+</svg>
\ No newline at end of file
diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index 30b6df5e72579..9f371cb984981 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -1,13 +1,18 @@
 [rag-elasticsearch]
 == Retrieval augmented generation
 
-Retrieval augmented generation (RAG) is a technique that retrieves additional context  from an external datastore before prompting an LLM.
-This grounds the LLM with in-context learning.
+.🍿 Prefer a video introduction?
+***********************
+Check out https://www.youtube.com/watch?v=OS4ZefUPAks[this short video] from the Elastic Snackable Series.
+***********************
+
+Retrieval augmented generation (RAG) is a technique where additional context is retrieved from an external datastore before prompting a language model to generate a response using the retrieved context.
+This grounds the model with in-context learning.
 Compared to finetuning or continuous pretraining, RAG can be implemented faster and cheaper, and it has several advantages.
 
 image::images/search/rag-venn-diagram.svg[RAG sits at the intersection of information retrieval and generative AI, align=center, width=500]
 
-RAG sits at the intersection of information retrieval and generative AI.
+RAG sits at the intersection of https://www.elastic.co/what-is/information-retrieval[information retrieval] and generative AI.
 {es} is an excellent tool for implementing RAG, because it offers various retrieval capabilities, such as full-text search, vector search, and hybrid search.
 
 [discrete]
@@ -16,21 +21,57 @@ RAG sits at the intersection of information retrieval and generative AI.
 
 RAG has several advantages:
 
-* It enables grounding the LLM with additional, up-to-date and/or private data.
-* It is much cheaper and easier to maintain compared to finetuning or continuously pretraining a model.
-* It ensures data privacy and security because you control what data the model sees. Different indices have different access controls.
-* You can rely on the language model to parse and format the retrieved context in a style or format of your choice.
-* You can start with a simple BM25-based full-text search system and gradually improve it by adding more advanced semantic and hybrid search capabilities.
+* *Improved context:* Enables grounding the LLM with additional, up-to-date, and/or private data.
+* *Reduced hallucination:* Helps minimize factual errors by enabling models to cite authoritative sources.
+* *Cost efficiency:* Requires less maintenance compared to finetuning or continuously pretraining models.
+* *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control, and field/document-level security.
+* *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
+* *Flexible implementation:* Works with basic 
+// TODO: uncomment when page is live <<full-text-search,full-text search>> 
+full-text search and can be gradually updated to use advanced <<semantic-search,semantic search>> and hybrid search capabilities.
+
+[discrete]
+[[rag-elasticsearch-components]]
+=== RAG system overview
+
+The following diagram illustrates a simple RAG system using {es}.
+
+image::images/search/rag-schema.svg[Components of a simple RAG system using Elasticsearch, align=center, width=800]
+
+The system consists of the following components:
+
+. User submits a query
+. Elasticsearch retrieves relevant documents, using full-text search, vector search, or hybrid search
+. Language model processes the context and generates a response, using custom instructions, such as "Cite a source" or "Provide a concise summary of the `content` field in markdown format"
+. Model returns final response to the user
+
+[TIP]
+====
+A more advanced setup might include query rewriting between steps 1 and 2. This intermediate step could use one or more additional language models with different instructions to reformulate queries for more specific and detailed responses.
+====
 
 [discrete]
-[[rag-elasticsearch-example]]
-=== Example
+[[rag-elasticsearch-getting-started]]
+=== Getting started
+
+Start building RAG applications quickly with Playground, which seamlessly integrates {es} with language model providers.
+The Playground UI enables you to build, test, and deploy RAG interfaces on top of your {es} indices.
+
+Playground automatically selects the best retrieval methods for your data, while providing full control over the final {es} queries and language model instructions.
+You can also download the underlying Python code to integrate with your existing applications.
+
+Learn more in the {kibana-ref}/playground.html[documentation] and 
+try the https://www.elastic.co/demo-gallery/ai-playground[interactive lab] for hands-on experience.
+
+[discrete]
+[[rag-elasticsearch-learn-more]]
+=== Learn more
+
+Learn more about building RAG systems using {es} in these blog posts:
 
-Here's a simple example of a RAG system using {es}, where a user has a question about the company travel policy:
+* https://www.elastic.co/blog/beyond-rag-basics-semantic-search-with-elasticsearch[Beyond RAG Basics: Advanced strategies for AI applications]
+* https://www.elastic.co/search-labs/blog/building-a-rag-system-with-gemma-hugging-face-elasticsearch[Building a RAG system with Gemma, Hugging Face, and Elasticsearch]
+* https://www.elastic.co/search-labs/blog/rag-agent-tool-elasticsearch-langchain[Building an agentic RAG tool with Elasticsearch and Langchain]
 
-1. User makes natural language queries about company travel policy
-2. System retrieves relevant documents from {es}
-3. LLM generates response using retrieved context
 
-The result is accurate, up-to-date answers based on company documents.
 

From 7b78a827f56d0c6357c32c53911587ba26cbf996 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 6 Jan 2025 17:01:46 +0100
Subject: [PATCH 3/9] Highlight can start cheap

---
 .../search-your-data/retrieval-augmented-generation.asciidoc    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index 9f371cb984981..db86aa0088cad 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -28,7 +28,7 @@ RAG has several advantages:
 * *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
 * *Flexible implementation:* Works with basic 
 // TODO: uncomment when page is live <<full-text-search,full-text search>> 
-full-text search and can be gradually updated to use advanced <<semantic-search,semantic search>> and hybrid search capabilities.
+full-text search and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.
 
 [discrete]
 [[rag-elasticsearch-components]]

From d1b336208bdadd28b9a335bc528e6784f55286cc Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 6 Jan 2025 17:03:43 +0100
Subject: [PATCH 4/9] LLM -> LM because it needn't be large

---
 .../search-your-data/retrieval-augmented-generation.asciidoc    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index db86aa0088cad..0d6a6533ed0f6 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -21,7 +21,7 @@ RAG sits at the intersection of https://www.elastic.co/what-is/information-retri
 
 RAG has several advantages:
 
-* *Improved context:* Enables grounding the LLM with additional, up-to-date, and/or private data.
+* *Improved context:* Enables grounding the language model with additional, up-to-date, and/or private data.
 * *Reduced hallucination:* Helps minimize factual errors by enabling models to cite authoritative sources.
 * *Cost efficiency:* Requires less maintenance compared to finetuning or continuously pretraining models.
 * *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control, and field/document-level security.

From f9d9e5fb646736088b12aea18ba57fdadc551329 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Mon, 6 Jan 2025 17:04:51 +0100
Subject: [PATCH 5/9] We're not in Oxford anymore

---
 .../search-your-data/retrieval-augmented-generation.asciidoc    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index 0d6a6533ed0f6..2d54620430101 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -24,7 +24,7 @@ RAG has several advantages:
 * *Improved context:* Enables grounding the language model with additional, up-to-date, and/or private data.
 * *Reduced hallucination:* Helps minimize factual errors by enabling models to cite authoritative sources.
 * *Cost efficiency:* Requires less maintenance compared to finetuning or continuously pretraining models.
-* *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control, and field/document-level security.
+* *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control and field/document-level security.
 * *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
 * *Flexible implementation:* Works with basic 
 // TODO: uncomment when page is live <<full-text-search,full-text search>> 

From 6045d6137d25c894cf9b177a6e334f3714b5c6f6 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Tue, 7 Jan 2025 13:11:09 +0100
Subject: [PATCH 6/9] Tweak diagrams

---
 docs/reference/images/search/rag-schema.svg   | 86 ++++++++-----------
 .../images/search/rag-venn-diagram.svg        |  4 +-
 2 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/docs/reference/images/search/rag-schema.svg b/docs/reference/images/search/rag-schema.svg
index 657c3bf541ef3..f26edac6c0077 100644
--- a/docs/reference/images/search/rag-schema.svg
+++ b/docs/reference/images/search/rag-schema.svg
@@ -1,10 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <svg viewBox="-20 -80 840 380" xmlns="http://www.w3.org/2000/svg">
-    <!-- Title -->
-    <text x="50" y="-55" text-anchor="start" font-family="Arial" font-size="16" font-weight="bold" fill="#333">
-        Retrieval Augmented Generation with Elasticsearch
-    </text>
-
     <!-- Arrow markers definition -->
     <defs>
         <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
@@ -17,69 +12,62 @@
     
     <!-- Custom Instructions component -->
     <g>
-        <text x="510" y="-15" text-anchor="middle" font-family="Arial" font-size="8" fill="#666">Define how the model should</text>
-        <text x="510" y="-5" text-anchor="middle" font-family="Arial" font-size="8" fill="#666">parse and render information</text>
-        <rect x="450" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
-        <text x="510" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Custom instructions</text>
-        <line x1="510" y1="40" x2="510" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
+        <text x="470" y="-15" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">Define how the model should</text>
+        <text x="470" y="-5" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">parse and render information</text>
+        <rect x="410" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
+        <text x="470" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Custom instructions</text>
+        <line x1="470" y1="40" x2="470" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
     </g>
 
     <!-- Search Strategy component -->
     <g>
-        <text x="310" y="-15" text-anchor="middle" font-family="Arial" font-size="8" fill="#666">Use full-text, semantic, or hybrid search</text>
+        <text x="310" y="-15" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">Full-text, semantic</text>
+        <text x="310" y="-5" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">or hybrid search</text>
         <rect x="250" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
         <text x="310" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Search strategy</text>
         <line x1="310" y1="40" x2="310" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
     </g>
 
-    <!-- Number circles - positioned uniformly -->
+    <!-- Number circles -->
     <g>
-        <circle cx="40" cy="85" r="10" fill="#333"/>
-        <text x="40" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">1</text>
+        <circle cx="150" cy="85" r="10" fill="#333"/>
+        <text x="150" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">1</text>
         
         <circle cx="240" cy="85" r="10" fill="#333"/>
         <text x="240" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">2</text>
         
-        <circle cx="440" cy="85" r="10" fill="#333"/>
-        <text x="440" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">3</text>
+        <circle cx="400" cy="85" r="10" fill="#333"/>
+        <text x="400" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">3</text>
         
-        <circle cx="640" cy="85" r="10" fill="#333"/>
-        <text x="640" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">4</text>
+        <circle cx="580" cy="85" r="10" fill="#333"/>
+        <text x="580" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">4</text>
     </g>
 
     <!-- Main flow components -->
-    <g>
-        <!-- Input component -->
-        <path d="M50,100 h90 a30,30 0 0 1 0,60 h-90 a30,30 0 0 1 0,-60" fill="#edf7ec" stroke="#006400"/>
-        <text x="95" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">User query</text>
-    </g>
+    <!-- Input component -->
+    <path d="M150,115 h45 a15,15 0 0 1 0,30 h-45 a15,15 0 0 1 0,-30" fill="#edf7ec" stroke="#006400"/>
+    <text x="172" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">User query</text>
 
-    <g>
-        <!-- Search component -->
-        <rect x="250" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
-        <text x="310" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Elasticsearch</text>
-        <text x="310" y="180" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">
-            <tspan font-weight="bold">Retrieves</tspan> relevant</text>
-        <text x="310" y="195" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">documents</text>
-    </g>
+    <!-- Search component -->
+    <rect x="250" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
+    <text x="310" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Elasticsearch</text>
+    <text x="310" y="180" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">
+        <tspan font-weight="bold">Retrieves</tspan> relevant</text>
+    <text x="310" y="195" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">documents</text>
 
-    <g>
-        <!-- Processing component -->
-        <rect x="450" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
-        <text x="510" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Language model</text>
-        <text x="510" y="180" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">Processes context &amp;</text>
-        <text x="510" y="195" text-anchor="middle" font-family="Arial" font-size="10" fill="#666">
-            <tspan font-weight="bold">generates</tspan> answer</text>
-    </g>
+    <!-- Processing component -->
+    <rect x="410" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
+    <text x="470" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Language model</text>
+    <text x="470" y="180" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">Processes context &amp;</text>
+    <text x="470" y="195" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">
+        <tspan font-weight="bold">generates</tspan> answer</text>
 
-    <g>
-        <!-- Output component -->
-        <path d="M710,100 l60,30 l-60,30 l-60,-30 z" fill="#edf7ec" stroke="#006400"/>
-        <text x="710" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">Response</text>
-    </g>
-    
-    <!-- Flow connections -->
-    <line x1="170" y1="130" x2="240" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
-    <line x1="370" y1="130" x2="440" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
-    <line x1="570" y1="130" x2="640" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <!-- Flow connections (arrows) -->
+    <line x1="210" y1="130" x2="250" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <line x1="370" y1="130" x2="410" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <line x1="530" y1="130" x2="570" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+
+    <!-- Output component -->
+    <path d="M585,115 h45 a15,15 0 0 1 0,30 h-45 a15,15 0 0 1 0,-30" fill="#edf7ec" stroke="#006400"/>
+    <text x="607" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">Response</text>
 </svg>
\ No newline at end of file
diff --git a/docs/reference/images/search/rag-venn-diagram.svg b/docs/reference/images/search/rag-venn-diagram.svg
index cba6c0e6adf6b..9906aaefaba0c 100644
--- a/docs/reference/images/search/rag-venn-diagram.svg
+++ b/docs/reference/images/search/rag-venn-diagram.svg
@@ -6,8 +6,8 @@
   <circle cx="380" cy="200" r="150" fill="#50C878" opacity="0.6"/>
   
   <!-- Text labels -->
-  <text x="160" y="200" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Information
-    <tspan x="160" y="225">Retrieval</tspan>
+  <text x="160" y="210" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Information
+    <tspan x="160" y="235">retrieval</tspan>
   </text>
   
   <text x="440" y="200" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Generative

From ca54e5bdad4c3b94d53e39bcbba5cae4437bd579 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Tue, 7 Jan 2025 13:11:23 +0100
Subject: [PATCH 7/9] Updates per review

---
 .../retrieval-augmented-generation.asciidoc   | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index 2d54620430101..bcb14632acd4e 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -6,9 +6,10 @@
 Check out https://www.youtube.com/watch?v=OS4ZefUPAks[this short video] from the Elastic Snackable Series.
 ***********************
 
-Retrieval augmented generation (RAG) is a technique where additional context is retrieved from an external datastore before prompting a language model to generate a response using the retrieved context.
-This grounds the model with in-context learning.
-Compared to finetuning or continuous pretraining, RAG can be implemented faster and cheaper, and it has several advantages.
+Retrieval Augmented Generation (RAG) is a technique for improving language model responses by grounding the model with additional, verifiable sources of information. It works by first retrieving relevant context from an external datastore, which is then added to the model's context window.
+
+RAG is a form of https://arxiv.org/abs/2301.00234[in-context learning], where the model learns from information provided at inference time.
+Compared to fine-tuning or continuous pre-training, RAG can be implemented more quickly and cheaply, and offers several advantages.
 
 image::images/search/rag-venn-diagram.svg[RAG sits at the intersection of information retrieval and generative AI, align=center, width=500]
 
@@ -23,12 +24,10 @@ RAG has several advantages:
 
 * *Improved context:* Enables grounding the language model with additional, up-to-date, and/or private data.
 * *Reduced hallucination:* Helps minimize factual errors by enabling models to cite authoritative sources.
-* *Cost efficiency:* Requires less maintenance compared to finetuning or continuously pretraining models.
+* *Cost efficiency:* Requires less maintenance compared to fine-tuning or continuously pre-training models.
 * *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control and field/document-level security.
 * *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
-* *Flexible implementation:* Works with basic 
-// TODO: uncomment when page is live <<full-text-search,full-text search>> 
-full-text search and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.
+* *Flexible implementation:* Works with basic `<<full-text-search,full-text search>>``, and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.
 
 [discrete]
 [[rag-elasticsearch-components]]
@@ -36,14 +35,14 @@ full-text search and can be gradually updated to add more advanced and computati
 
 The following diagram illustrates a simple RAG system using {es}.
 
-image::images/search/rag-schema.svg[Components of a simple RAG system using Elasticsearch, align=center, width=800]
+image::images/search/rag-schema.svg[Components of a simple RAG system using Elasticsearch, align=center, role="stretch"]
 
-The system consists of the following components:
+The workflow is as follows:
 
-. User submits a query
-. Elasticsearch retrieves relevant documents, using full-text search, vector search, or hybrid search
-. Language model processes the context and generates a response, using custom instructions, such as "Cite a source" or "Provide a concise summary of the `content` field in markdown format"
-. Model returns final response to the user
+. The user submits a query.
+. Elasticsearch retrieves relevant documents using full-text search, vector search, or hybrid search.
+. The language model processes the context and generates a response using custom instructions. Examples of custom instructions include "Cite a source" or "Provide a concise summary of the `content` field in Markdown format."
+. The model returns the final response to the user.
 
 [TIP]
 ====
@@ -60,7 +59,7 @@ The Playground UI enables you to build, test, and deploy RAG interfaces on top o
 Playground automatically selects the best retrieval methods for your data, while providing full control over the final {es} queries and language model instructions.
 You can also download the underlying Python code to integrate with your existing applications.
 
-Learn more in the {kibana-ref}/playground.html[documentation] and 
+Learn more in the {kibana-ref}/playground.html[Playground documentation] and
 try the https://www.elastic.co/demo-gallery/ai-playground[interactive lab] for hands-on experience.
 
 [discrete]

From 256ade86d28ad2402dab25c585d095c66b0cee29 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Tue, 7 Jan 2025 13:12:02 +0100
Subject: [PATCH 8/9] Uncomment link

---
 .../search-your-data/retrieval-augmented-generation.asciidoc    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index bcb14632acd4e..a9a53e8758b6b 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -27,7 +27,7 @@ RAG has several advantages:
 * *Cost efficiency:* Requires less maintenance compared to fine-tuning or continuously pre-training models.
 * *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control and field/document-level security.
 * *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
-* *Flexible implementation:* Works with basic `<<full-text-search,full-text search>>``, and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.
+* *Flexible implementation:* Works with basic <<full-text-search,full-text search>>, and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.
 
 [discrete]
 [[rag-elasticsearch-components]]

From 5049a29733c31190e93244745a6cffac43a57d20 Mon Sep 17 00:00:00 2001
From: Liam Thompson <leemthompo@gmail.com>
Date: Tue, 7 Jan 2025 13:27:50 +0100
Subject: [PATCH 9/9] Tweak USPs

---
 .../retrieval-augmented-generation.asciidoc                 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
index a9a53e8758b6b..2958999ede91d 100644
--- a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -14,18 +14,18 @@ Compared to fine-tuning or continuous pre-training, RAG can be implemented more
 image::images/search/rag-venn-diagram.svg[RAG sits at the intersection of information retrieval and generative AI, align=center, width=500]
 
 RAG sits at the intersection of https://www.elastic.co/what-is/information-retrieval[information retrieval] and generative AI.
-{es} is an excellent tool for implementing RAG, because it offers various retrieval capabilities, such as full-text search, vector search, and hybrid search.
+{es} is an excellent tool for implementing RAG because it offers various retrieval capabilities, such as full-text search, vector search, and hybrid search, as well as other features like filtering, aggregations, and security.
 
 [discrete]
 [[rag-elasticsearch-advantages]]
 === Advantages of RAG
 
-RAG has several advantages:
+Implementing RAG with {es} has several advantages:
 
 * *Improved context:* Enables grounding the language model with additional, up-to-date, and/or private data.
 * *Reduced hallucination:* Helps minimize factual errors by enabling models to cite authoritative sources.
 * *Cost efficiency:* Requires less maintenance compared to fine-tuning or continuously pre-training models.
-* *Enhanced security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control and field/document-level security.
+* *Built-in security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control and field/document-level security.
 * *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
 * *Flexible implementation:* Works with basic <<full-text-search,full-text search>>, and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.