diff --git a/package-lock.json b/package-lock.json index cc6eb6e..8802842 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "0.0.9-beta-3", "dependencies": { "axios": "^1.1.2", + "ignore": "^7.0.4", "openai": "^4.80.1" }, "devDependencies": { @@ -1096,6 +1097,15 @@ "ms": "^2.0.0" } }, + "node_modules/ignore": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.4.tgz", + "integrity": "sha512-gJzzk+PQNznz8ysRrC0aOkBNVRBDtE1n53IqyqEf3PXrYwomFs5q4pGMizBMJF+ykh03insJ27hB8gSrD2Hn8A==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/import-local": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.2.0.tgz", diff --git a/package.json b/package.json index 7a44f1f..5facd9b 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "llama-vscode", "displayName": "llama-vscode", "description": "Local LLM-assisted text completion using llama.cpp", - "version": "0.0.9-beta-3", + "version": "0.0.10-beta-1", "publisher": "ggml-org", "repository": "https://github.com/ggml-org/llama.vscode", "engines": { @@ -31,55 +31,55 @@ "commands": [ { "command": "extension.triggerInlineCompletion", - "title": "Trigger Inline Completion" + "title": "llama-vscode: Trigger Inline Completion" }, { "command": "extension.triggerNoCacheCompletion", - "title": "Trigger No Cache Completion" + "title": "llama-vscode: Trigger No Cache Completion" }, { "command": "extension.copyIntercept", - "title": "Copy Intercept" + "title": "llama-vscode: Copy Intercept" }, { "command": "extension.cutIntercept", - "title": "Cut Intercept" + "title": "llama-vscode: Cut Intercept" }, { "command": "extension.acceptFirstLine", - "title": "Accept First Line" + "title": "llama-vscode: Accept First Line" }, { "command": "extension.acceptFirstWord", - "title": "Accept First Word" + "title": "llama-vscode: Accept First Word" }, { "command": "extension.copyChunks", - "title": "Copy Chunks" + "title": "llama-vscode: Copy Chunks" }, { "command": "extension.showMenu", - "title": "Show Menu" + "title": "llama-vscode: Show Menu" }, { "command": "extension.askAi", - "title": "Ask AI" + "title": "llama-vscode: Ask AI" }, { "command": "extension.askAiWithContext", - "title": "Ask AI With Context" + "title": "llama-vscode: Ask AI With Context" }, { "command": "extension.editSelectedText", - "title": "Edit Selected Text with AI" + "title": "llama-vscode: Edit Selected Text with AI" }, { "command": "extension.acceptTextEdit", - "title": "Accept Text Edit Suggestion" + "title": "llama-vscode: Accept Text Edit Suggestion" }, { "command": "extension.rejectTextEdit", - "title": "Reject Text Edit Suggestion" + "title": "llama-vscode: Reject Text Edit Suggestion" } ], "keybindings": [ @@ -122,8 +122,7 @@ "command": "extension.acceptFirstWord", "key": "ctrl+right", "when": "editorTextFocus && inlineSuggestionVisible" - } - , + }, { "command": "extension.showMenu", "key": "ctrl+shift+m", @@ -169,6 +168,11 @@ "default": "cd c:/ai ; ./llama-server.exe -m qwen2.5-coder-3b-instruct-q6_k.gguf -ngl 99 --port 8011 --path C:/llama.cpp/llama.cpp/examples/server/webui/dist", "description": "Shell command for starting chat llama.cpp server, executed from the menu" }, + "llama-vscode.launch_embeddings": { + "type": "string", + "default": "cd c:/ai ; ./llama-server.exe -m all-MiniLM-L6-v2-Q8_0.gguf --port 8010", + "description": "Shell command for starting embeddings llama.cpp server, executed from the menu" + },
"llama-vscode.launch_training_completion": { "type": "string", "default": "", @@ -199,6 +203,11 @@ "default": "http://127.0.0.1:8011", "description": "The URL to be used by the extension for chat with ai." }, + "llama-vscode.endpoint_embeddings": { + "type": "string", + "default": "http://127.0.0.1:8010", + "description": "The URL to be used by the extension for creating embeddings." + }, "llama-vscode.auto": { "type": "boolean", "default": true, @@ -274,6 +283,46 @@ "default": 1000, "description": "how often to process queued chunks in normal mode" }, + "llama-vscode.rag_chunk_max_chars": { + "type": "number", + "default": 2000, + "description": "Max number of chars per RAG chunk" + }, + "llama-vscode.rag_max_lines_per_chunk": { + "type": "number", + "default": 60, + "description": "Max number of lines per RAG chunk" + }, + "llama-vscode.rag_max_chars_per_chunk_line": { + "type": "number", + "default": 300, + "description": "max chars for a chunk line, the rest of the line is cut" + }, + "llama-vscode.rag_max_chunks": { + "type": "number", + "default": 30000, + "description": "max cunks for the RAG search" + }, + "llama-vscode.rag_max_bm25_filter_chunks": { + "type": "number", + "default": 47, + "description": "max RAG chunks to filter with BM25 algorithm" + }, + "llama-vscode.rag_max_embedding_filter_chunks": { + "type": "number", + "default": 5, + "description": "max RAG chunks to provide as context to the LLM" + }, + "llama-vscode.rag_max_context_files": { + "type": "number", + "default": 3, + "description": "max number of complete files to send as context to the LLM" + }, + "llama-vscode.rag_max_context_file_chars": { + "type": "number", + "default": 5000, + "description": "max chars for a context file. If the file is bigger it will be cut to avoid too big context." 
+ }, "llama-vscode.language": { "type": "string", "default": "en", @@ -285,14 +334,14 @@ "description": "Enable/disable completions" }, "llama-vscode.languageSettings": { - "type": "object", - "default": { - "*": true - }, - "additionalProperties": { - "type": "boolean" - }, - "description": "Enable/disable suggestions for specific languages" + "type": "object", + "default": { + "*": true + }, + "additionalProperties": { + "type": "boolean" + }, + "description": "Enable/disable suggestions for specific languages" }, "llama-vscode.use_openai_endpoint": { "type": "boolean", @@ -326,6 +375,7 @@ }, "dependencies": { "axios": "^1.1.2", + "ignore": "^7.0.4", "openai": "^4.80.1" }, "devDependencies": { diff --git a/src/application.ts b/src/application.ts index c2b5f30..5fcd275 100644 --- a/src/application.ts +++ b/src/application.ts @@ -9,6 +9,8 @@ import {Completion} from "./completion"; import {Logger} from "./logger"; import { ChatWithAi } from "./chat-with-ai"; import { TextEditor } from "./text-editor"; +import { ChatContext } from "./chat-context"; +import { Prompts } from "./prompts"; export class Application { private static instance: Application; @@ -23,6 +25,8 @@ export class Application { public logger: Logger public askAi: ChatWithAi public textEditor: TextEditor + public chatContext: ChatContext + public prompts: Prompts private constructor() { this.extConfig = new Configuration() @@ -36,6 +40,8 @@ export class Application { this.logger = new Logger(this) this.askAi = new ChatWithAi(this) this.textEditor = new TextEditor(this) + this.chatContext = new ChatContext(this) + this.prompts = new Prompts(this) } public static getInstance(): Application { diff --git a/src/architect.ts b/src/architect.ts index 0f3bc50..36d1234 100644 --- a/src/architect.ts +++ b/src/architect.ts @@ -1,4 +1,6 @@ // TODO +// При липсащ ембеддинг сървер да дава грешка, за да се разбира, че има проблем +// // Ако се използва лора за чат сървера - да се подава в заявката от webui // Идеи // - Използване на агенти (?) 
@@ -14,10 +16,56 @@ export class Architect { this.app = application; } + init = () => { + // Start indexing workspace files + if (this.app.extConfig.endpoint_embeddings.trim() != "") { + setTimeout(() => { + this.app.chatContext.indexWorkspaceFiles().catch(error => { + console.error('Failed to index workspace files:', error); + }); + }, 0); + } + } + + setOnSaveDeleteFileForDb = (context: vscode.ExtensionContext) => { + const saveListener = vscode.workspace.onDidSaveTextDocument(async (document) => { + try { + if (!this.app.chatContext.isImageOrVideoFile(document.uri.toString())){ + // Update after a delay and only if the file is not changed in the meantime to avoid too often updates + let updateTime = Date.now() + let fileProperties = this.app.chatContext.getFileProperties(document.uri.toString()) + if (fileProperties) fileProperties.updated = updateTime; + setTimeout(async () => { + if (fileProperties && fileProperties.updated > updateTime ) { + return; + } + this.app.chatContext.addDocument(document.uri.toString(), document.getText()); + }, 5000); + } + } catch (error) { + console.error('Failed to add document to RAG:', error); + } + }); + context.subscriptions.push(saveListener); + + // Add file delete listener for vector RAG + const deleteListener = vscode.workspace.onDidDeleteFiles(async (event) => { + for (const file of event.files) { + try { + await this.app.chatContext.removeDocument(file.toString()); + } catch (error) { + console.error('Failed to remove document from RAG:', error); + } + } + }); + context.subscriptions.push(deleteListener); + } + setOnChangeConfiguration = (context: vscode.ExtensionContext) => { let configurationChangeDisp = vscode.workspace.onDidChangeConfiguration((event) => { const config = vscode.workspace.getConfiguration("llama-vscode"); this.app.extConfig.updateOnEvent(event, config); + if (this.app.extConfig.isRagConfigChanged(event)) this.init() vscode.window.showInformationMessage(this.app.extConfig.getUiText(`llama-vscode extension is updated.`)??""); }); context.subscriptions.push(configurationChangeDisp); @@ -31,7 +79,7 @@ export class Architect { this.app.extraContext.pickChunkAroundCursor(previousEditor.selection.active.line, previousEditor.document); }, 0); } - + if (editor) { // Editor is now active in the UI, pick a chunk let activeDocument = editor.document; @@ -100,6 +148,17 @@ export class Architect { context.subscriptions.push(onSaveDocDisposable); } + setOnChangeWorkspaceFolders = (context: vscode.ExtensionContext) => { + // Listen for new workspace folders being added + context.subscriptions.push( + vscode.workspace.onDidChangeWorkspaceFolders(event => { + event.added.forEach(folder => { + this.init(); + }); + }) + ); + } + registerCommandManualCompletion = (context: vscode.ExtensionContext) => { const triggerManualCompletionDisposable = vscode.commands.registerCommand('extension.triggerInlineCompletion', async () => { // Manual triggering of the completion with a shortcut @@ -147,7 +206,18 @@ export class Architect { if (this.app.lruResultCache.size() > 0){ completionCache = Array.from(this.app.lruResultCache.getMap().entries()).reduce((accumulator, [key, value]) => accumulator + "Key: " + key + "\nCompletion:\n" + value + "\n\n" , ""); } - vscode.env.clipboard.writeText("Events:\n" + eventLogsCombined + "\n\n------------------------------\n" + "Extra context: \n" + extraContext + "\n\n------------------------------\nCompletion cache: \n" + completionCache) + let firstChunks = "" + if (this.app.chatContext.entries.size > 0){ + firstChunks 
= Array.from(this.app.chatContext.entries.entries()).slice(0,5).reduce((accumulator, [key, value]) => accumulator + "ID: " + key + "\nFile:\n" + value.uri + + "\nfirst line:\n" + value.firstLine + + "\nlast line:\n" + value.lastLine + + "\nChunk:\n" + value.content + "\n\n" , ""); + } + vscode.env.clipboard.writeText("Events:\n" + eventLogsCombined + + "\n\n------------------------------\n" + + "Extra context: \n" + extraContext + + "\n\n------------------------------\nCompletion cache: \n" + completionCache + + "\n\n------------------------------\nChunks: \n" + firstChunks) }); context.subscriptions.push(triggerCopyChunksDisposable); } diff --git a/src/chat-context.ts b/src/chat-context.ts new file mode 100644 index 0000000..3080b3d --- /dev/null +++ b/src/chat-context.ts @@ -0,0 +1,403 @@ +import * as vscode from 'vscode'; +import { Application } from './application'; +import { Utils } from './utils'; +import * as fs from 'fs'; +import * as path from 'path'; +import ignore from 'ignore'; + +interface ChunkEntry { + uri: string; + content: string; + firstLine: number; + lastLine: number; + hash: string; +} + +interface FileProperties { + hash: string; + updated: number; +} + +const filename = 'ghost.dat'; + +export class ChatContext { + private app: Application; + private nextEntryId: number = 0; + public entries: Map<number, ChunkEntry>; + private filesProperties: Map<string, FileProperties>; + + constructor(application: Application) { + this.app = application; + this.entries = new Map(); + this.filesProperties = new Map(); + } + + public async init() { + vscode.window.showInformationMessage('Vector index initialized!'); + } + + public getRagContextChunks = async (prompt: string): Promise<ChunkEntry[]> => { + this.app.statusbar.showTextInfo(this.app.extConfig.getUiText("Extracting keywords from query...")) + let query = this.app.prompts.replaceOnePlaceholders(this.app.prompts.CHAT_GET_KEY_WORDS, "prompt", prompt) + let data = await this.app.llamaServer.getChatCompletion(query); + if (!data || !data.choices[0].message.content) { + vscode.window.showInformationMessage('No suggestions available'); + return []; + } + let keywords = data.choices[0].message.content.trim().split("|"); + + // TODO the synonyms are not returned with good quality each time - words are repeated and sometimes are irrelevant + // Probably in future with better models will work better or probably with the previous prompt we could get synonyms as well + + + this.app.statusbar.showTextInfo(this.app.extConfig.getUiText("Filtering chunks step 1...")) + let topChunksBm25 = this.rankTexts(keywords, Array.from(this.entries.values()), this.app.extConfig.rag_max_bm25_filter_chunks) + let topContextChunks: ChunkEntry[]; + if (this.app.extConfig.endpoint_embeddings.trim() != ""){ + topContextChunks = await this.cosineSimilarityRank(query, topChunksBm25, this.app.extConfig.rag_max_embedding_filter_chunks); + } else { + vscode.window.showInformationMessage('No embeddings server.
Filtering chunks step 2 will be skipped.'); + this.app.statusbar.showTextInfo(this.app.extConfig.getUiText("Filtering chunks step 2...")) + topContextChunks = topChunksBm25.slice(0, 5); + } + + this.app.statusbar.showTextInfo(this.app.extConfig.getUiText("Context chunks ready.")) + + return topContextChunks; + } + + public getRagFilesContext = async (prompt: string): Promise<string> => { + let contextFiles = this.getFilesFromQuery(prompt) + let filesContext = "" + for (const fileName of contextFiles.slice(0, this.app.extConfig.rag_max_context_files)) { + let contextFile = Array.from(this.filesProperties).find(([key]) => key.toLocaleLowerCase().endsWith(fileName.toLocaleLowerCase())) + if (contextFile){ + const [fileUrl, fileProperties] = contextFile; + const document = await vscode.workspace.openTextDocument(vscode.Uri.parse(fileUrl)); + filesContext += "\n\n" + fileUrl + ":\n" + document.getText().slice(0, this.app.extConfig.rag_max_context_file_chars) + } + }; + return filesContext; + } + + public getContextChunksInPlainText = (chunksToSend: ChunkEntry[]) => { + let extraCont = "Here are pieces of code from different files of the project: \n" + + chunksToSend.reduce((accumulator, currentValue) => accumulator + currentValue.content + "\n\n", ""); + return extraCont; + } + + private cosineSimilarityRank = async (query: string, chunkEntries: ChunkEntry[], topN: number):Promise<ChunkEntry[]> => { + const queryEmbedding = await this.getEmbedding(query); + let chunksWithScore = Array.from(chunkEntries) + .map((chunkEntry, index) => ({ + entry: chunkEntry, + score: 0, + })); + const progressOptions = { + location: vscode.ProgressLocation.Notification, + title: this.app.extConfig.getUiText("Filtering chunks step 2..."), + cancellable: true + }; + await vscode.window.withProgress(progressOptions, async (progress, token) => { + let processed = 0; + const total = chunksWithScore.length; + for (const entry of chunksWithScore) { + if (token.isCancellationRequested) { + break; + } + processed++; + progress.report({ + // message: `Indexing ${vscode.workspace.asRelativePath(file)}`, + increment: (1 / total) * 100 + }); + entry.score = await this.cosineSimilarity(queryEmbedding, entry.entry.content); + } + }); + + return chunksWithScore.sort((a, b) => b.score - a.score) + .slice(0, topN) + .map(({ entry: chunkEntry }) => chunkEntry); + } + + private cosineSimilarity = async (a: number[], text: string): Promise<number> => { + let b = await this.getEmbedding(text) + if (!b || b.length == 0 || !a || a.length == 0) { + throw new Error("Error getting embeddings."); + } + if (!b || b.length == 0 || a.length !== b.length) { + throw new Error("Error - vectors must have the same length."); + } + + let dotProduct = 0; + for (let i = 0; i < a.length; i++) { + dotProduct += a[i] * b[i]; + } + + const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0)); + const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0)); + + if (magnitudeA === 0 || magnitudeB === 0) { + return 0; + } + + // Calculate cosine similarity + return dotProduct / (magnitudeA * magnitudeB); + } + + private rankTexts = (keywords: string[], chunkEntries: ChunkEntry[], topN: number): ChunkEntry[] => { + if (!keywords.length || !chunkEntries.length) return []; + + const tokenizedDocs = chunkEntries.map(this.tokenizeChunkEntry); + const stats = Utils.computeBM25Stats(tokenizedDocs); + const queryTerms = Array.from(new Set(keywords.flatMap(this.tokenize))); + + const sortedChunks = Array.from(chunkEntries) + .map((chunkEntry, index) => ({ + entry:
chunkEntry, + score: Utils.bm25Score(queryTerms, index, stats), + })) + .sort((a, b) => b.score - a.score) + + const topChunks = sortedChunks.slice(0, topN) + return topChunks.map(({ entry: chunkEntry }) => chunkEntry); + } + + private tokenizeChunkEntry = (chunkEntry: ChunkEntry): string[] => { + return chunkEntry.content.split(/([A-Z]?[a-z]+)|[_\-\.\s]+/) + .filter(Boolean) // Remove empty strings from the result + .map(word => word.toLowerCase()); + } + + private tokenize = (text: string): string[] => { + return text.split(/([A-Z]?[a-z]+)|[_\-\.\s]+/) + .filter(Boolean) // Remove empty strings from the result + .map(word => word.toLowerCase()); + } + + private async getEmbedding(text: string): Promise<number[]> { + try { + const output = await this.app.llamaServer.getEmbeddings(text); + if (output && output.data && output.data.length > 0) { + return Array.from(output.data[0].embedding); + } else { + console.error('Failed to generate embedding:'); + return []; + } + } catch (error) { + console.error('Failed to generate embedding:', error); + return []; + } + } + + isImageOrVideoFile = (filename: string): boolean => { + const imageExtensions = [ + // image extensions + '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.tiff', + // Standard video formats + '.mp4', '.mov', '.avi', '.wmv', '.flv', '.webm', '.mkv', + // High-quality formats + '.mpg', '.mpeg', '.m4v', '.vob', '.m2ts', '.prores', '.dnxhd', + // Specialized formats + '.mxf', '.ogv', '.3gp', '.3g2', + // Others + '.rm', '.swf', '.asf', '.divx', + // VR formats + '.360', '.vr' + ]; + const lowerCaseFilename = filename.toLowerCase(); + return imageExtensions.some(ext => lowerCaseFilename.endsWith(ext)); + } + + getFileProperties = (uri: string): FileProperties | undefined => { + return this.filesProperties.get(uri); + } + + async addDocument(uri: string, content: string) { + try { + const hash = this.app.lruResultCache.getHash(content); + if (this.filesProperties.get(uri)?.hash === hash) { + return; + } + this.filesProperties.set(uri, {hash: hash, updated: Date.now()}); + + try { + this.removeChunkEntries(uri); + } catch (error) { + console.log('Failed to delete element from RAG:', error); + } + // Split the content into chunks and add them + const lines = content.split(/\r?\n/); + for (let i = 0; i < lines.length; i+= this.app.extConfig.rag_max_lines_per_chunk) { + const startLine = i; // + this.app.extConfig.MAX_LINES_PER_RAG_CHUNK < lines.length ?
i : Math.max(0, lines.length - this.app.extConfig.MAX_LINES_PER_RAG_CHUNK); + let endLine = Math.min(lines.length, i + this.app.extConfig.rag_max_lines_per_chunk); + let chunkLines = lines.slice(startLine, endLine); + let chunk = chunkLines.join('\n'); + if (chunk.length > this.app.extConfig.rag_chunk_max_chars){ + chunk = ""; + let j = 0; + let nextLine = this.getChunkLine(chunkLines, j); + while (chunk.length + nextLine.length + 1 < this.app.extConfig.rag_chunk_max_chars && j < chunkLines.length){ + chunk += "\n" + nextLine; + j++; + nextLine = this.getChunkLine(chunkLines, j); + } + endLine = startLine + j + // Make sure next iteration starts after the last added line + i = startLine + j - this.app.extConfig.rag_max_lines_per_chunk + } + // const embedding = await this.getEmbedding(chunk); + let chunkContent = "\nFile Name: " + uri + "\nFrom line: " + (startLine + 1) + "\nTo line: " + endLine + "\nContent:\n" + chunk + const chunkHash = this.app.lruResultCache.getHash(chunkContent) + this.entries.set(this.nextEntryId, { uri: uri, content: chunkContent, firstLine: startLine + 1, lastLine: endLine, hash: chunkHash}); + if (this.entries.size >= this.app.extConfig.rag_max_chunks) break; + this.nextEntryId++; + } + } catch (error) { + console.error('Failed to add document to RAG:', error); + } + } + + private getChunkLine(chunkLines: string[], j: number) { + return chunkLines[j].length > this.app.extConfig.rag_max_chars_per_chunk_line ? chunkLines[j].substring(0, this.app.extConfig.rag_max_chars_per_chunk_line) : chunkLines[j]; + } + + private removeChunkEntries(uri: string) { + const filteredIds = Array.from(this.entries) + .filter(([_, value]) => value.uri === uri) + .map(([key, _]) => key); + for (let id of filteredIds) { + this.entries.delete(id); + } + } + + async removeDocument(uri: string) { + this.removeChunkEntries(uri); + this.filesProperties.delete(uri); + } + + async indexWorkspaceFiles() { + try { + const files = await this.getFilesRespectingGitignore() + + // Show progress + const progressOptions = { + location: vscode.ProgressLocation.Notification, + title: this.app.extConfig.getUiText("Indexing files..."), + cancellable: true + }; + await vscode.window.withProgress(progressOptions, async (progress, token) => { + const total = files.length; + let processed = 0; + + this.app.logger.addEventLog("RAG", "START_RAG_INDEXING", "") + for (const file of files) { + if (token.isCancellationRequested) { + break; + } + if (this.isImageOrVideoFile(file.toString())) continue; + + try { + const document = await vscode.workspace.openTextDocument(file); + await this.addDocument(file.toString(), document.getText()); + + processed++; + progress.report({ + message: `Indexing ${vscode.workspace.asRelativePath(file)}`, + increment: (1 / total) * 100 + }); + } catch (error) { + console.error(`Failed to index file ${file.toString()}:`, error); + } + if (this.entries.size >= this.app.extConfig.rag_max_chunks) break; + } + this.app.logger.addEventLog("RAG", "END_RAG_INDEXING", "Files: " + processed + " Chunks: " + this.entries.size) + vscode.window.showInformationMessage(this.app.extConfig.getUiText("Indexed") + " " + processed +"/" + files.length +" " + + this.app.extConfig.getUiText("files for RAG search")); + }); + + } catch (error) { + console.error('Failed to index workspace files:', error); + vscode.window.showErrorMessage('Failed to index workspace files'); + } + } + + getFilesRespectingGitignore = async (): Promise<vscode.Uri[]> => { + const workspaceFolders = vscode.workspace.workspaceFolders; + if
(!workspaceFolders || workspaceFolders.length === 0) { + return []; + } + + const rootUri = workspaceFolders[0].uri; + const result: vscode.Uri[] = []; + const igMap = new Map(); + + // First pass: Collect all .gitignore files and their rules + const gitignoreUris = await vscode.workspace.findFiles('**/.gitignore', ''); + await Promise.all(gitignoreUris.map(async uri => { + try { + const content = await vscode.workspace.fs.readFile(uri); + const dir = path.dirname(uri.fsPath); + igMap.set(dir, ignore().add(content.toString())); + } catch (error) { + console.error(`Error reading .gitignore at ${uri.fsPath}:`, error); + } + })); + + // Second pass: Traverse directory tree while respecting ignore rules + async function traverse(dirUri: vscode.Uri) { + const dirPath = dirUri.fsPath; + + if (isIgnored(dirPath)) { + return; + } + + let entries: [string, vscode.FileType][]; + try { + entries = await vscode.workspace.fs.readDirectory(dirUri); + } catch { + return; // Skip directories we can't read + } + + for (const [name, type] of entries) { + const entryUri = vscode.Uri.file(path.join(dirPath, name)); + + if (type === vscode.FileType.Directory) { + if (entryUri.toString().toLowerCase().endsWith(".git")) continue + await traverse(entryUri); + } else if (!isIgnored(entryUri.fsPath)) { + result.push(entryUri); + } + } + } + + function isIgnored(fsPath: string): boolean { + let currentDir = path.dirname(fsPath); + const target = path.basename(fsPath); + + // Check ignore rules from closest to farthest + while (true) { + if (igMap.has(currentDir)) { + const relative = path.relative(currentDir, fsPath); + if (igMap.get(currentDir)!.ignores(relative)) { + return true; + } + } + + const parentDir = path.dirname(currentDir); + if (parentDir === currentDir) break; // Reached root + currentDir = parentDir; + } + + return false; + } + + await traverse(rootUri); + return result; + } + + private getFilesFromQuery = (text: string): string[] => { + // Only allows letters, numbers, underscores, dots, and hyphens in filenames + const regex = /@([a-zA-Z0-9_.-]+)(?=[,.?!\s]|$)/g; + return [...text.matchAll(regex)].map(match => match[1]); + } +} diff --git a/src/chat-with-ai.ts b/src/chat-with-ai.ts index 450fe26..a25b279 100644 --- a/src/chat-with-ai.ts +++ b/src/chat-with-ai.ts @@ -13,28 +13,39 @@ export class ChatWithAi { this.app = application; } - showChatWithAi = (withContext: boolean, context: vscode.ExtensionContext) => { + showChatWithAi = async (withContext: boolean, context: vscode.ExtensionContext) => { const editor = vscode.window.activeTextEditor; let webviewIdentifier = 'htmlChatWithAiViewer' let panelTitle = this.app.extConfig.getUiText("Chat with AI")??"" let aiPanel = this.askAiPanel let extraCont = ""; + let query: string|undefined = undefined if (withContext){ - aiPanel = this.askAiWithContextPanel - if (!aiPanel) this.sentContextChunks = [] - webviewIdentifier = 'htmlChatWithAiWithContextViewer' - let chunksToSend = this.app.extraContext.chunks.filter((_, index) => !this.sentContextChunks.includes(this.app.extraContext.chunksHash[index])); - let chunksToSendHash = this.app.extraContext.chunksHash.filter((item) => !this.sentContextChunks.includes(item)); - if (chunksToSend.length > 0) extraCont = Utils.getChunksInPlainText(chunksToSend); - this.sentContextChunks.push(...chunksToSendHash) - panelTitle = this.app.extConfig.getUiText("Chat with AI with project context")??"" + query = await vscode.window.showInputBox({ + placeHolder: this.app.extConfig.getUiText('Enter your question...'), + prompt: 
this.app.extConfig.getUiText('What would you like to ask AI?'), + ignoreFocusOut: true + }); + + if (!query) { + return + } + + aiPanel = this.askAiWithContextPanel + if (!aiPanel) this.sentContextChunks = [] + webviewIdentifier = 'htmlChatWithAiWithContextViewer' + panelTitle = this.app.extConfig.getUiText("Chat with AI with project context")??"" } - let selectedText = "" + let queryToSend = "" if (editor) { - selectedText = editor.document.getText(editor.selection); - if (selectedText.length > 0) selectedText = "Explain the following source code: " + selectedText + queryToSend = editor.document.getText(editor.selection); + if (queryToSend.length > 0) queryToSend = "Explain the following source code: " + queryToSend + } + if (query) { + queryToSend = query } if (!aiPanel) { + const createWebviewTimeInMs = Date.now() aiPanel = vscode.window.createWebviewPanel( webviewIdentifier, panelTitle, @@ -44,6 +55,7 @@ export class ChatWithAi { retainContextWhenHidden: true, } ); + this.lastActiveEditor = editor; if (withContext) this.askAiWithContextPanel = aiPanel; else this.askAiPanel = aiPanel; @@ -62,15 +74,17 @@ export class ChatWithAi { } }); // Wait for the page to load before sending message + if (query) extraCont = await this.prepareRagContext(query); setTimeout(async () => { - if (aiPanel) aiPanel.webview.postMessage({ command: 'setText', text: selectedText, context: extraCont }); - }, 1000); + if (aiPanel) aiPanel.webview.postMessage({ command: 'setText', text: queryToSend, context: extraCont }); + }, Math.max(0, 3000 - (Date.now() - createWebviewTimeInMs))); } else { aiPanel.reveal(); this.lastActiveEditor = editor; + if (query) extraCont = await this.prepareRagContext(query); // Wait for the page to load before sending message setTimeout(async () => { - if (aiPanel) aiPanel.webview.postMessage({ command: 'setText', text: selectedText, context: extraCont }); + if (aiPanel) aiPanel.webview.postMessage({ command: 'setText', text: queryToSend, context: extraCont }); }, 500); } } @@ -150,6 +164,17 @@ export class ChatWithAi { `; } + private prepareRagContext = async (query: string) => { + let extraCont: string = "" + const contextChunks = await this.app.chatContext.getRagContextChunks(query); + let chunksToSend = contextChunks.filter((_, index) => !this.sentContextChunks.includes(contextChunks[index].hash)); + let chunksToSendHash = chunksToSend.map(chunk => chunk.hash); + if (chunksToSend.length > 0) extraCont = this.app.chatContext.getContextChunksInPlainText(chunksToSend); + this.sentContextChunks.push(...chunksToSendHash); + const contextFiles = await this.app.chatContext.getRagFilesContext(query); + if (contextFiles && contextFiles.length > 0) extraCont += "\n" + contextFiles; + return extraCont + } } diff --git a/src/completion.ts b/src/completion.ts index 83a0244..073c2b6 100644 --- a/src/completion.ts +++ b/src/completion.ts @@ -262,7 +262,7 @@ export class Completion { let futureSuggestion = suggestionLines.slice(1).join('\n') let cached_completion = this.app.lruResultCache.get(futureHashKey) if (cached_completion != undefined) return; - else this.app.lruResultCache.put(futureHashKey, futureSuggestion); + else this.app.lruResultCache.put(futureHashKey, futureSuggestion) } } diff --git a/src/configuration.ts b/src/configuration.ts index 606978c..f80a819 100644 --- a/src/configuration.ts +++ b/src/configuration.ts @@ -9,12 +9,14 @@ export class Configuration { enabled = true; launch_completion = "" launch_chat = "" + launch_embeddings = "" launch_training_completion = "" 
launch_training_chat = "" lora_completion = "" lora_chat = "" endpoint = "http=//127.0.0.1:8012"; endpoint_chat = "http=//127.0.0.1:8011"; + endpoint_embeddings = "http=//127.0.0.1:8010"; auto = true; api_key = ""; self_signed_certificate = ""; @@ -38,6 +40,15 @@ export class Configuration { openai_client_model: string = ""; openai_prompt_template: string = "<|fim_prefix|>{inputPrefix}{prompt}<|fim_suffix|>{inputSuffix}<|fim_middle|>"; + rag_chunk_max_chars = 800 + rag_max_lines_per_chunk = 40 + rag_max_chars_per_chunk_line = 300 + rag_max_chunks = 50000 + rag_max_bm25_filter_chunks = 47 + rag_max_embedding_filter_chunks = 5 + rag_max_context_files = 3 + rag_max_context_file_chars = 10000 + // additional configs // TODO: change to snake_case for consistency axiosRequestConfig = {}; @@ -51,6 +62,7 @@ export class Configuration { MAX_QUEUED_CHUNKS = 16; DELAY_BEFORE_COMPL_REQUEST = 150; MAX_EVENTS_IN_LOG = 250; + EDIT_TEXT_DIFF_WINDOW_CONTEXT_LINEX = 20; config: vscode.WorkspaceConfiguration; @@ -86,8 +98,10 @@ export class Configuration { // TODO Handle the case of wrong types for the configuration values this.endpoint = this.trimTrailingSlash(String(config.get("endpoint"))); this.endpoint_chat = this.trimTrailingSlash(String(config.get("endpoint_chat"))); + this.endpoint_embeddings = this.trimTrailingSlash(String(config.get("endpoint_embeddings"))); this.launch_completion = String(config.get("launch_completion")); this.launch_chat = String(config.get("launch_chat")); + this.launch_embeddings = String(config.get("launch_embeddings")); this.launch_training_completion = String(config.get("launch_training_completion")); this.launch_training_chat = String(config.get("launch_training_chat")); this.lora_completion = String(config.get("lora_completion")); @@ -101,6 +115,7 @@ export class Configuration { this.n_prefix = Number(config.get("n_prefix")); this.n_suffix = Number(config.get("n_suffix")); this.n_predict = Number(config.get("n_predict")); + this.rag_chunk_max_chars = Number(config.get("rag_chunk_max_chars")); this.t_max_prompt_ms = Number(config.get("t_max_prompt_ms")); this.t_max_predict_ms = Number(config.get("t_max_predict_ms")); this.show_info = Boolean(config.get("show_info")); @@ -110,6 +125,13 @@ export class Configuration { this.ring_chunk_size = Number(config.get("ring_chunk_size")); this.ring_scope = Number(config.get("ring_scope")); this.ring_update_ms = Number(config.get("ring_update_ms")); + this.rag_max_lines_per_chunk = Number(config.get("rag_max_lines_per_chunk")); + this.rag_max_chars_per_chunk_line = Number(config.get("rag_max_chars_per_chunk_line")); + this.rag_max_chunks = Number(config.get("rag_max_chunks")); + this.rag_max_bm25_filter_chunks = Number(config.get("rag_max_bm25_filter_chunks")); + this.rag_max_embedding_filter_chunks = Number(config.get("rag_max_embedding_filter_chunks")); + this.rag_max_context_files = Number(config.get("rag_max_context_files")); + this.rag_max_context_file_chars = Number(config.get("rag_max_context_file_chars")); this.language = String(config.get("language")); this.disabledLanguages = config.get("disabledLanguages") || []; this.enabled = Boolean(config.get("enabled", true)); @@ -130,6 +152,13 @@ export class Configuration { } }; + isRagConfigChanged = (event: vscode.ConfigurationChangeEvent) => { + return event.affectsConfiguration("llama-vscode.rag_chunk_max_chars") + || event.affectsConfiguration("llama-vscode.rag_max_lines_per_chunk") + || event.affectsConfiguration("llama-vscode.rag_max_files") + || 
event.affectsConfiguration("llama-vscode.rag_max_chars_per_chunk_line"); + } + trimTrailingSlash = (s: string): string => { if (s.length > 0 && s[s.length - 1] === "/") { return s.slice(0, -1); diff --git a/src/extension.ts b/src/extension.ts index 5ee2449..fdae8d7 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -22,6 +22,9 @@ export function activate(context: vscode.ExtensionContext) { app.architect.registerCommandEditSelectedText(context); app.architect.registerCommandAcceptTextEdit(context); app.architect.registerCommandRejectTextEdit(context); + app.architect.setOnSaveDeleteFileForDb(context); + app.architect.setOnChangeWorkspaceFolders(context) + app.architect.init() } export function deactivate() { diff --git a/src/llama-server.ts b/src/llama-server.ts index 629d9bf..60af40d 100644 --- a/src/llama-server.ts +++ b/src/llama-server.ts @@ -24,11 +24,27 @@ export interface LlamaChatResponse { choices: [{message:{content?: string}}]; } +export interface LlamaEmbeddingsResponse { + "model": string, + "object": string, + "usage": { + "prompt_tokens": number, + "total_tokens": number + }, + "data": [ + { + "embedding": number[], + "index": number, + "object": string + } + ] +} export class LlamaServer { private app: Application private vsCodeFimTerminal: Terminal | undefined; private vsCodeChatTerminal: Terminal | undefined; + private vsCodeEmbeddingsTerminal: Terminal | undefined; private vsCodeTrainTerminal: Terminal | undefined; private readonly defaultRequestParams = { top_k: 40, @@ -43,10 +59,6 @@ export class LlamaServer { this.vsCodeFimTerminal = undefined; } - private replacePlaceholders(template: string, replacements: { [key: string]: string }): string { - return template.replace(/{(\w+)}/g, (_, key) => replacements[key] || ""); - } - private async handleOpenAICompletion( chunks: any[], inputPrefix: string, @@ -67,7 +79,7 @@ export class LlamaServer { const rsp = await client.completions.create({ model: this.app.extConfig.openai_client_model || "", - prompt: additional_context + this.replacePlaceholders(this.app.extConfig.openai_prompt_template, replacements), + prompt: additional_context + this. app.prompts.replacePlaceholders(this.app.extConfig.openai_prompt_template, replacements), max_tokens: this.app.extConfig.n_predict, temperature: 0.1, top_p: this.defaultRequestParams.top_p, @@ -121,25 +133,25 @@ export class LlamaServer { }; } - private createChatRequestPayload(noPredict: boolean, instructions: string, originalText: string, chunks: any[], context: string, nindent?: number) { + private createChatEditRequestPayload(noPredict: boolean, instructions: string, originalText: string, chunks: any[], context: string, nindent?: number) { const CHUNKS_PLACEHOLDER = "[chunks]"; const INSTRUCTIONS_PLACEHOLDER = "[instructions]"; const ORIGINAL_TEXT_PLACEHOLDER = "[originalText]"; const CONTEXT_PLACEHOLDER = "[context]"; - let editTextTemplate = `${CHUNKS_PLACEHOLDER}\n\nModify the following original code according to the instructions. Output only the modified code. No explanations.\n\ninstructions:\n${INSTRUCTIONS_PLACEHOLDER}\n\noriginal code:\n${ORIGINAL_TEXT_PLACEHOLDER}\n\nmodified code:` + // let editTextTemplate = `${CHUNKS_PLACEHOLDER}\n\nModify the following original code according to the instructions. Output only the modified code. 
No explanations.\n\ninstructions:\n${INSTRUCTIONS_PLACEHOLDER}\n\noriginal code:\n${ORIGINAL_TEXT_PLACEHOLDER}\n\nmodified code:` if (noPredict) { return { // input_extra: chunks, "messages": [ - { - "role": "system", - "content": "You are an expert coder." - }, - { - "role": "user", - "content": Utils.getChunksInPlainText(chunks) - } - ], + { + "role": "system", + "content": "You are an expert coder." + }, + { + "role": "user", + "content": context + } + ], n_predict: 0, samplers: [], cache_prompt: true, @@ -148,7 +160,11 @@ ...(this.app.extConfig.lora_completion.trim() != "" && { lora: [{ id: 0, scale: 0.5 }] }) }; } - + const replacements = { + chunks: Utils.getChunksInPlainText(chunks), + instructions: instructions, + originalText: originalText, + } return { "messages": [ { @@ -157,9 +173,7 @@ }, { "role": "user", - "content": editTextTemplate.replace(CHUNKS_PLACEHOLDER, Utils.getChunksInPlainText(chunks)) - .replace(INSTRUCTIONS_PLACEHOLDER, instructions).replace(ORIGINAL_TEXT_PLACEHOLDER, originalText) - .replace(CONTEXT_PLACEHOLDER, context) + "content": this.app.prompts.replacePlaceholders(this.app.prompts.CHAT_EDIT_TEXT, replacements) } ], "stream": false, @@ -188,6 +202,43 @@ }; } + private createChatRequestPayload(content: string) { + return { + "messages": [ + { + "role": "system", + "content": "You are an expert coder." + }, + { + "role": "user", + "content": content + } + ], + "stream": false, + "cache_prompt": true, + "samplers": "edkypmxt", + "temperature": 0.8, + "dynatemp_range": 0, + "dynatemp_exponent": 1, + "top_k": 40, + "top_p": 0.95, + "min_p": 0.05, + "typical_p": 1, + "xtc_probability": 0, + "xtc_threshold": 0.1, + "repeat_last_n": 64, + "repeat_penalty": 1, + "presence_penalty": 0, + "frequency_penalty": 0, + "dry_multiplier": 0, + "dry_base": 1.75, + "dry_allowed_length": 2, + "dry_penalty_last_n": -1, + "max_tokens": -1, + "timings_per_token": false, + ...(this.app.extConfig.lora_chat.trim() != "" && { lora: [{ id: 0, scale: 0.5 }] }) + }; + } getFIMCompletion = async ( inputPrefix: string, @@ -212,7 +263,7 @@ return response.status === STATUS_OK ? response.data : undefined; }; - getChatCompletion = async ( + getChatEditCompletion = async ( instructions: string, originalText: string, context: string, @@ -221,7 +272,19 @@ ): Promise<LlamaChatResponse | undefined> => { const response = await axios.post( `${this.app.extConfig.endpoint_chat}/v1/chat/completions`, - this.createChatRequestPayload(false, instructions, originalText, chunks, context, nindent), + this.createChatEditRequestPayload(false, instructions, originalText, chunks, context, nindent), this.app.extConfig.axiosRequestConfig ); + return response.status === STATUS_OK ?
response.data : undefined; + }; + + getChatCompletion = async ( + prompt: string, + ): Promise => { + const response = await axios.post( + `${this.app.extConfig.endpoint_chat}/v1/chat/completions`, + this.createChatRequestPayload(prompt), this.app.extConfig.axiosRequestConfig ); @@ -240,13 +303,27 @@ export class LlamaServer { this.createRequestPayload(true, "", "", chunks, "", undefined), this.app.extConfig.axiosRequestConfig ); + }; + + getEmbeddings = async (text: string): Promise => { + try { + const response = await axios.post( + `${this.app.extConfig.endpoint_embeddings}/v1/embeddings`, + { + "input": text, + "model": "GPT-4", + "encoding_format": "float" + }, + this.app.extConfig.axiosRequestConfig + ); + return response.data; + } catch (error: any) { + console.error('Failed to get embeddings:', error); + vscode.window.showInformationMessage(this.app.extConfig.getUiText("Error getting embeddings") + " " + error.message); + return undefined; + } + - // make a request to the API to prepare for the next chat request - axios.post( - `${this.app.extConfig.endpoint_chat}/v1/chat/completions`, - this.createChatRequestPayload(true, "", "", chunks, "", undefined), - this.app.extConfig.axiosRequestConfig - ); }; shellFimCmd = (launchCmd: string): void => { @@ -285,6 +362,24 @@ export class LlamaServer { } } + shellEmbeddingsCmd = (launchCmd: string): void => { + if (!launchCmd) { + vscode.window.showInformationMessage(this.app.extConfig.getUiText("There is no command to execute.")??""); + return; + } + try { + this.vsCodeEmbeddingsTerminal = vscode.window.createTerminal({ + name: 'llama.cpp Embeddings Terminal' + }); + this.vsCodeEmbeddingsTerminal.show(true); + this.vsCodeEmbeddingsTerminal.sendText(launchCmd); + } catch(err){ + if (err instanceof Error) { + vscode.window.showInformationMessage(this.app.extConfig.getUiText("Error executing command") + " " + launchCmd +" : " + err.message); + } + } + } + shellTrainCmd = (trainCmd: string): void => { if (!trainCmd) { vscode.window.showInformationMessage(this.app.extConfig.getUiText("There is no command to execute.")??""); @@ -311,6 +406,10 @@ export class LlamaServer { if (this.vsCodeChatTerminal) this.vsCodeChatTerminal.dispose(); } + killEmbeddingsCmd = (): void => { + if (this.vsCodeEmbeddingsTerminal) this.vsCodeEmbeddingsTerminal.dispose(); + } + killTrainCmd = (): void => { if (this.vsCodeTrainTerminal) this.vsCodeTrainTerminal.dispose(); } diff --git a/src/lru-cache.ts b/src/lru-cache.ts index 8f60a6a..4508e12 100644 --- a/src/lru-cache.ts +++ b/src/lru-cache.ts @@ -70,5 +70,4 @@ export class LRUCache { getMap = () => { return this.map; } - } diff --git a/src/menu.ts b/src/menu.ts index 2d98a72..e1b1461 100644 --- a/src/menu.ts +++ b/src/menu.ts @@ -35,7 +35,6 @@ export class Menu { description: this.app.extConfig.getUiText(`Opens a chat with AI window with project context inside VS Code using server from property endpoint_chat`) }) - if (process.platform === 'darwin') { // if mac os menuItems.push( { @@ -53,7 +52,7 @@ export class Menu { { label: this.app.extConfig.getUiText('Start completion model') + ' Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)', description: this.app.extConfig.getUiText(`Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`) - }, + }, { label: this.app.extConfig.getUiText('Start chat model') + ' Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)', description: this.app.extConfig.getUiText(`Requires brew, installs/upgrades llama.cpp server, downloads the model if not 
available, and runs llama.cpp server`) @@ -69,6 +68,10 @@ export class Menu { { label: this.app.extConfig.getUiText('Start chat model') + ' Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)', description: this.app.extConfig.getUiText(`Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`) + }, + { + label: this.app.extConfig.getUiText('Start embeddings model') + ' Nomic-Embed-Text-V2-GGUF', + description: this.app.extConfig.getUiText(`Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`) }) } @@ -80,15 +83,19 @@ export class Menu { { label: this.app.extConfig.getUiText("Start chat llama.cpp server")??"", description: this.app.extConfig.getUiText(`Runs the command from property launch_chat`) + }, + { + label: this.app.extConfig.getUiText("Start embeddings llama.cpp server")??"", + description: this.app.extConfig.getUiText(`Runs the command from property launch_embeddings`) }) - if (this.app.extConfig.launch_training_completion.trim() != "") { + if (this.app.extConfig.launch_training_completion.trim() != "") { menuItems.push( { label: this.app.extConfig.getUiText("Start training completion model")??"", description: this.app.extConfig.getUiText(`Runs the command from property launch_training_completion`) }) } - if (this.app.extConfig.launch_training_chat.trim() != "") { + if (this.app.extConfig.launch_training_chat.trim() != "") { menuItems.push( { label: this.app.extConfig.getUiText("Start training chat model")??"", @@ -103,13 +110,17 @@ export class Menu { { label: this.app.extConfig.getUiText("Stop chat llama.cpp server")??"", description: this.app.extConfig.getUiText(`Stops chat llama.cpp server if it was started from llama.vscode menu`) + }, + { + label: this.app.extConfig.getUiText("Stop embeddings llama.cpp server")??"", + description: this.app.extConfig.getUiText(`Stops embeddings llama.cpp server if it was started from llama.vscode menu`) }) - if (this.app.extConfig.launch_training_completion.trim() != "" || this.app.extConfig.launch_training_chat.trim() != "") { + if (this.app.extConfig.launch_training_completion.trim() != "" || this.app.extConfig.launch_training_chat.trim() != "") { menuItems.push( { label: this.app.extConfig.getUiText("Stop training")??"", description: this.app.extConfig.getUiText(`Stops training if it was started from llama.vscode menu`) - }) + }) } return menuItems.filter(Boolean) as vscode.QuickPickItem[]; @@ -122,13 +133,16 @@ export class Menu { let endpointParts = this.app.extConfig.endpoint.split(":"); let port = endpointParts[endpointParts.length -1] let endpointChatParts = this.app.extConfig.endpoint_chat.split(":"); + let endpointEmbeddingParts = this.app.extConfig.endpoint_embeddings.split(":"); let portChat = endpointChatParts[endpointChatParts.length -1] + let portEmbedding = endpointEmbeddingParts[endpointEmbeddingParts.length -1] if (!Number.isInteger(Number(port))) port = DEFAULT_PORT_FIM_MODEL - let llmMacVramTemplate = " brew install llama.cpp && llama-server --" + PRESET_PLACEHOLDER + " --port " + port + let llmMacVramTemplate = " brew install llama.cpp && llama-server --" + PRESET_PLACEHOLDER + " --port " + port let llmMacCpuTemplate = " brew install llama.cpp && llama-server -hf " + MODEL_PLACEHOLDER + " --port " + port + " -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256" - let llmMacChatVramTemplate = " brew install llama.cpp && llama-server -hf " + MODEL_PLACEHOLDER + " --port " + portChat + " -ngl 99 -fa -ub 1024 -b 
1024 --ctx-size 0 --cache-reuse 256 " + let llmMacChatVramTemplate = " brew install llama.cpp && llama-server -hf " + MODEL_PLACEHOLDER + " --port " + portChat + " -ngl 99 -fa -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 " let llmMacChatCpuTemplate = " brew install llama.cpp && llama-server -hf " + MODEL_PLACEHOLDER + " --port " + portChat + " -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256" - + let llmMacEmbeddingCpuTemplate = " brew install llama.cpp && llama-server -hf " + MODEL_PLACEHOLDER + " --port " + portEmbedding + " -ub 2048 -b 2048 --ctx-size 2048 --embeddings" + switch (selected.label) { case "$(gear) " + this.app.extConfig.getUiText("Edit Settings..."): await vscode.commands.executeCommand('workbench.action.openSettings', 'llama-vscode'); @@ -144,7 +158,7 @@ export class Menu { case this.app.extConfig.getUiText('Start completion model') + ' Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)': await this.app.llamaServer.killFimCmd(); await this.app.llamaServer.shellFimCmd(llmMacVramTemplate.replace(PRESET_PLACEHOLDER, "fim-qwen-7b-default")); - break; + break; case this.app.extConfig.getUiText('Start completion model') + ' Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)': await this.app.llamaServer.killFimCmd(); await this.app.llamaServer.shellFimCmd(llmMacCpuTemplate.replace(MODEL_PLACEHOLDER, "ggml-org/Qwen2.5-Coder-0.5B-Instruct-Q8_0-GGUF")); @@ -160,11 +174,15 @@ export class Menu { case this.app.extConfig.getUiText('Start chat model') + ' Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)': await this.app.llamaServer.killChatCmd(); await this.app.llamaServer.shellChatCmd(llmMacChatVramTemplate.replace(MODEL_PLACEHOLDER, "ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF")); - break; + break; case this.app.extConfig.getUiText('Start chat model') + ' Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)': await this.app.llamaServer.killChatCmd(); await this.app.llamaServer.shellChatCmd(llmMacChatCpuTemplate.replace(MODEL_PLACEHOLDER, "ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF")); break; + case this.app.extConfig.getUiText('Start embeddings model') + ' Nomic-Embed-Text-V2-GGUF': + await this.app.llamaServer.killEmbeddingsCmd(); + await this.app.llamaServer.shellEmbeddingsCmd(llmMacEmbeddingCpuTemplate.replace(MODEL_PLACEHOLDER, "ggml-org/Nomic-Embed-Text-V2-GGUF")); + break; case this.app.extConfig.getUiText('Start completion llama.cpp server'): await this.app.llamaServer.killFimCmd(); let commandCompletion = this.app.extConfig.launch_completion @@ -176,7 +194,12 @@ export class Menu { let commandChat = this.app.extConfig.launch_chat if (this.app.extConfig.lora_chat.trim() != "") commandChat += " --lora " + this.app.extConfig.lora_chat await this.app.llamaServer.shellChatCmd(commandChat); - break; + break; + case this.app.extConfig.getUiText('Start embeddings llama.cpp server'): + await this.app.llamaServer.killEmbeddingsCmd(); + let commandEmbeddings = this.app.extConfig.launch_embeddings + await this.app.llamaServer.shellEmbeddingsCmd(commandEmbeddings); + break; case this.app.extConfig.getUiText('Start training completion model'): await this.app.llamaServer.killTrainCmd(); await this.app.llamaServer.shellTrainCmd(this.app.extConfig.launch_training_completion); @@ -184,10 +207,13 @@ export class Menu { case this.app.extConfig.getUiText('Start training chat model'): await this.app.llamaServer.killTrainCmd(); await this.app.llamaServer.shellTrainCmd(this.app.extConfig.launch_training_chat); - break; + break; case this.app.extConfig.getUiText("Stop completion llama.cpp server"): await 
this.app.llamaServer.killFimCmd(); break; + case this.app.extConfig.getUiText("Stop embeddings llama.cpp server"): + await this.app.llamaServer.killEmbeddingsCmd(); + break; case this.app.extConfig.getUiText("Stop chat llama.cpp server"): await this.app.llamaServer.killChatCmd(); break; @@ -209,6 +235,7 @@ } this.app.statusbar.updateStatusBarText(); } + private async handleCompletionToggle(label: string, currentLanguage: string | undefined, languageSettings: Record<string, boolean>) { const config = this.app.extConfig.config; if (label.includes(this.app.extConfig.getUiText('All Completions')??"")) { @@ -220,7 +247,6 @@ } } - showMenu = async (context: vscode.ExtensionContext) => { const currentLanguage = vscode.window.activeTextEditor?.document.languageId; const isLanguageEnabled = currentLanguage ? this.app.extConfig.isCompletionEnabled(undefined, currentLanguage) : true; diff --git a/src/prompts.ts b/src/prompts.ts new file mode 100644 index 0000000..399c891 --- /dev/null +++ b/src/prompts.ts @@ -0,0 +1,22 @@ +import {Application} from "./application"; + +export class Prompts { + private app: Application + + CHAT_GET_KEY_WORDS = "Analyze the text below and extract the most important keywords. Don't include @ in the keywords. Ensure no word is repeated in the output. Format the response strictly as:\nkeyword1|keyword2|...\nText: {prompt}" + CHAT_GET_SYNONYMS = "Get up to two different synonyms for each of the following words and make one list from all of them in format word1|word2|word3.\nWords: {keywords} " + + CHAT_EDIT_TEXT = `{chunks}\n\nModify the following original code according to the instructions. Output only the modified code. No explanations.\n\ninstructions:\n{instructions}\n\noriginal code:\n{originalText}\n\nmodified code:` + + constructor(application: Application) { + this.app = application; + } + + public replacePlaceholders(template: string, replacements: { [key: string]: string }): string { + return template.replace(/{(\w+)}/g, (_, key) => replacements[key] || ""); + } + + public replaceOnePlaceholders(template: string, key: string, replacement: string): string { + return template.replace("{"+key+"}", replacement); + } +} diff --git a/src/statusbar.ts b/src/statusbar.ts index aa6551c..1584309 100644 --- a/src/statusbar.ts +++ b/src/statusbar.ts @@ -9,6 +9,11 @@ this.app = application; } + showTextInfo = (text: string | undefined) =>{ + if (text == undefined ) this.llamaVscodeStatusBarItem.text = "llama-vscode"; + else this.llamaVscodeStatusBarItem.text = "llama-vscode | " + text; + } + showInfo = (data: LlamaResponse | undefined) => { if (data == undefined || data.content == undefined || data.content.trim() == "" ) { if (this.app.extConfig.show_info) { diff --git a/src/text-editor.ts b/src/text-editor.ts index c85c3c2..85c101b 100644 --- a/src/text-editor.ts +++ b/src/text-editor.ts @@ -54,8 +54,8 @@ try { // Get completion from llama server - const data = await this.app.llamaServer.getChatCompletion( - prompt, + const data = await this.app.llamaServer.getChatEditCompletion( + prompt, this.selectedText, context, this.app.extraContext.chunks, @@ -67,7 +67,7 @@ return; } this.currentSuggestion = this.removeFirstAndLastLinesIfBackticks(data.choices[0].message.content.trim()); - + // Show the suggestion in a diff view await this.showDiffView(editor, this.currentSuggestion); this.setSuggestionVisible(true); @@ -84,40 +84,40 @@ private
removeFirstAndLastLinesIfBackticks(input: string): string { const lines = input.split('\n'); // Split the string into lines - + // Remove the first line if it starts with ``` if (lines[0]?.trim().startsWith('```')) { lines.shift(); // Remove the first line } - + // Remove the last line if it starts with ``` if (lines[lines.length - 1]?.trim().startsWith('```')) { lines.pop(); // Remove the last line } - + return lines.join('\n'); // Join the remaining lines back into a string } private async showDiffView(editor: vscode.TextEditor, suggestion: string) { // Get context before and after the selection - const contextLines = 25; + const contextLines = this.app.extConfig.EDIT_TEXT_DIFF_WINDOW_CONTEXT_LINEX; const startLine = Math.max(0, this.selection!.start.line - contextLines); const endLine = Math.min(editor.document.lineCount - 1, this.selection!.end.line + contextLines); - + // Get the text before the selection const beforeRange = new vscode.Range(startLine, 0, this.selection!.start.line, 0); const beforeText = editor.document.getText(beforeRange); - + // Get the text after the selection const afterRange = new vscode.Range(this.selection!.end.line, editor.document.lineAt(this.selection!.end.line).text.length, endLine, editor.document.lineAt(endLine).text.length); const afterText = editor.document.getText(afterRange); - + // Combine the context with the suggestion const fullSuggestion = beforeText + suggestion + afterText; - + // Create a temporary document for the suggestion using a custom scheme const uri = vscode.Uri.parse('llama-suggestion:suggestion.txt'); - + // Register a content provider for our custom scheme const provider = new class implements vscode.TextDocumentContentProvider { onDidChange?: vscode.Event<vscode.Uri>; @@ -125,14 +125,14 @@ return fullSuggestion; } }; - + // Register the provider const registration = vscode.workspace.registerTextDocumentContentProvider('llama-suggestion', provider); - + // Create a diff editor with read-only content const diffTitle = 'Text Edit Suggestion'; await vscode.commands.executeCommand('vscode.diff', editor.document.uri, uri, diffTitle); - + // Store the registration to dispose later this.registration = registration; } @@ -144,7 +144,7 @@ await this.applyChange(this.currentEditor, this.currentSuggestion); this.setSuggestionVisible(false); - + // Clean up after applying the change await this.cleanup(); } @@ -155,7 +155,7 @@ } this.setSuggestionVisible(false); - + // Clean up without applying the change await this.cleanup(); } @@ -169,16 +169,16 @@ private async cleanup() { // Close the diff editor await vscode.commands.executeCommand('workbench.action.closeActiveEditor'); - + // Dispose of the content provider registration if (this.registration) { this.registration.dispose(); this.registration = undefined; } - + this.currentSuggestion = undefined; this.currentEditor = undefined; this.selection = undefined; this.setSuggestionVisible(false); } -} \ No newline at end of file +} diff --git a/src/translations.ts b/src/translations.ts index ab0ac20..aa01257 100644 --- a/src/translations.ts +++ b/src/translations.ts @@ -1,20 +1,20 @@ /* -Here a the prompt to get the transation from LLM (update the last row with the strings, whcih should be translated; first check if the string already exists): +Here is a prompt to get the translation from the LLM (update the last row with the strings which should be translated; first check if the string already exists): #
Background information -I am translating the UI of an application from English to other languages. +I am translating the UI of an application from English to other languages. # Instruction -I will give you several expressions in English, separated by "|". I'd like you to translate each of them into the following languages: Bulgarian, German, Russian, Spanish, Chinese, French. +I will give you several expressions in English, separated by "|". I'd like you to translate each of them into the following languages: Bulgarian, German, Russian, Spanish, Chinese, French. # Example output Please show each translation in a separate row. Start each row with "[" and finish it with "],". Start with the English expression. Here is a concrete example to illustrate how I want you to format the output if you get as input "no suggestion|thinking..." -["no suggestion", "нямам предложение", "kein Vorschlag", "нет предложения", "ninguna propuesta", "无建议", "pas de suggestion"], +["no suggestion", "нямам предложение", "kein Vorschlag", "нет предложения", "ninguna propuesta", "无建议", "pas de suggestion"], ["thinking...", "мисля...", "Ich denke...", "думаю...", "pensando...", "思考...", "pense..."], Edit Settings...|View Documentation...|Chat with AI|Chat with AI with project context @@ -29,36 +29,50 @@ export const translations: string[][] = [ ["View Documentation...", "Преглед на документацията...", "Dokumentation anzeigen...", "Просмотреть документацию...", "Ver la documentación...", "查看文档...", "Voir la documentation..."], ["Chat with AI", "Чат с ИИ", "Mit KI chatten", "Чат с ИИ", "Chatear con IA", "与 AI 聊天", "Discuter avec l'IA"], ["Chat with AI with project context", "Чат с ИИ с контекст на проекта", "Mit KI chatten mit Projektkontext", "Чат с ИИ с контекстом проекта", "Chatear con IA con contexto del proyecto", "在项目上下文中与 AI 聊天", "Discuter avec l'IA avec le contexte du projet"], - ["Opens a chat with AI window inside VS Code using server from property endpoint_chat", "Отваря чат с AI прозорец в рамките на VS Code, използвайки сървър от свойството endpoint_chat", "Öffnet ein Chat-Fenster mit KI in VS Code unter Verwendung des Servers aus der Eigenschaft endpoint_chat", "Открывает окно чата с ИИ внутри VS Code, используя сервер из свойства endpoint_chat", "Abre una ventana de chat con IA dentro de VS Code utilizando el servidor de la propiedad endpoint_chat", "在 VS Code 内使用属性 endpoint_chat 的服务器打开 AI 聊天窗口", "Ouvre une fenêtre de chat avec l'IA dans VS Code en utilisant le serveur défini dans la propriété endpoint_chat"], + ["Opens a chat with AI window inside VS Code using server from property endpoint_chat", "Отваря чат с AI прозорец в рамките на VS Code, използвайки сървър от свойството endpoint_chat", "Öffnet ein Chat-Fenster mit KI in VS Code unter Verwendung des Servers aus der Eigenschaft endpoint_chat", "Открывает окно чата с ИИ внутри VS Code, используя сервер из свойства endpoint_chat", "Abre una ventana de chat con IA dentro de VS Code utilizando el servidor de la propiedad endpoint_chat", "在 VS Code 内使用属性 endpoint_chat 的服务器打开 AI 聊天窗口", "Ouvre une fenêtre de chat avec l'IA dans VS Code en utilisant le serveur défini dans la propriété endpoint_chat"], ["Opens a chat with AI window with project context inside VS Code using server from property endpoint_chat", "Отваря чат с AI прозорец с контекст на проекта в рамките на VS Code, използвайки сървър от свойството endpoint_chat", "Öffnet ein Chat-Fenster mit KI und Projektkontext in VS Code unter Verwendung des Servers aus der Eigenschaft endpoint_chat", 
"Открывает окно чата с ИИ с контекстом проекта внутри VS Code, используя сервер из свойства endpoint_chat", "Abre una ventana de chat con IA con contexto del proyecto dentro de VS Code utilizando el servidor de la propiedad endpoint_chat", "在 VS Code 内使用属性 endpoint_chat 的服务器打开带有项目上下文的 AI 聊天窗口", "Ouvre une fenêtre de chat avec l'IA incluant le contexte du projet dans VS Code en utilisant le serveur défini dans la propriété endpoint_chat"], - ["Disable", "Деактивиране", "Deaktivieren", "Отключить", "Desactivar", "禁用", "Désactiver"], - ["Enable", "Активиране", "Aktivieren", "Включить", "Activar", "启用", "Activer"], - ["Completions for", "Завършване за", "Vervollständigungen für", "Завершения для", "Completaciones para", "完成建议适用于", "Complétions pour"], - ["Currently", "В момента", "Derzeit", "В настоящее время", "Actualmente", "目前", "Actuellement"], - ["enabled", "активирано", "aktiviert", "включено", "habilitado", "已启用", "activé"], - ["disabled", "деактивирано", "deaktiviert", "отключено", "deshabilitado", "已禁用", "désactivé"], - ["All Completions", "Всички завършвания", "Alle Vervollständigungen", "Все автозаполнения", "Todas las completaciones", "所有补全", "Toutes les complétions"], - ["Turn off completions globally", "Изключете завършванията глобално", "Vervollständigungen global deaktivieren", "Отключить автозаполнение глобально", "Desactivar las completaciones globalmente", "全局关闭补全", "Désactiver les complétions globalement"], - ["Turn on completions globally", "Включете завършванията глобално", "Vervollständigungen global aktivieren", "Включить автозаполнение глобально", "Activar las completaciones globalmente", "全局开启补全", "Activer les complétions globalement"], - ["Start completion llama.cpp server", "Стартиране на сървър за завършване llama.cpp", "Startet den Abschluss-Server von llama.cpp", "Запуск сервера завершения llama.cpp", "Iniciar servidor de finalización llama.cpp", "启动 llama.cpp 补全服务器", "Démarrer le serveur de complétion llama.cpp"], - ["Runs the command from property launch_completion", "Изпълнява командата от свойството launch_completion", "Führt den Befehl aus der Eigenschaft launch_completion aus", "Выполняет команду из свойства launch_completion", "Ejecuta el comando de la propiedad launch_completion", "执行来自 launch_completion 属性的命令", "Exécute la commande de la propriété launch_completion"], + ["Disable", "Деактивиране", "Deaktivieren", "Отключить", "Desactivar", "禁用", "Désactiver"], + ["Enable", "Активиране", "Aktivieren", "Включить", "Activar", "启用", "Activer"], + ["Completions for", "Завършване за", "Vervollständigungen für", "Завершения для", "Completaciones para", "完成建议适用于", "Complétions pour"], + ["Currently", "В момента", "Derzeit", "В настоящее время", "Actualmente", "目前", "Actuellement"], + ["enabled", "активирано", "aktiviert", "включено", "habilitado", "已启用", "activé"], + ["disabled", "деактивирано", "deaktiviert", "отключено", "deshabilitado", "已禁用", "désactivé"], + ["All Completions", "Всички завършвания", "Alle Vervollständigungen", "Все автозаполнения", "Todas las completaciones", "所有补全", "Toutes les complétions"], + ["Turn off completions globally", "Изключете завършванията глобално", "Vervollständigungen global deaktivieren", "Отключить автозаполнение глобально", "Desactivar las completaciones globalmente", "全局关闭补全", "Désactiver les complétions globalement"], + ["Turn on completions globally", "Включете завършванията глобално", "Vervollständigungen global aktivieren", "Включить автозаполнение глобально", "Activar las completaciones globalmente", "全局开启补全", 
"Activer les complétions globalement"], + ["Start completion llama.cpp server", "Стартиране на сървър за завършване llama.cpp", "Startet den Abschluss-Server von llama.cpp", "Запуск сервера завершения llama.cpp", "Iniciar servidor de finalización llama.cpp", "启动 llama.cpp 补全服务器", "Démarrer le serveur de complétion llama.cpp"], + ["Runs the command from property launch_completion", "Изпълнява командата от свойството launch_completion", "Führt den Befehl aus der Eigenschaft launch_completion aus", "Выполняет команду из свойства launch_completion", "Ejecuta el comando de la propiedad launch_completion", "执行来自 launch_completion 属性的命令", "Exécute la commande de la propriété launch_completion"], ["Start chat llama.cpp server", "Стартиране на сървър за чат llama.cpp", "Startet den Chat-Server von llama.cpp", "Запуск сервера чата llama.cpp", "Iniciar servidor de chat llama.cpp", "启动 llama.cpp 聊天服务器", "Démarrer le serveur de chat llama.cpp"], - ["Runs the command from property launch_chat", "Изпълнява командата от свойството launch_chat", "Führt den Befehl aus der Eigenschaft launch_chat aus", "Выполняет команду из свойства launch_chat", "Ejecuta el comando de la propiedad launch_chat", "执行来自 launch_chat 属性的命令", "Exécute la commande de la propriété launch_chat"], - ["Stop completion llama.cpp server", "Спиране на сървъра за завършване llama.cpp", "Beenden des Abschluss-Servers llama.cpp", "Остановка сервера завершения llama.cpp", "Detener el servidor de finalización llama.cpp", "停止完成 llama.cpp 服务器", "Arrêter le serveur de complétion llama.cpp"], - ["Stops completion llama.cpp server if it was started from llama.vscode menu", "Спира сървъра за завършване llama.cpp, ако е стартиран от менюто на llama.vscode", "Beendet den Abschluss-Server llama.cpp, wenn er aus dem Menü von llama.vscode gestartet wurde", "Останавливает сервер завершения llama.cpp, если он был запущен из меню llama.vscode", "Detiene el servidor de finalización llama.cpp si se inició desde el menú de llama.vscode", "如果从 llama.vscode 菜单启动,则停止完成 llama.cpp 服务器", "Arrête le serveur de complétion llama.cpp s'il a été démarré à partir du menu llama.vscode"], - ["Stop chat llama.cpp server", "Спиране на чат сървър llama.cpp", "Beenden des Chat-Servers llama.cpp", "Остановка чат-сервера llama.cpp", "Detener el servidor de chat llama.cpp", "停止聊天 llama.cpp 服务器", "Arrêter le serveur de chat llama.cpp"], + ["Runs the command from property launch_chat", "Изпълнява командата от свойството launch_chat", "Führt den Befehl aus der Eigenschaft launch_chat aus", "Выполняет команду из свойства launch_chat", "Ejecuta el comando de la propiedad launch_chat", "执行来自 launch_chat 属性的命令", "Exécute la commande de la propriété launch_chat"], + ["Stop completion llama.cpp server", "Спиране на сървъра за завършване llama.cpp", "Beenden des Abschluss-Servers llama.cpp", "Остановка сервера завершения llama.cpp", "Detener el servidor de finalización llama.cpp", "停止完成 llama.cpp 服务器", "Arrêter le serveur de complétion llama.cpp"], + ["Stops completion llama.cpp server if it was started from llama.vscode menu", "Спира сървъра за завършване llama.cpp, ако е стартиран от менюто на llama.vscode", "Beendet den Abschluss-Server llama.cpp, wenn er aus dem Menü von llama.vscode gestartet wurde", "Останавливает сервер завершения llama.cpp, если он был запущен из меню llama.vscode", "Detiene el servidor de finalización llama.cpp si se inició desde el menú de llama.vscode", "如果从 llama.vscode 菜单启动,则停止完成 llama.cpp 服务器", "Arrête le serveur de complétion llama.cpp s'il a été démarré à 
partir du menu llama.vscode"], + ["Stop chat llama.cpp server", "Спиране на чат сървър llama.cpp", "Beenden des Chat-Servers llama.cpp", "Остановка чат-сервера llama.cpp", "Detener el servidor de chat llama.cpp", "停止聊天 llama.cpp 服务器", "Arrêter le serveur de chat llama.cpp"], ["Stops chat llama.cpp server if it was started from llama.vscode menu", "Спира чат сървъра llama.cpp, ако е стартиран от менюто на llama.vscode", "Beendet den Chat-Server llama.cpp, wenn er aus dem Menü von llama.vscode gestartet wurde", "Останавливает чат-сервер llama.cpp, если он был запущен из меню llama.vscode", "Detiene el servidor de chat llama.cpp si se inició desde el menú de llama.vscode", "如果从 llama.vscode 菜单启动,则停止聊天 llama.cpp 服务器", "Arrête le serveur de chat llama.cpp s'il a été démarré à partir du menu llama.vscode"], ["Start completion model", "Стартиране на модела за допълнение", "Vervollständigungsmodell starten", "Запуск модели дополнения", "Iniciar modelo de completado", "启动补全模型", "Démarrer le modèle de complétion"], ["Start chat model", "Стартиране на модела за чат", "Chat-Modell starten", "Запуск модели чата", "Iniciar modelo de chat", "启动聊天模型", "Démarrer le modèle de chat"], ["Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server", "Изисква Brew, инсталира/актуализира llama.cpp сървъра, изтегля модела, ако не е наличен и стартира llama.cpp сървъра", "Erfordert Brew, installiert/aktualisiert den llama.cpp-Server, lädt das Modell herunter, falls nicht verfügbar, und startet den llama.cpp-Server", "Требуется Brew, устанавливает/обновляет сервер llama.cpp, скачивает модель, если она недоступна, и запускает сервер llama.cpp", "Requiere Brew, instala/actualiza el servidor llama.cpp, descarga el modelo si no está disponible, y ejecuta el servidor llama.cpp", "需要 brew,安装/升级 llama.cpp 服务器,如果模型不可用则下载模型,并运行 llama.cpp 服务器", "Nécessite Brew, installe/met à jour le serveur llama.cpp, télécharge le modèle s'il n'est pas disponible, et lance le serveur llama.cpp"], - ["Error getting response. Please check if llama.cpp server is running.", "Грешка при получаване на отговор. Моля, проверете дали сървърът на llama.cpp работи.", "Fehler beim Abrufen der Antwort. Bitte prüfen Sie, ob der llama.cpp-Server läuft.", "Ошибка при получении ответа. Пожалуйста, проверьте, запущен ли сервер llama.cpp.", "Error al obtener respuesta. Por favor, verifique si el servidor de llama.cpp está en ejecución.", "获取响应时出错。请检查llama.cpp服务器是否正在运行。", "Erreur lors de l'obtention de la réponse. Veuillez vérifier si le serveur llama.cpp est en cours d'exécution."], - ["llama-vscode extension is updated.", "Разширението llama-vscode е актуализирано.", "Die llama-vscode-Erweiterung ist aktualisiert.", "Расширение llama-vscode обновлено.", "La extensión llama-vscode está actualizada.", "llama-vscode扩展已更新。", "L'extension llama-vscode est mise à jour."], - ["There is no command to execute.", "Няма команда за изпълнение.", "Es gibt keinen Befehl zum Ausführen.", "Нет команды для выполнения.", "No hay comando para ejecutar.", "没有可执行的命令。", "Il n'y a aucune commande à exécuter."], - ["Error executing command", "Грешка при изпълнение на командата", "Fehler beim Ausführen des Befehls", "Ошибка выполнения команды", "Error al ejecutar el comando", "执行命令时出错", "Erreur lors de l'exécution de la commande"], + ["Error getting response. Please check if llama.cpp server is running.", "Грешка при получаване на отговор. Моля, проверете дали сървърът на llama.cpp работи.", "Fehler beim Abrufen der Antwort. 
Bitte prüfen Sie, ob der llama.cpp-Server läuft.", "Ошибка при получении ответа. Пожалуйста, проверьте, запущен ли сервер llama.cpp.", "Error al obtener respuesta. Por favor, verifique si el servidor de llama.cpp está en ejecución.", "获取响应时出错。请检查llama.cpp服务器是否正在运行。", "Erreur lors de l'obtention de la réponse. Veuillez vérifier si le serveur llama.cpp est en cours d'exécution."], + ["llama-vscode extension is updated.", "Разширението llama-vscode е актуализирано.", "Die llama-vscode-Erweiterung ist aktualisiert.", "Расширение llama-vscode обновлено.", "La extensión llama-vscode está actualizada.", "llama-vscode扩展已更新。", "L'extension llama-vscode est mise à jour."], + ["There is no command to execute.", "Няма команда за изпълнение.", "Es gibt keinen Befehl zum Ausführen.", "Нет команды для выполнения.", "No hay comando para ejecutar.", "没有可执行的命令。", "Il n'y a aucune commande à exécuter."], + ["Error executing command", "Грешка при изпълнение на командата", "Fehler beim Ausführen des Befehls", "Ошибка выполнения команды", "Error al ejecutar el comando", "执行命令时出错", "Erreur lors de l'exécution de la commande"], ["Start training completion model", "Стартиране на модел за завършване на обучение", "Starten des Trainingsabschlussmodells", "Запуск модели завершения обучения", "Iniciar modelo de finalización de entrenamiento", "启动训练完成模型", "Démarrer le modèle de complétion d'entraînement"], ["Runs the command from property launch_training_completion", "Изпълнява командата от свойството launch_training_completion", "Führt den Befehl aus der Eigenschaft launch_training_completion aus", "Выполняет команду из свойства launch_training_completion", "Ejecuta el comando desde la propiedad launch_training_completion", "从属性 launch_training_completion 运行命令", "Exécute la commande depuis la propriété launch_training_completion"], ["Start training chat model", "Стартиране на модел за чат обучение", "Starten des Chat-Trainingsmodells", "Запуск модели обучения чата", "Iniciar modelo de entrenamiento de chat", "启动训练聊天模型", "Démarrer le modèle d'entraînement de chat"], ["Runs the command from property launch_training_chat", "Изпълнява командата от свойството launch_training_chat", "Führt den Befehl aus der Eigenschaft launch_training_chat aus", "Выполняет команду из свойства launch_training_chat", "Ejecuta el comando desde la propiedad launch_training_chat", "从属性 launch_training_chat 运行命令", "Exécute la commande depuis la propriété launch_training_chat"], ["Stop training", "Спиране на обучението", "Training beenden", "Остановить обучение", "Detener entrenamiento", "停止训练", "Arrêter l'entraînement"], ["Stops training if it was started from llama.vscode menu", "Спира обучението, ако е стартирано от менюто llama.vscode", "Stoppt das Training, wenn es über das Menü llama.vscode gestartet wurde", "Останавливает обучение, если оно было запущено из меню llama.vscode", "Detiene el entrenamiento si se inició desde el menú llama.vscode", "如果从 llama.vscode 菜单启动,则停止训练", "Arrête l'entraînement s'il a été lancé depuis le menu llama.vscode"], - ]; \ No newline at end of file + ["Extracting keywords from query...", "Извличане на ключови думи от заявката...", "Schlüsselwörter aus der Abfrage extrahieren...", "Извлечение ключевых слов из запроса...", "Extrayendo palabras clave de la consulta...", "从查询中提取关键词...", "Extraction des mots-clés de la requête..."], + ["Filtering chunks step 1...", "Филтриране на части стъпка 1...", "Filterung der Datenblöcke Schritt 1...", "Фильтрация фрагментов шаг 1...", "Filtrado de fragmentos paso 1...", 
"过滤数据块步骤1...", "Filtrage des segments étape 1..."], + ["Filtering chunks step 2...", "Филтриране на части стъпка 2...", "Filterung der Datenblöcke Schritt 2...", "Фильтрация фрагментов шаг 2...", "Filtrado de fragmentos paso 2...", "过滤数据块步骤2...", "Filtrage des segments étape 2..."], + ["Context chunks ready.", "Контекстните части са готови.", "Kontextblöcke bereit.", "Контекстные фрагменты готовы.", "Fragmentos de contexto listos.", "上下文块已准备就绪。", "Segments de contexte prêts."], + ["Indexing files...", "Индексиране на файлове...", "Dateien werden indiziert...", "Индексация файлов...", "Indexando archivos...", "正在索引文件...", "Indexation des fichiers..."], + ["Indexed", "Индексирани", "Indiziert", "Проиндексировано", "Indexado", "已索引", "Indexé"], + ["files for RAG search", "файлове за RAG търсене", "Dateien für RAG-Suche", "файлы для RAG-поиска", "archivos para búsqueda RAG", "用于RAG搜索的文件", "fichiers pour la recherche RAG"], + ["Enter your question...", "Въведете въпроса си...", "Geben Sie Ihre Frage ein...", "Введите ваш вопрос...", "Ingresa tu pregunta...", "输入您的问题...", "Entrez votre question..."], + ["What would you like to ask AI?", "Какво бихте искали да попитате ИИ?", "Was möchten Sie die KI fragen?", "Что вы хотите спросить у ИИ?", "¿Qué te gustaría preguntarle a la IA?", "您想向AI提问什么?", "Que souhaitez-vous demander à l'IA ?"], + ["Start embeddings llama.cpp server", "Стартиране на embeddings llama.cpp сървър", "Embeddings llama.cpp Server starten", "Запустить сервер embeddings llama.cpp", "Iniciar servidor embeddings llama.cpp", "启动 embeddings llama.cpp 服务器", "Démarrer le serveur embeddings llama.cpp"], + ["Runs the command from property launch_embeddings", "Изпълнява командата от свойството launch_embeddings", "Führt den Befehl aus der Eigenschaft launch_embeddings aus", "Запускает команду из свойства launch_embeddings", "Ejecuta el comando desde la propiedad launch_embeddings", "从属性 launch_embeddings 运行命令", "Exécute la commande à partir de la propriété launch_embeddings"], + ["Stop embeddings llama.cpp server", "Спиране на embeddings llama.cpp сървър", "Embeddings llama.cpp Server stoppen", "Остановить сервер embeddings llama.cpp", "Detener servidor embeddings llama.cpp", "停止 embeddings llama.cpp 服务器", "Arrêter le serveur embeddings llama.cpp"], + ["Stops embeddings llama.cpp server if it was started from llama.vscode menu", "Спира embeddings llama.cpp сървър, ако е стартиран от менюто на llama.vscode", "Stoppt den Embeddings llama.cpp Server, wenn er über das llama.vscode-Menü gestartet wurde", "Останавливает сервер embeddings llama.cpp, если он был запущен из меню llama.vscode", "Detiene el servidor embeddings llama.cpp si se inició desde el menú de llama.vscode", "如果 embeddings llama.cpp 服务器是从 llama.vscode 菜单启动的,则停止它", "Arrête le serveur embeddings llama.cpp s'il a été démarré depuis le menu llama.vscode"], + ["Start embeddings model", "Стартиране на модел за ембединги", "Starte Embeddings-Modell", "Запуск модели эмбеддингов", "Iniciar modelo de incrustaciones", "启动嵌入模型", "Démarrer le modèle d'incorporations"], + ]; diff --git a/src/utils.ts b/src/utils.ts index 797e19e..f370e4e 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -1,5 +1,13 @@ import vscode from "vscode"; +interface BM25Stats { + avgDocLength: number; + docFreq: Record; + docLengths: number[]; + termFreq: Record> + totalDocs: number; +} + export class Utils { static getLeadingSpaces = (input: string): string => { // Match the leading spaces using a regular expression @@ -28,9 +36,77 @@ export class Utils { } static 
getChunksInPlainText = (chunksToSend: any[]) => { - let extraCont = "Here are pieces of code from different files of the project: \n" - + chunksToSend.reduce((accumulator, currentValue) => accumulator + "\nFile Name: " + let extraCont = "Here are pieces of code from different files of the project: \n" + + chunksToSend.reduce((accumulator, currentValue) => accumulator + "\nFile Name: " + currentValue.filename + "\nText:\n" + currentValue.text + "\n\n", ""); return extraCont; } + + static computeBM25Stats = (docs: string[][]): BM25Stats => { + const docFreq: Map<string, number> = new Map(); + const termFreq: Map<string, Map<number, number>> = new Map(); + const docLengths: number[] = []; + let totalDocs = 0; + + for (let docId = 0; docId < docs.length; docId++) { + const doc = docs[docId]; + docLengths.push(doc.length); + const termsInDoc = new Set<string>(); + + for (const term of doc) { + // Update term frequency (per-doc) + if (!termFreq.has(term)) { + termFreq.set(term, new Map()); + } + const termDocMap = termFreq.get(term)!; + termDocMap.set(docId, (termDocMap.get(docId) || 0) + 1); + + termsInDoc.add(term); + } + + // Update document frequency (global) + for (const term of termsInDoc) { + docFreq.set(term, (docFreq.get(term) || 0) + 1); + } + + totalDocs++; + } + + const avgDocLength = docLengths.reduce((a, b) => a + b, 0) / totalDocs; + return { + avgDocLength, + docFreq: Object.fromEntries(docFreq), // Convert to Record if needed + docLengths, + termFreq: Object.fromEntries( + Array.from(termFreq).map(([k, v]) => [k, Object.fromEntries(v)]) + ), + totalDocs + }; + }; + + static bm25Score = ( + queryTerms: string[], + docIndex: number, + stats: BM25Stats, + k1 = 1.5, + b = 0.75 + ): number => { + let score = 0; + + for (const term of queryTerms) { + if (!stats.termFreq[term]) continue; + + const tf = stats.termFreq[term][docIndex] || 0; + const idf = Math.log( + (stats.totalDocs - stats.docFreq[term] + 0.5) / (stats.docFreq[term] + 0.5) + 1 + ); + + const numerator = tf * (k1 + 1); + const denominator = tf + k1 * (1 - b + b * stats.docLengths[docIndex] / stats.avgDocLength); + + score += idf * numerator / denominator; + } + + return score; + } }
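
For reference, here is a minimal TypeScript sketch (not part of the patch) of one way the new BM25 helpers in src/utils.ts could be wired together to rank RAG chunks against a user query. The tokenize helper, the query string, and the chunks array are illustrative assumptions; only Utils.computeBM25Stats, Utils.bm25Score, and the Prompts class come from the diff above.

import { Utils } from "./utils";

// Hypothetical tokenizer and chunk list, for illustration only.
const tokenize = (s: string): string[] => s.toLowerCase().split(/\W+/).filter(Boolean);
const chunks = [
    { filename: "menu.ts", text: "show the menu and start the llama server" },
    { filename: "utils.ts", text: "compute BM25 statistics and score chunks" },
];

// Build per-chunk token lists and the shared BM25 statistics once per indexing pass.
const docs = chunks.map(c => tokenize(c.text));
const stats = Utils.computeBM25Stats(docs);

// Score every chunk against the query terms and keep the best matches.
const query = "start llama server";
const queryTerms = tokenize(query);
const ranked = chunks
    .map((chunk, i) => ({ chunk, score: Utils.bm25Score(queryTerms, i, stats) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, 5); // a cap along the lines of the extension's BM25 filter setting
console.log(ranked.map(r => r.chunk.filename));

// The query could also be turned into a keyword-extraction prompt via the new
// Prompts class; assuming an Application instance `app` is available, roughly:
// const prompts = new Prompts(app);
// const keywordPrompt = prompts.replacePlaceholders(prompts.CHAT_GET_KEY_WORDS, { prompt: query });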