Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

98 changes: 74 additions & 24 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "llama-vscode",
"displayName": "llama-vscode",
"description": "Local LLM-assisted text completion using llama.cpp",
"version": "0.0.9-beta-3",
"version": "0.0.10-beta-1",
"publisher": "ggml-org",
"repository": "https://github.com/ggml-org/llama.vscode",
"engines": {
Expand Down Expand Up @@ -31,55 +31,55 @@
"commands": [
{
"command": "extension.triggerInlineCompletion",
"title": "Trigger Inline Completion"
"title": "llama-vscode: Trigger Inline Completion"
},
{
"command": "extension.triggerNoCacheCompletion",
"title": "Trigger No Cache Completion"
"title": "llama-vscode: Trigger No Cache Completion"
},
{
"command": "extension.copyIntercept",
"title": "Copy Intercept"
"title": "llama-vscode: Copy Intercept"
},
{
"command": "extension.cutIntercept",
"title": "Cut Intercept"
"title": "llama-vscode: Cut Intercept"
},
{
"command": "extension.acceptFirstLine",
"title": "Accept First Line"
"title": "llama-vscode: Accept First Line"
},
{
"command": "extension.acceptFirstWord",
"title": "Accept First Word"
"title": "llama-vscode: Accept First Word"
},
{
"command": "extension.copyChunks",
"title": "Copy Chunks"
"title": "llama-vscode: Copy Chunks"
},
{
"command": "extension.showMenu",
"title": "Show Menu"
"title": "llama-vscode: Show Menu"
},
{
"command": "extension.askAi",
"title": "Ask AI"
"title": "llama-vscode: Ask AI"
},
{
"command": "extension.askAiWithContext",
"title": "Ask AI With Context"
"title": "llama-vscode: Ask AI With Context"
},
{
"command": "extension.editSelectedText",
"title": "Edit Selected Text with AI"
"title": "llama-vscode: Edit Selected Text with AI"
},
{
"command": "extension.acceptTextEdit",
"title": "Accept Text Edit Suggestion"
"title": "llama-vscode: Accept Text Edit Suggestion"
},
{
"command": "extension.rejectTextEdit",
"title": "Reject Text Edit Suggestion"
"title": "llama-vscode: Reject Text Edit Suggestion"
}
],
"keybindings": [
Expand Down Expand Up @@ -122,8 +122,7 @@
"command": "extension.acceptFirstWord",
"key": "ctrl+right",
"when": "editorTextFocus && inlineSuggestionVisible"
}
,
},
{
"command": "extension.showMenu",
"key": "ctrl+shift+m",
Expand Down Expand Up @@ -169,6 +168,11 @@
"default": "cd c:/ai ; ./llama-server.exe -m qwen2.5-coder-3b-instruct-q6_k.gguf -ngl 99 --port 8011 --path C:/llama.cpp/llama.cpp/examples/server/webui/dist",
"description": "Shell command for starting chat llama.cpp server, executed from the menu"
},
"llama-vscode.launch_embeddings": {
"type": "string",
"default": "cd c:/ai ; ./llama-server.exe -m all-MiniLM-L6-v2-Q8_0.gguf --port 8010",
"description": "Shell command for starting chat llama.cpp server, executed from the menu"
},
"llama-vscode.launch_training_completion": {
"type": "string",
"default": "",
Expand Down Expand Up @@ -199,6 +203,11 @@
"default": "http://127.0.0.1:8011",
"description": "The URL to be used by the extension for chat with ai."
},
"llama-vscode.endpoint_embeddings": {
"type": "string",
"default": "http://127.0.0.1:8010",
"description": "The URL to be used by the extension for creating embeddings."
},
"llama-vscode.auto": {
"type": "boolean",
"default": true,
Expand Down Expand Up @@ -274,6 +283,46 @@
"default": 1000,
"description": "how often to process queued chunks in normal mode"
},
"llama-vscode.rag_chunk_max_chars": {
"type": "number",
"default": 2000,
"description": "Max number of chars per RAG chunk"
},
"llama-vscode.rag_max_lines_per_chunk": {
"type": "number",
"default": 60,
"description": "Max number of lines per RAG chunk"
},
"llama-vscode.rag_max_chars_per_chunk_line": {
"type": "number",
"default": 300,
"description": "max chars for a chunk line, the rest of the line is cut"
},
"llama-vscode.rag_max_chunks": {
"type": "number",
"default": 30000,
"description": "max cunks for the RAG search"
},
"llama-vscode.rag_max_bm25_filter_chunks": {
"type": "number",
"default": 47,
"description": "max RAG chunks to filter with BM25 algorithm"
},
"llama-vscode.rag_max_embedding_filter_chunks": {
"type": "number",
"default": 5,
"description": "max RAG chunks to provide as context to the LLM"
},
"llama-vscode.rag_max_context_files": {
"type": "number",
"default": 3,
"description": "max number of complete files to send as context to the LLM"
},
"llama-vscode.rag_max_context_file_chars": {
"type": "number",
"default": 5000,
"description": "max chars for a context file. If the file is bigger it will be cut to avoid too big context."
},
"llama-vscode.language": {
"type": "string",
"default": "en",
Expand All @@ -285,14 +334,14 @@
"description": "Enable/disable completions"
},
"llama-vscode.languageSettings": {
"type": "object",
"default": {
"*": true
},
"additionalProperties": {
"type": "boolean"
},
"description": "Enable/disable suggestions for specific languages"
"type": "object",
"default": {
"*": true
},
"additionalProperties": {
"type": "boolean"
},
"description": "Enable/disable suggestions for specific languages"
},
"llama-vscode.use_openai_endpoint": {
"type": "boolean",
Expand Down Expand Up @@ -326,6 +375,7 @@
},
"dependencies": {
"axios": "^1.1.2",
"ignore": "^7.0.4",
"openai": "^4.80.1"
},
"devDependencies": {
Expand Down
6 changes: 6 additions & 0 deletions src/application.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {Completion} from "./completion";
import {Logger} from "./logger";
import { ChatWithAi } from "./chat-with-ai";
import { TextEditor } from "./text-editor";
import { ChatContext } from "./chat-context";
import { Prompts } from "./prompts";

export class Application {
private static instance: Application;
Expand All @@ -23,6 +25,8 @@ export class Application {
public logger: Logger
public askAi: ChatWithAi
public textEditor: TextEditor
public chatContext: ChatContext
public prompts: Prompts

private constructor() {
this.extConfig = new Configuration()
Expand All @@ -36,6 +40,8 @@ export class Application {
this.logger = new Logger(this)
this.askAi = new ChatWithAi(this)
this.textEditor = new TextEditor(this)
this.chatContext = new ChatContext(this)
this.prompts = new Prompts(this)
}

public static getInstance(): Application {
Expand Down
74 changes: 72 additions & 2 deletions src/architect.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
// TODO
// If the embeddings server is missing, report an error so the problem is visible
//
// If a LoRA is used for the chat server - pass it in the request from the webui
// Ideas
// - Use agents (?)
Expand All @@ -14,10 +16,56 @@ export class Architect {
this.app = application;
}

// Kicks off workspace indexing for RAG. Only runs when an embeddings
// endpoint is configured; the actual indexing is deferred to the next
// tick so extension activation is not blocked.
init = () => {
    // Strict comparison (!==) — no implicit coercion needed for strings.
    if (this.app.extConfig.endpoint_embeddings.trim() !== "") {
        setTimeout(() => {
            // Fire-and-forget: surface failures in the log instead of
            // leaving an unhandled promise rejection.
            this.app.chatContext.indexWorkspaceFiles().catch(error => {
                console.error('Failed to index workspace files:', error);
            });
        }, 0);
    }
}

// Keeps the RAG store in sync with the workspace: re-indexes a file
// after it is saved (debounced) and drops entries for deleted files.
// Both listeners are pushed onto context.subscriptions for disposal.
setOnSaveDeleteFileForDb = (context: vscode.ExtensionContext) => {
    const saveListener = vscode.workspace.onDidSaveTextDocument(async (document) => {
        try {
            if (!this.app.chatContext.isImageOrVideoFile(document.uri.toString())) {
                // Debounce: record when this save happened and only index
                // 5s later if no newer save superseded it in the meantime.
                let updateTime = Date.now()
                let fileProperties = this.app.chatContext.getFileProperties(document.uri.toString())
                if (fileProperties) fileProperties.updated = updateTime;
                setTimeout(async () => {
                    if (fileProperties && fileProperties.updated > updateTime) {
                        return; // a newer save is pending; let it do the work
                    }
                    try {
                        // Await here so failures are caught in this callback;
                        // the outer try/catch cannot see errors thrown on a
                        // later tick, which previously left the addDocument
                        // promise rejection unhandled.
                        await this.app.chatContext.addDocument(document.uri.toString(), document.getText());
                    } catch (error) {
                        console.error('Failed to add document to RAG:', error);
                    }
                }, 5000);
            }
        } catch (error) {
            console.error('Failed to add document to RAG:', error);
        }
    });
    context.subscriptions.push(saveListener);

    // Remove deleted files from the vector RAG store.
    const deleteListener = vscode.workspace.onDidDeleteFiles(async (event) => {
        for (const file of event.files) {
            try {
                await this.app.chatContext.removeDocument(file.toString());
            } catch (error) {
                console.error('Failed to remove document from RAG:', error);
            }
        }
    });
    context.subscriptions.push(deleteListener);
}

setOnChangeConfiguration = (context: vscode.ExtensionContext) => {
let configurationChangeDisp = vscode.workspace.onDidChangeConfiguration((event) => {
const config = vscode.workspace.getConfiguration("llama-vscode");
this.app.extConfig.updateOnEvent(event, config);
if (this.app.extConfig.isRagConfigChanged(event)) this.init()
vscode.window.showInformationMessage(this.app.extConfig.getUiText(`llama-vscode extension is updated.`)??"");
});
context.subscriptions.push(configurationChangeDisp);
Expand All @@ -31,7 +79,7 @@ export class Architect {
this.app.extraContext.pickChunkAroundCursor(previousEditor.selection.active.line, previousEditor.document);
}, 0);
}

if (editor) {
// Editor is now active in the UI, pick a chunk
let activeDocument = editor.document;
Expand Down Expand Up @@ -100,6 +148,17 @@ export class Architect {
context.subscriptions.push(onSaveDocDisposable);
}

// Re-runs workspace indexing whenever a folder is added to the workspace.
// The listener is registered on context.subscriptions so it is disposed
// with the extension.
setOnChangeWorkspaceFolders = (context: vscode.ExtensionContext) => {
    const foldersListener = vscode.workspace.onDidChangeWorkspaceFolders((event) => {
        // One init() per added folder, matching the previous behavior.
        for (const folder of event.added) {
            this.init();
        }
    });
    context.subscriptions.push(foldersListener);
}

registerCommandManualCompletion = (context: vscode.ExtensionContext) => {
const triggerManualCompletionDisposable = vscode.commands.registerCommand('extension.triggerInlineCompletion', async () => {
// Manual triggering of the completion with a shortcut
Expand Down Expand Up @@ -147,7 +206,18 @@ export class Architect {
if (this.app.lruResultCache.size() > 0){
completionCache = Array.from(this.app.lruResultCache.getMap().entries()).reduce((accumulator, [key, value]) => accumulator + "Key: " + key + "\nCompletion:\n" + value + "\n\n" , "");
}
vscode.env.clipboard.writeText("Events:\n" + eventLogsCombined + "\n\n------------------------------\n" + "Extra context: \n" + extraContext + "\n\n------------------------------\nCompletion cache: \n" + completionCache)
let firstChunks = ""
if (this.app.chatContext.entries.size > 0){
firstChunks = Array.from(this.app.chatContext.entries.entries()).slice(0,5).reduce((accumulator, [key, value]) => accumulator + "ID: " + key + "\nFile:\n" + value.uri +
"\nfirst line:\n" + value.firstLine +
"\nlast line:\n" + value.lastLine +
"\nChunk:\n" + value.content + "\n\n" , "");
}
vscode.env.clipboard.writeText("Events:\n" + eventLogsCombined +
"\n\n------------------------------\n" +
"Extra context: \n" + extraContext +
"\n\n------------------------------\nCompletion cache: \n" + completionCache +
"\n\n------------------------------\nChunks: \n" + firstChunks)
});
context.subscriptions.push(triggerCopyChunksDisposable);
}
Expand Down
Loading