Skip to content

Commit 41966cd

Browse files
authored
Menu items for Install/Upgrade llama.cpp, download a model and start llama.cpp server (mac)
- Install/Upgrade llama.cpp, download a model and start llama.cpp server with one menu item (only for mac)
- New property launch_cmd for executing a shell command (for example for starting llama.cpp) from the menu
2 parents 3126abf + b8a7f39 commit 41966cd

File tree

7 files changed

+132
-7
lines changed

7 files changed

+132
-7
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ dist
33
node_modules
44
.vscode-test/
55
*.vsix
6+
.idea/

package.json

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@
5656
{
5757
"command": "extension.copyChunks",
5858
"title": "Copy Chunks"
59+
},
60+
{
61+
"command": "extension.showMenu",
62+
"title": "Show Menu"
5963
}
6064
],
6165
"keybindings": [
@@ -77,7 +81,7 @@
7781
{
7882
"command": "extension.copyChunks",
7983
"key": "ctrl+shift+,",
80-
"when": "editorTextFocus"
84+
"when": "true"
8185
},
8286
{
8387
"command": "extension.copyIntercept",
@@ -99,11 +103,22 @@
99103
"key": "ctrl+right",
100104
"when": "editorTextFocus && inlineSuggestionVisible"
101105
}
106+
,
107+
{
108+
"command": "extension.showMenu",
109+
"key": "ctrl+shift+m",
110+
"when": "true"
111+
}
102112
],
103113
"configuration": {
104114
"type": "object",
105115
"title": "llama.vscode Configuration",
106116
"properties": {
117+
"llama-vscode.launch_cmd": {
118+
"type": "string",
119+
"default": "",
120+
"description": "Shell command executed from menu"
121+
},
107122
"llama-vscode.endpoint": {
108123
"type": "string",
109124
"default": "http://127.0.0.1:8012",

src/architect.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,15 @@ export class Architect {
7474
context.subscriptions.push(acceptFirstWordCommand);
7575
}
7676

77-
77+
registerCommandShowMenu = (context: vscode.ExtensionContext) => {
78+
const showMenuCommand = vscode.commands.registerCommand(
79+
'extension.showMenu',
80+
async () => {
81+
await this.app.menu.showMenu();
82+
}
83+
);
84+
context.subscriptions.push(showMenuCommand);
85+
}
7886

7987
setPeriodicRingBufferUpdate = (context: vscode.ExtensionContext) => {
8088
const ringBufferIntervalId = setInterval(this.app.extraContext.periodicRingBufferUpdate, this.app.extConfig.ring_update_ms);

src/configuration.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import OpenAI from "openai";
44
export class Configuration {
55
// extension configs
66
enabled = true;
7+
launch_cmd = "";
78
endpoint = "http://127.0.0.1:8012";
89
auto = true;
910
api_key = "";
@@ -92,6 +93,7 @@ export class Configuration {
9293
private updateConfigs = (config: vscode.WorkspaceConfiguration) => {
9394
// TODO Handle the case of wrong types for the configuration values
9495
this.endpoint = this.trimTrailingSlash(String(config.get<string>("endpoint")));
96+
this.launch_cmd = String(config.get<string>("launch_cmd"));
9597
this.use_openai_endpoint = Boolean(config.get<boolean>("use_openai_endpoint"));
9698
this.openai_client_model = String(config.get<string>("openai_client_model"));
9799
this.openai_prompt_template = String(config.get<string>("openai_prompt_template"));

src/extension.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import * as vscode from 'vscode';
22
import {Application} from "./application";
33

4+
let app: Application
45
export function activate(context: vscode.ExtensionContext) {
5-
let app = Application.getInstance();
6+
app = Application.getInstance();
67
app.architect.setStatusBar(context)
78
app.architect.setOnChangeConfiguration(context);
89
app.architect.setCompletionProvider(context);
@@ -15,8 +16,10 @@ export function activate(context: vscode.ExtensionContext) {
1516
app.architect.setOnChangeActiveFile(context);
1617
app.architect.registerCommandAcceptFirstLine(context);
1718
app.architect.registerCommandAcceptFirstWord(context);
19+
app.architect.registerCommandShowMenu(context);
1820
}
1921

2022
export function deactivate() {
21-
// Nothing to do. VS Code will dispose all registered disposables
23+
// VS Code will dispose all registered disposables
24+
app.llamaServer.killCmd();
2225
}

src/llama-server.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import axios from "axios";
22
import {Application} from "./application";
3+
import { EventEmitter } from 'events';
4+
import vscode, { Terminal } from "vscode";
35

46
const STATUS_OK = 200;
57

@@ -21,6 +23,8 @@ export interface LlamaResponse {
2123
export class LlamaServer {
2224
// private extConfig: Configuration;
2325
private app: Application
26+
private vsCodeTerminal: Terminal | undefined;
27+
private eventEmitter: EventEmitter;
2428
private readonly defaultRequestParams = {
2529
top_k: 40,
2630
top_p: 0.99,
@@ -31,6 +35,8 @@ export class LlamaServer {
3135

3236
constructor(application: Application) {
3337
this.app = application;
38+
this.eventEmitter = new EventEmitter();
39+
this.vsCodeTerminal = undefined;
3440
}
3541

3642
private replacePlaceholders(template: string, replacements: { [key: string]: string }): string {
@@ -145,4 +151,23 @@ export class LlamaServer {
145151
this.app.extConfig.axiosRequestConfig
146152
);
147153
};
154+
155+
onlaunchCmdClose = (callback: (data: { code: number, stderr: string }) => void): void => {
156+
this.eventEmitter.on('processClosed', callback);
157+
}
158+
159+
shellCmd = (launchCmd: string): void => {
160+
if (!launchCmd) {
161+
return;
162+
}
163+
this.vsCodeTerminal = vscode.window.createTerminal({
164+
name: 'llama.cpp Command Terminal'
165+
});
166+
this.vsCodeTerminal.show(true);
167+
this.vsCodeTerminal.sendText(launchCmd);
168+
}
169+
170+
killCmd = (): void => {
171+
if (this.vsCodeTerminal) this.vsCodeTerminal.dispose();
172+
}
148173
}

src/menu.ts

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export class Menu {
88
}
99

1010
createMenuItems = (currentLanguage: string | undefined, isLanguageEnabled: boolean): vscode.QuickPickItem[] => {
11-
return [
11+
let menuItems = [
1212
{
1313
label: `${this.app.extConfig.enabled ? 'Disable' : 'Enable'} All Completions`,
1414
description: `Turn ${this.app.extConfig.enabled ? 'off' : 'on'} completions globally`
@@ -22,15 +22,86 @@ export class Menu {
2222
},
2323
{
2424
label: "$(book) View Documentation...",
25-
}
26-
].filter(Boolean) as vscode.QuickPickItem[];
25+
}]
26+
27+
if (process.platform === 'darwin') { // if mac os
28+
menuItems.push(
29+
{
30+
label: "Start model Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
31+
description: `Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`
32+
},
33+
{
34+
label: "Start model Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
35+
description: `Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`
36+
},
37+
{
38+
label: "Start model Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
39+
description: `Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`
40+
},
41+
{
42+
label: "Start model Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)",
43+
description: `Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`
44+
},
45+
{
46+
label: "Start model Qwen2.5-Coder-0.5B-Q8_0-GGUF (CPU Only)",
47+
description: `Requires brew, installs/upgrades llama.cpp server, downloads the model if not available, and runs llama.cpp server`
48+
})
49+
}
50+
51+
menuItems.push(
52+
{
53+
label: "Start llama.cpp server with custom command from launch_cmd property",
54+
description: `Runs the command from property launch_cmd`
55+
},
56+
{
57+
label: "Stop llama.cpp server",
58+
description: `Stops llama.cpp server if it was started from llama.vscode menu.`
59+
})
60+
61+
return menuItems.filter(Boolean) as vscode.QuickPickItem[];
2762
}
2863

2964
handleMenuSelection = async (selected: vscode.QuickPickItem, currentLanguage: string | undefined, languageSettings: Record<string, boolean>) => {
65+
const DEFAULT_PORT_FIM_MODEL = "8012"
66+
const PRESET_PLACEHOLDER = "[preset]";
67+
const MODEL_PLACEHOLDER = "[model]"
68+
let endpointParts = this.app.extConfig.endpoint.split(":");
69+
let port = endpointParts[endpointParts.length -1]
70+
if (!Number.isInteger(Number(port))) port = DEFAULT_PORT_FIM_MODEL
71+
let llmMacVramTemplate = " brew install llama.cpp && llama-server --" + PRESET_PLACEHOLDER + " --port " + port
72+
let llmMacCpuTemplate = " brew install llama.cpp && llama-server -hf " + MODEL_PLACEHOLDER + " --port " + port + " -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256"
73+
3074
switch (selected.label) {
3175
case "$(gear) Edit Settings...":
3276
await vscode.commands.executeCommand('workbench.action.openSettings', 'llama-vscode');
3377
break;
78+
case "Start model Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)":
79+
await this.app.llamaServer.killCmd();
80+
await this.app.llamaServer.shellCmd(llmMacVramTemplate.replace(PRESET_PLACEHOLDER, "fim-qwen-1.5b-default"));
81+
break;
82+
case "Start model Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)":
83+
await this.app.llamaServer.killCmd();
84+
await this.app.llamaServer.shellCmd(llmMacVramTemplate.replace(PRESET_PLACEHOLDER, "fim-qwen-3b-default"));
85+
break;
86+
case "Start model Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)":
87+
await this.app.llamaServer.killCmd();
88+
await this.app.llamaServer.shellCmd(llmMacVramTemplate.replace(PRESET_PLACEHOLDER, "fim-qwen-7b-default"));
89+
break;
90+
case "Start model Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)":
91+
await this.app.llamaServer.killCmd();
92+
await this.app.llamaServer.shellCmd(llmMacCpuTemplate.replace(MODEL_PLACEHOLDER, "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF"));
93+
break;
94+
case "Start model Qwen2.5-Coder-0.5B-Q8_0-GGUF (CPU Only)":
95+
await this.app.llamaServer.killCmd();
96+
await this.app.llamaServer.shellCmd(llmMacCpuTemplate.replace(MODEL_PLACEHOLDER, "ggml-org/Qwen2.5-Coder-0.5B-Q8_0-GGUF"));
97+
break;
98+
case "Start llama.cpp server with custom command from launch_cmd property":
99+
await this.app.llamaServer.killCmd();
100+
await this.app.llamaServer.shellCmd(this.app.extConfig.launch_cmd);
101+
break;
102+
case "Stop llama.cpp server":
103+
await this.app.llamaServer.killCmd();
104+
break;
34105
case "$(book) View Documentation...":
35106
await vscode.env.openExternal(vscode.Uri.parse('https://github.com/ggml-org/llama.vscode'));
36107
break;

0 commit comments

Comments
 (0)