|
| 1 | +import {Application} from "./application"; |
| 2 | +import {LlamaResponse} from "./llama-server"; |
| 3 | +import vscode from "vscode"; |
| 4 | +import {Utils} from "./utils"; |
| 5 | + |
/**
 * Snapshot of a single completion request and its result. Stored in
 * `Completion.lastCompletion` so follow-up commands (insert next word /
 * insert first line) can reuse the most recent suggestion.
 */
interface CompletionDetails {
    // The post-processed suggestion text that was shown to the editor.
    completion: string;
    // Cursor position the suggestion was generated for.
    position: vscode.Position;
    // Lines before the cursor line, sent as the FIM prefix context.
    inputPrefix: string;
    // Text after the cursor (rest of line + following lines), sent as the FIM suffix context.
    inputSuffix: string;
    // Text on the cursor line before the cursor — the FIM prompt.
    prompt: string;
}
| 13 | + |
| 14 | +export class Completion { |
| 15 | + private app: Application |
| 16 | + private isRequestInProgress = false |
| 17 | + isForcedNewRequest = false |
| 18 | + lastCompletion: CompletionDetails = {completion: "", position: new vscode.Position(0, 0), inputPrefix: "", inputSuffix: "", prompt: ""}; |
| 19 | + |
| 20 | + constructor(application: Application) { |
| 21 | + this.app = application; |
| 22 | + } |
| 23 | + |
| 24 | + // Class field is used instead of a function to make "this" available |
| 25 | + getCompletionItems = async (document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext, token: vscode.CancellationToken): Promise<vscode.InlineCompletionList | vscode.InlineCompletionItem[] | null> => { |
| 26 | + let group = "GET_COMPLETION_" + Date.now(); |
| 27 | + if (!this.app.extConfig.auto && context.triggerKind == vscode.InlineCompletionTriggerKind.Automatic) { |
| 28 | + this.app.logger.addEventLog(group, "MANUAL_MODE_AUTOMATIC_TRIGGERING_RETURN", "") |
| 29 | + return null; |
| 30 | + } |
| 31 | + |
| 32 | + // Start only if the previous request is finiched |
| 33 | + while (this.isRequestInProgress) { |
| 34 | + await Utils.delay(this.app.extConfig.DELAY_BEFORE_COMPL_REQUEST); |
| 35 | + if (token.isCancellationRequested) { |
| 36 | + this.app.logger.addEventLog(group, "CANCELLATION_TOKEN_RETURN", "waiting") |
| 37 | + return null; |
| 38 | + } |
| 39 | + } |
| 40 | + this.isRequestInProgress = true // Just before leaving the function should be set to false |
| 41 | + this.app.extraContext.lastComplStartTime = Date.now(); |
| 42 | + |
| 43 | + // Gather local context |
| 44 | + const prefixLines = Utils.getPrefixLines(document, position, this.app.extConfig.n_prefix); |
| 45 | + const suffixLines = Utils.getSuffixLines(document, position, this.app.extConfig.n_suffix); |
| 46 | + const lineText = document.lineAt(position.line).text |
| 47 | + const cursorIndex = position.character; |
| 48 | + const linePrefix = lineText.slice(0, cursorIndex); |
| 49 | + const lineSuffix = lineText.slice(cursorIndex); |
| 50 | + const nindent = lineText.length - lineText.trimStart().length |
| 51 | + if (context.triggerKind == vscode.InlineCompletionTriggerKind.Automatic && lineSuffix.length > this.app.extConfig.max_line_suffix) { |
| 52 | + this.isRequestInProgress = false |
| 53 | + this.app.logger.addEventLog(group, "TOO_LONG_SUFFIX_RETURN", "") |
| 54 | + return null |
| 55 | + } |
| 56 | + const prompt = linePrefix; |
| 57 | + const inputPrefix = prefixLines.join('\n') + '\n'; |
| 58 | + const inputSuffix = lineSuffix + '\n' + suffixLines.join('\n') + '\n'; |
| 59 | + |
| 60 | + // Reuse cached completion if available. |
| 61 | + try { |
| 62 | + let data: LlamaResponse | undefined |
| 63 | + let hashKey = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + prompt) |
| 64 | + let completion = this.getCachedCompletion(hashKey, inputPrefix, inputSuffix, prompt) |
| 65 | + let isCachedResponse = !this.isForcedNewRequest && completion != undefined |
| 66 | + if (!isCachedResponse) { |
| 67 | + this.isForcedNewRequest = false |
| 68 | + if (token.isCancellationRequested){ |
| 69 | + this.isRequestInProgress = false |
| 70 | + this.app.logger.addEventLog(group, "CANCELLATION_TOKEN_RETURN", "just before server request") |
| 71 | + return null; |
| 72 | + } |
| 73 | + this.app.statusbar.showThinkingInfo(); |
| 74 | + |
| 75 | + data = await this.app.llamaServer.getFIMCompletion(inputPrefix, inputSuffix, prompt, this.app.extraContext.chunks, nindent) |
| 76 | + if (data != undefined) completion = data.content; |
| 77 | + else completion = undefined |
| 78 | + } |
| 79 | + if (completion == undefined || completion.trim() == ""){ |
| 80 | + this.app.statusbar.showInfo(undefined); |
| 81 | + this.isRequestInProgress = false |
| 82 | + this.app.logger.addEventLog(group, "NO_SUGGESTION_RETURN", "") |
| 83 | + return []; |
| 84 | + } |
| 85 | + |
| 86 | + let suggestionLines = completion.split(/\r?\n/) |
| 87 | + Utils.removeTrailingNewLines(suggestionLines); |
| 88 | + |
| 89 | + if (this.shouldDiscardSuggestion(suggestionLines, document, position, linePrefix, lineSuffix)) { |
| 90 | + this.app.statusbar.showInfo(undefined); |
| 91 | + this.isRequestInProgress = false |
| 92 | + this.app.logger.addEventLog(group, "DISCARD_SUGGESTION_RETURN", "") |
| 93 | + return []; |
| 94 | + } |
| 95 | + |
| 96 | + completion = this.updateSuggestion(suggestionLines, lineSuffix); |
| 97 | + |
| 98 | + if (!isCachedResponse) this.app.lruResultCache.put(hashKey, completion) |
| 99 | + this.lastCompletion = this.getCompletionDetails(completion, position, inputPrefix, inputSuffix, prompt); |
| 100 | + |
| 101 | + // Run async as not needed for the suggestion |
| 102 | + setTimeout(async () => { |
| 103 | + if (isCachedResponse) this.app.statusbar.showCachedInfo() |
| 104 | + else this.app.statusbar.showInfo(data); |
| 105 | + if (!token.isCancellationRequested && lineSuffix.trim() === ""){ |
| 106 | + await this.cacheFutureSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines); |
| 107 | + await this.cacheFutureAcceptLineSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines); |
| 108 | + } |
| 109 | + if (!token.isCancellationRequested){ |
| 110 | + this.app.extraContext.addFimContextChunks(position, context, document); |
| 111 | + } |
| 112 | + }, 0); |
| 113 | + this.isRequestInProgress = false |
| 114 | + this.app.logger.addEventLog(group, "NORMAL_RETURN", suggestionLines[0]) |
| 115 | + return [this.getCompletion(completion, position)]; |
| 116 | + } catch (err) { |
| 117 | + console.error("Error fetching llama completion:", err); |
| 118 | + vscode.window.showInformationMessage(`Error getting response. Please check if llama.cpp server is running. `); |
| 119 | + let errorMessage = "Error fetching completion" |
| 120 | + if (err instanceof Error) { |
| 121 | + vscode.window.showInformationMessage(err.message); |
| 122 | + errorMessage = err.message |
| 123 | + } |
| 124 | + this.isRequestInProgress = false |
| 125 | + this.app.logger.addEventLog(group, "ERROR_RETURN", errorMessage) |
| 126 | + return []; |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + private getCachedCompletion = (hashKey: string, inputPrefix: string, inputSuffix: string, prompt: string) => { |
| 131 | + let result = this.app.lruResultCache.get(hashKey); |
| 132 | + if (result != undefined) return result |
| 133 | + for (let i = prompt.length; i >= 0; i--) { |
| 134 | + let newPrompt = prompt.slice(0, i) |
| 135 | + let promptCut = prompt.slice(i) |
| 136 | + let hash = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + newPrompt) |
| 137 | + let result = this.app.lruResultCache.get(hash) |
| 138 | + if (result != undefined && promptCut == result.slice(0,promptCut.length)) return result.slice(prompt.length - newPrompt.length) |
| 139 | + } |
| 140 | + |
| 141 | + return undefined |
| 142 | + } |
| 143 | + |
| 144 | + getCompletion = (completion: string, position: vscode.Position) => { |
| 145 | + return new vscode.InlineCompletionItem( |
| 146 | + completion, |
| 147 | + new vscode.Range(position, position) |
| 148 | + ); |
| 149 | + } |
| 150 | + |
| 151 | + private getCompletionDetails = (completion: string, position: vscode.Position, inputPrefix: string, inputSuffix: string, prompt: string) => { |
| 152 | + return { completion: completion, position: position, inputPrefix: inputPrefix, inputSuffix: inputSuffix, prompt: prompt }; |
| 153 | + } |
| 154 | + |
| 155 | + // logic for discarding predictions that repeat existing text |
| 156 | + shouldDiscardSuggestion = (suggestionLines: string[], document: vscode.TextDocument, position: vscode.Position, linePrefix: string, lineSuffix: string) => { |
| 157 | + let discardSuggestion = false; |
| 158 | + if (suggestionLines.length == 0) return true; |
| 159 | + // truncate the suggestion if the first line is empty |
| 160 | + if (suggestionLines.length == 1 && suggestionLines[0].trim() == "") return true; |
| 161 | + |
| 162 | + // if cursor on the last line don't discard |
| 163 | + if (position.line == document.lineCount - 1) return false; |
| 164 | + |
| 165 | + // ... and the next lines are repeated |
| 166 | + if (suggestionLines.length > 1 |
| 167 | + && (suggestionLines[0].trim() == "" || suggestionLines[0].trim() == lineSuffix.trim()) |
| 168 | + && suggestionLines.slice(1).every((value, index) => value === document.lineAt((position.line + 1) + index).text)) |
| 169 | + return true; |
| 170 | + |
| 171 | + // truncate the suggestion if it repeats the suffix |
| 172 | + if (suggestionLines.length == 1 && suggestionLines[0] == lineSuffix) return true; |
| 173 | + |
| 174 | + // find the first non-empty line (strip whitespace) |
| 175 | + let firstNonEmptyDocLine = position.line + 1; |
| 176 | + while (firstNonEmptyDocLine < document.lineCount && document.lineAt(firstNonEmptyDocLine).text.trim() === "") |
| 177 | + firstNonEmptyDocLine++; |
| 178 | + |
| 179 | + // if all lines to the end of file are empty don't discard |
| 180 | + if (firstNonEmptyDocLine >= document.lineCount) return false; |
| 181 | + |
| 182 | + if (linePrefix + suggestionLines[0] === document.lineAt(firstNonEmptyDocLine).text) { |
| 183 | + // truncate the suggestion if it repeats the next line |
| 184 | + if (suggestionLines.length == 1) return true; |
| 185 | + |
| 186 | + // ... or if the second line of the suggestion is the prefix of line l:cmp_y + 1 |
| 187 | + if (suggestionLines.length === 2 |
| 188 | + && suggestionLines[1] == document.lineAt(firstNonEmptyDocLine + 1).text.slice(0, suggestionLines[1].length)) |
| 189 | + return true; |
| 190 | + |
| 191 | + // ... or if the middle chunk of lines of the suggestion is the same as the following non empty lines of the document |
| 192 | + if (suggestionLines.length > 2 && suggestionLines.slice(1).every((value, index) => value === document.lineAt((firstNonEmptyDocLine + 1) + index).text)) |
| 193 | + return true; |
| 194 | + } |
| 195 | + return discardSuggestion; |
| 196 | + } |
| 197 | + |
| 198 | + // cut part of the completion in some special cases |
| 199 | + updateSuggestion = (suggestionLines: string[], lineSuffix: string) => { |
| 200 | + if (lineSuffix.trim() != "") { |
| 201 | + if (suggestionLines[0].endsWith(lineSuffix)) return suggestionLines[0].slice(0, -lineSuffix.length); |
| 202 | + if (suggestionLines.length > 1) return suggestionLines[0]; |
| 203 | + } |
| 204 | + |
| 205 | + return suggestionLines.join("\n"); |
| 206 | + } |
| 207 | + |
| 208 | + private cacheFutureSuggestion = async (inputPrefix: string, inputSuffix: string, prompt: string, suggestionLines: string[]) => { |
| 209 | + let futureInputPrefix = inputPrefix; |
| 210 | + let futureInputSuffix = inputSuffix; |
| 211 | + let futurePrompt = prompt + suggestionLines[0]; |
| 212 | + if (suggestionLines.length > 1) { |
| 213 | + futureInputPrefix = inputPrefix + prompt + suggestionLines.slice(0, -1).join('\n') + '\n'; |
| 214 | + futurePrompt = suggestionLines[suggestionLines.length - 1]; |
| 215 | + let futureInputPrefixLines = futureInputPrefix.slice(0,-1).split(/\r?\n/) |
| 216 | + if (futureInputPrefixLines.length > this.app.extConfig.n_prefix){ |
| 217 | + futureInputPrefix = futureInputPrefixLines.slice(futureInputPrefixLines.length - this.app.extConfig.n_prefix).join('\n')+ '\n'; |
| 218 | + } |
| 219 | + } |
| 220 | + let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt) |
| 221 | + let cached_completion = this.app.lruResultCache.get(futureHashKey) |
| 222 | + if (cached_completion != undefined) return; |
| 223 | + let futureData = await this.app.llamaServer.getFIMCompletion(futureInputPrefix, futureInputSuffix, futurePrompt, this.app.extraContext.chunks, prompt.length - prompt.trimStart().length); |
| 224 | + let futureSuggestion = ""; |
| 225 | + if (futureData != undefined && futureData.content != undefined && futureData.content.trim() != "") { |
| 226 | + futureSuggestion = futureData.content; |
| 227 | + let suggestionLines = futureSuggestion.split(/\r?\n/) |
| 228 | + Utils.removeTrailingNewLines(suggestionLines); |
| 229 | + futureSuggestion = suggestionLines.join('\n') |
| 230 | + let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt); |
| 231 | + this.app.lruResultCache.put(futureHashKey, futureSuggestion); |
| 232 | + } |
| 233 | + } |
| 234 | + |
| 235 | + private cacheFutureAcceptLineSuggestion = async (inputPrefix: string, inputSuffix: string, prompt: string, suggestionLines: string[]) => { |
| 236 | + // For one line suggestion there is nothing to cache |
| 237 | + if (suggestionLines.length > 1) { |
| 238 | + let futureInputSuffix = inputSuffix; |
| 239 | + let futureInputPrefix = inputPrefix + prompt + suggestionLines[0] + '\n'; |
| 240 | + let futurePrompt = ""; |
| 241 | + let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt) |
| 242 | + let futureSuggestion = suggestionLines.slice(1).join('\n') |
| 243 | + let cached_completion = this.app.lruResultCache.get(futureHashKey) |
| 244 | + if (cached_completion != undefined) return; |
| 245 | + else this.app.lruResultCache.put(futureHashKey, futureSuggestion); |
| 246 | + } |
| 247 | + } |
| 248 | + |
| 249 | + insertNextWord = async (editor: vscode.TextEditor) => { |
| 250 | + // Retrieve the last inline completion item |
| 251 | + const lastSuggestion = this.lastCompletion.completion; |
| 252 | + if (!lastSuggestion) { |
| 253 | + return; |
| 254 | + } |
| 255 | + let lastSuggestioLines = lastSuggestion.split(/\r?\n/) |
| 256 | + let firstLine = lastSuggestioLines[0]; |
| 257 | + let prefix = Utils.getLeadingSpaces(firstLine) |
| 258 | + let firstWord = prefix + firstLine.trimStart().split(' ')[0] || ''; |
| 259 | + let insertText = firstWord |
| 260 | + |
| 261 | + if (firstWord === "" && lastSuggestioLines.length > 1) { |
| 262 | + let secondLine = lastSuggestioLines[1]; |
| 263 | + prefix = Utils.getLeadingSpaces(secondLine) |
| 264 | + firstWord = prefix + secondLine.trimStart().split(' ')[0] || ''; |
| 265 | + insertText = '\n' + firstWord |
| 266 | + } |
| 267 | + |
| 268 | + // Insert the first word at the cursor |
| 269 | + const position = editor.selection.active; |
| 270 | + await editor.edit(editBuilder => { |
| 271 | + editBuilder.insert(position, insertText); |
| 272 | + }); |
| 273 | + } |
| 274 | + |
| 275 | + insertFirstLine = async (editor: vscode.TextEditor) => { |
| 276 | + // Retrieve the last inline completion item |
| 277 | + const lastItem = this.lastCompletion.completion; |
| 278 | + if (!lastItem) { |
| 279 | + return; |
| 280 | + } |
| 281 | + let lastSuggestioLines = lastItem.split('\n') |
| 282 | + let insertLine = lastSuggestioLines[0] || ''; |
| 283 | + |
| 284 | + if (insertLine.trim() == "" && lastSuggestioLines.length > 1) { |
| 285 | + insertLine = '\n' + lastSuggestioLines[1]; |
| 286 | + } |
| 287 | + |
| 288 | + // Insert the first line at the cursor |
| 289 | + const position = editor.selection.active; |
| 290 | + await editor.edit(editBuilder => { |
| 291 | + editBuilder.insert(position, insertLine); |
| 292 | + }); |
| 293 | + } |
| 294 | +} |
0 commit comments