|
| 1 | +import {Application} from "./application"; |
| 2 | +import {LlamaResponse} from "./llama-server"; |
| 3 | +import vscode from "vscode"; |
| 4 | +import {Utils} from "./utils"; |
| 5 | + |
/**
 * Snapshot of a single completion request and its result. Stored in
 * `Completion.lastCompletion` so follow-up commands (insert next word /
 * insert first line) can reuse the most recent suggestion.
 */
interface CompletionDetails {
    // The post-processed suggestion text that was shown to the editor.
    completion: string;
    // Cursor position the suggestion was generated for.
    position: vscode.Position;
    // Lines before the cursor line, sent as the FIM prefix context.
    inputPrefix: string;
    // Text after the cursor (rest of line + following lines), sent as the FIM suffix context.
    inputSuffix: string;
    // Text on the cursor line before the cursor — the FIM prompt.
    prompt: string;
}
| 13 | + |
| 14 | +export class Completion { |
| 15 | + private app: Application |
| 16 | + private isRequestInProgress = false |
| 17 | + isForcedNewRequest = false |
| 18 | + lastCompletion: CompletionDetails = {completion: "", position: new vscode.Position(0, 0), inputPrefix: "", inputSuffix: "", prompt: ""}; |
| 19 | + |
| 20 | + constructor(application: Application) { |
| 21 | + this.app = application; |
| 22 | + } |
| 23 | + |
| 24 | + // Class field is used instead of a function to make "this" available |
| 25 | + getCompletionItems = async (document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext, token: vscode.CancellationToken): Promise<vscode.InlineCompletionList | vscode.InlineCompletionItem[] | null> => { |
| 26 | + let group = "GET_COMPLETION_" + Date.now(); |
| 27 | + if (!this.app.extConfig.auto && context.triggerKind == vscode.InlineCompletionTriggerKind.Automatic) { |
| 28 | + this.app.logger.addEventLog(group, "MANUAL_MODE_AUTOMATIC_TRIGGERING_RETURN", "") |
| 29 | + return null; |
| 30 | + } |
| 31 | + |
| 32 | + // Start only if the previous request is finiched |
| 33 | + while (this.isRequestInProgress) { |
| 34 | + await Utils.delay(this.app.extConfig.DELAY_BEFORE_COMPL_REQUEST); |
| 35 | + if (token.isCancellationRequested) { |
| 36 | + this.app.logger.addEventLog(group, "CANCELLATION_TOKEN_RETURN", "waiting") |
| 37 | + return null; |
| 38 | + } |
| 39 | + } |
| 40 | + this.isRequestInProgress = true // Just before leaving the function should be set to false |
| 41 | + this.app.extraContext.lastComplStartTime = Date.now(); |
| 42 | + |
| 43 | + // Gather local context |
| 44 | + const prefixLines = Utils.getPrefixLines(document, position, this.app.extConfig.n_prefix); |
| 45 | + const suffixLines = Utils.getSuffixLines(document, position, this.app.extConfig.n_suffix); |
| 46 | + const lineText = document.lineAt(position.line).text |
| 47 | + const cursorIndex = position.character; |
| 48 | + const linePrefix = lineText.slice(0, cursorIndex); |
| 49 | + const lineSuffix = lineText.slice(cursorIndex); |
| 50 | + const nindent = lineText.length - lineText.trimStart().length |
| 51 | + if (context.triggerKind == vscode.InlineCompletionTriggerKind.Automatic && lineSuffix.length > this.app.extConfig.max_line_suffix) { |
| 52 | + this.isRequestInProgress = false |
| 53 | + this.app.logger.addEventLog(group, "TOO_LONG_SUFFIX_RETURN", "") |
| 54 | + return null |
| 55 | + } |
| 56 | + const prompt = linePrefix; |
| 57 | + const inputPrefix = prefixLines.join('\n') + '\n'; |
| 58 | + const inputSuffix = lineSuffix + '\n' + suffixLines.join('\n') + '\n'; |
| 59 | + |
| 60 | + // Reuse cached completion if available. |
| 61 | + try { |
| 62 | + let data: LlamaResponse | undefined |
| 63 | + let hashKey = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + prompt) |
| 64 | + let completion = this.getCachedCompletion(hashKey, inputPrefix, inputSuffix, prompt) |
| 65 | + let isCachedResponse = !this.isForcedNewRequest && completion != undefined |
| 66 | + if (!isCachedResponse) { |
| 67 | + this.isForcedNewRequest = false |
| 68 | + if (token.isCancellationRequested){ |
| 69 | + this.isRequestInProgress = false |
| 70 | + this.app.logger.addEventLog(group, "CANCELLATION_TOKEN_RETURN", "just before server request") |
| 71 | + return null; |
| 72 | + } |
| 73 | + this.app.statusbar.showThinkingInfo(); |
| 74 | + |
| 75 | + data = await this.app.llamaServer.getFIMCompletion(inputPrefix, inputSuffix, prompt, this.app.extraContext.chunks, nindent) |
| 76 | + if (data != undefined) completion = data.content; |
| 77 | + else completion = undefined |
| 78 | + } |
| 79 | + if (completion == undefined || completion.trim() == ""){ |
| 80 | + this.app.statusbar.showInfo(undefined); |
| 81 | + this.isRequestInProgress = false |
| 82 | + this.app.logger.addEventLog(group, "NO_SUGGESTION_RETURN", "") |
| 83 | + return []; |
| 84 | + } |
| 85 | + |
| 86 | + let suggestionLines = completion.split(/\r?\n/) |
| 87 | + Utils.removeTrailingNewLines(suggestionLines); |
| 88 | + |
| 89 | + if (this.shouldDiscardSuggestion(suggestionLines, document, position, linePrefix, lineSuffix)) { |
| 90 | + this.app.statusbar.showInfo(undefined); |
| 91 | + this.isRequestInProgress = false |
| 92 | + this.app.logger.addEventLog(group, "DISCARD_SUGGESTION_RETURN", "") |
| 93 | + return []; |
| 94 | + } |
| 95 | + |
| 96 | + completion = this.updateSuggestion(suggestionLines, lineSuffix); |
| 97 | + |
| 98 | + if (!isCachedResponse) this.app.lruResultCache.put(hashKey, completion) |
| 99 | + this.lastCompletion = this.getCompletionDetails(completion, position, inputPrefix, inputSuffix, prompt); |
| 100 | + |
| 101 | + // Run async as not needed for the suggestion |
| 102 | + setTimeout(async () => { |
| 103 | + if (isCachedResponse) this.app.statusbar.showCachedInfo() |
| 104 | + else this.app.statusbar.showInfo(data); |
| 105 | + if (!token.isCancellationRequested && lineSuffix.trim() === ""){ |
| 106 | + await this.cacheFutureSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines); |
| 107 | + await this.cacheFutureAcceptLineSuggestion(inputPrefix, inputSuffix, prompt, suggestionLines); |
| 108 | + } |
| 109 | + if (!token.isCancellationRequested){ |
| 110 | + this.app.extraContext.addFimContextChunks(position, context, document); |
| 111 | + } |
| 112 | + }, 0); |
| 113 | + this.isRequestInProgress = false |
| 114 | + this.app.logger.addEventLog(group, "NORMAL_RETURN", suggestionLines[0]) |
| 115 | + return [this.getCompletion(completion, position)]; |
| 116 | + } catch (err) { |
| 117 | + console.error("Error fetching llama completion:", err); |
| 118 | + vscode.window.showInformationMessage(`Error getting response. Please check if llama.cpp server is running. `); |
| 119 | + let errorMessage = "Error fetching completion" |
| 120 | + if (err instanceof Error) { |
| 121 | + vscode.window.showInformationMessage(err.message); |
| 122 | + errorMessage = err.message |
| 123 | + } |
| 124 | + this.isRequestInProgress = false |
| 125 | + this.app.logger.addEventLog(group, "ERROR_RETURN", errorMessage) |
| 126 | + return []; |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + private getCachedCompletion = (hashKey: string, inputPrefix: string, inputSuffix: string, prompt: string) => { |
| 131 | + let result = this.app.lruResultCache.get(hashKey); |
| 132 | + if (result != undefined) return result |
| 133 | + for (let i = prompt.length; i >= 0; i--) { |
| 134 | + let newPrompt = prompt.slice(0, i) |
| 135 | + let promptCut = prompt.slice(i) |
| 136 | + let hash = this.app.lruResultCache.getHash(inputPrefix + "|" + inputSuffix + "|" + newPrompt) |
| 137 | + let result = this.app.lruResultCache.get(hash) |
| 138 | + if (result != undefined && promptCut == result.slice(0,promptCut.length)) return result.slice(prompt.length - newPrompt.length) |
| 139 | + } |
| 140 | + |
| 141 | + return undefined |
| 142 | + } |
| 143 | + |
| 144 | + getCompletion = (completion: string, position: vscode.Position) => { |
| 145 | + return new vscode.InlineCompletionItem( |
| 146 | + completion, |
| 147 | + new vscode.Range(position, position) |
| 148 | + ); |
| 149 | + } |
| 150 | + |
| 151 | + private getCompletionDetails = (completion: string, position: vscode.Position, inputPrefix: string, inputSuffix: string, prompt: string) => { |
| 152 | + return { completion: completion, position: position, inputPrefix: inputPrefix, inputSuffix: inputSuffix, prompt: prompt }; |
| 153 | + } |
| 154 | + |
| 155 | + // logic for discarding predictions that repeat existing text |
| 156 | + shouldDiscardSuggestion = (suggestionLines: string[], document: vscode.TextDocument, position: vscode.Position, linePrefix: string, lineSuffix: string) => { |
| 157 | + let discardSuggestion = false; |
| 158 | + if (suggestionLines.length == 0) return true; |
| 159 | + // truncate the suggestion if the first line is empty |
| 160 | + if (suggestionLines.length == 1 && suggestionLines[0].trim() == "") return true; |
| 161 | + |
| 162 | + // if cursor on the last line don't discard |
| 163 | + if (position.line == document.lineCount - 1) return false; |
| 164 | + |
| 165 | + // ... and the next lines are repeated |
| 166 | + if (suggestionLines.length > 1 |
| 167 | + && (suggestionLines[0].trim() == "" || suggestionLines[0].trim() == lineSuffix.trim()) |
| 168 | + && suggestionLines.slice(1).every((value, index) => value === document.lineAt((position.line + 1) + index).text)) |
| 169 | + return true; |
| 170 | + |
| 171 | + // truncate the suggestion if it repeats the suffix |
| 172 | + if (suggestionLines.length == 1 && suggestionLines[0] == lineSuffix) return true; |
| 173 | + |
| 174 | + // find the first non-empty line (strip whitespace) |
| 175 | + let firstNonEmptyDocLine = position.line + 1; |
| 176 | + while (firstNonEmptyDocLine < document.lineCount && document.lineAt(firstNonEmptyDocLine).text.trim() === "") |
| 177 | + firstNonEmptyDocLine++; |
| 178 | + |
| 179 | + // if all lines to the end of file are empty don't discard |
| 180 | + if (firstNonEmptyDocLine >= document.lineCount) return false; |
| 181 | + |
| 182 | + if (linePrefix + suggestionLines[0] === document.lineAt(firstNonEmptyDocLine).text) { |
| 183 | + // truncate the suggestion if it repeats the next line |
| 184 | + if (suggestionLines.length == 1) return true; |
| 185 | + |
| 186 | + // ... or if the second line of the suggestion is the prefix of line l:cmp_y + 1 |
| 187 | + if (suggestionLines.length === 2 |
| 188 | + && suggestionLines[1] == document.lineAt(firstNonEmptyDocLine + 1).text.slice(0, suggestionLines[1].length)) |
| 189 | + return true; |
| 190 | + |
| 191 | + // ... or if the middle chunk of lines of the suggestion is the same as the following non empty lines of the document |
| 192 | + if (suggestionLines.length > 2 && suggestionLines.slice(1).every((value, index) => value === document.lineAt((firstNonEmptyDocLine + 1) + index).text)) |
| 193 | + return true; |
| 194 | + } |
| 195 | + return discardSuggestion; |
| 196 | + } |
| 197 | + |
| 198 | + // cut part of the completion in some special cases |
| 199 | + updateSuggestion = (suggestionLines: string[], lineSuffix: string) => { |
| 200 | + if (lineSuffix.trim() != "") { |
| 201 | + if (suggestionLines[0].endsWith(lineSuffix)) return suggestionLines[0].slice(0, -lineSuffix.length); |
| 202 | + if (suggestionLines.length > 1) return suggestionLines[0]; |
| 203 | + } |
| 204 | + |
| 205 | + return suggestionLines.join("\n"); |
| 206 | + } |
| 207 | + |
| 208 | + private cacheFutureSuggestion = async (inputPrefix: string, inputSuffix: string, prompt: string, suggestionLines: string[]) => { |
| 209 | + let futureInputPrefix = inputPrefix; |
| 210 | + let futureInputSuffix = inputSuffix; |
| 211 | + let futurePrompt = prompt + suggestionLines[0]; |
| 212 | + if (suggestionLines.length > 1) { |
| 213 | + futureInputPrefix = inputPrefix + prompt + suggestionLines.slice(0, -1).join('\n') + '\n'; |
| 214 | + futurePrompt = suggestionLines[suggestionLines.length - 1]; |
| 215 | + let futureInputPrefixLines = futureInputPrefix.slice(0,-1).split(/\r?\n/) |
| 216 | + if (futureInputPrefixLines.length > this.app.extConfig.n_prefix){ |
| 217 | + futureInputPrefix = futureInputPrefixLines.slice(futureInputPrefixLines.length - this.app.extConfig.n_prefix).join('\n')+ '\n'; |
| 218 | + } |
| 219 | + } |
| 220 | + let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt) |
| 221 | + let cached_completion = this.app.lruResultCache.get(futureHashKey) |
| 222 | + if (cached_completion != undefined) return; |
| 223 | + let futureData = await this.app.llamaServer.getFIMCompletion(futureInputPrefix, futureInputSuffix, futurePrompt, this.app.extraContext.chunks, prompt.length - prompt.trimStart().length); |
| 224 | + let futureSuggestion = ""; |
| 225 | + if (futureData != undefined && futureData.content != undefined && futureData.content.trim() != "") { |
| 226 | + futureSuggestion = futureData.content; |
| 227 | + let suggestionLines = futureSuggestion.split(/\r?\n/) |
| 228 | + Utils.removeTrailingNewLines(suggestionLines); |
| 229 | + futureSuggestion = suggestionLines.join('\n') |
| 230 | + let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt); |
| 231 | + this.app.lruResultCache.put(futureHashKey, futureSuggestion); |
| 232 | + } |
| 233 | + } |
| 234 | + |
| 235 | + private cacheFutureAcceptLineSuggestion = async (inputPrefix: string, inputSuffix: string, prompt: string, suggestionLines: string[]) => { |
| 236 | + // For one line suggestion there is nothing to cache |
| 237 | + if (suggestionLines.length > 1) { |
| 238 | + let futureInputSuffix = inputSuffix; |
| 239 | + let futureInputPrefix = inputPrefix + prompt + suggestionLines[0] + '\n'; |
| 240 | + let futurePrompt = ""; |
| 241 | + let futureHashKey = this.app.lruResultCache.getHash(futureInputPrefix + "|" + futureInputSuffix + "|" + futurePrompt) |
| 242 | + let futureSuggestion = suggestionLines.slice(1).join('\n') |
| 243 | + let cached_completion = this.app.lruResultCache.get(futureHashKey) |
| 244 | + if (cached_completion != undefined) return; |
| 245 | + else this.app.lruResultCache.put(futureHashKey, futureSuggestion); |
| 246 | + } |
| 247 | + } |
| 248 | + |
| 249 | + insertNextWord = async (editor: vscode.TextEditor) => { |
| 250 | + // Retrieve the last inline completion item |
| 251 | + const lastSuggestion = this.lastCompletion.completion; |
| 252 | + if (!lastSuggestion) { |
| 253 | + return; |
| 254 | + } |
| 255 | + let lastSuggestioLines = lastSuggestion.split(/\r?\n/) |
| 256 | + let firstLine = lastSuggestioLines[0]; |
| 257 | + let prefix = Utils.getLeadingSpaces(firstLine) |
| 258 | + let firstWord = prefix + firstLine.trimStart().split(' ')[0] || ''; |
| 259 | + let insertText = firstWord |
| 260 | + |
| 261 | + if (firstWord === "" && lastSuggestioLines.length > 1) { |
| 262 | + let secondLine = lastSuggestioLines[1]; |
| 263 | + prefix = Utils.getLeadingSpaces(secondLine) |
| 264 | + firstWord = prefix + secondLine.trimStart().split(' ')[0] || ''; |
| 265 | + insertText = '\n' + firstWord |
| 266 | + } |
| 267 | + |
| 268 | + // Insert the first word at the cursor |
| 269 | + const position = editor.selection.active; |
| 270 | + await editor.edit(editBuilder => { |
| 271 | + editBuilder.insert(position, insertText); |
| 272 | + }); |
| 273 | + } |
| 274 | + |
| 275 | + insertFirstLine = async (editor: vscode.TextEditor) => { |
| 276 | + // Retrieve the last inline completion item |
| 277 | + const lastItem = this.lastCompletion.completion; |
| 278 | + if (!lastItem) { |
| 279 | + return; |
| 280 | + } |
| 281 | + let lastSuggestioLines = lastItem.split('\n') |
| 282 | + let insertLine = lastSuggestioLines[0] || ''; |
| 283 | + |
| 284 | + if (insertLine.trim() == "" && lastSuggestioLines.length > 1) { |
| 285 | + insertLine = '\n' + lastSuggestioLines[1]; |
| 286 | + } |
| 287 | + |
| 288 | + // Insert the first line at the cursor |
| 289 | + const position = editor.selection.active; |
| 290 | + await editor.edit(editBuilder => { |
| 291 | + editBuilder.insert(position, insertLine); |
| 292 | + }); |
| 293 | + } |
| 294 | +} |
0 commit comments