
Commit d375aed

igardev and ggerganov authored
Add RAG search for Ask With AI with project context (#56)
* Add RAG search for Ask With AI with project context
* Remove duplicated call for getting context
* Chat with project supports providing files as context with the @ prefix (e.g. @test.cpp)
* Reindex files if RAG settings are changed
* Add menu item for starting the embeddings server on Mac
* Improve excluding the files from .gitignore; reduce the memory usage of the BM25 algorithm
* Improve updating of file chunks on save; add a progress bar for calculating embeddings for the RAG search
* Add the prefix llama-vscode to the shortcut commands, which makes them easier to filter
* Remove sending extra context chunks to the chat server; show an error in case of a problem with the embeddings server. If the embeddings server endpoint is not available, show a message and use only BM25 filtering
* Fix typing error in translations
* style : fix whitespaces + disable extra context for chat edit
* config : adjust params
* menu : fix embedding commands

---------

Co-authored-by: igardev <[email protected]>
Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 05fcdd4 commit d375aed
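The commit message describes a two-stage retrieval pipeline: BM25 first narrows the indexed chunks, then the embeddings server re-ranks the survivors down to the few chunks that are sent to the LLM (bounded by the new rag_max_bm25_filter_chunks and rag_max_embedding_filter_chunks settings introduced below). A minimal TypeScript sketch of that flow; bm25Rank and embed are hypothetical stand-ins for the extension's internals in chat-context.ts:

// Sketch only: bm25Rank and embed are stand-ins, not the extension's real API.
interface Chunk { uri: string; content: string; }
declare function bm25Rank(query: string, chunks: Chunk[]): Chunk[];
declare function embed(text: string): Promise<number[]>;

const cosineSimilarity = (a: number[], b: number[]): number => {
    const dot = a.reduce((s, x, i) => s + x * b[i], 0);
    const norm = (v: number[]) => Math.sqrt(v.reduce((s, x) => s + x * x, 0));
    return dot / (norm(a) * norm(b));
};

async function selectContextChunks(
    query: string,
    chunks: Chunk[],
    maxBm25: number,      // llama-vscode.rag_max_bm25_filter_chunks (default 47)
    maxEmbedding: number  // llama-vscode.rag_max_embedding_filter_chunks (default 5)
): Promise<Chunk[]> {
    // Stage 1: cheap lexical filtering with BM25 over all indexed chunks.
    const bm25Top = bm25Rank(query, chunks).slice(0, maxBm25);
    // Stage 2: semantic re-ranking via the embeddings server. Per the commit
    // message, an unreachable server falls back to the BM25 results alone.
    try {
        const queryVec = await embed(query);
        const scored = await Promise.all(bm25Top.map(async chunk => ({
            chunk,
            score: cosineSimilarity(queryVec, await embed(chunk.content)),
        })));
        return scored
            .sort((a, b) => b.score - a.score)
            .slice(0, maxEmbedding)
            .map(s => s.chunk);
    } catch {
        return bm25Top.slice(0, maxEmbedding);
    }
}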

17 files changed: +969 -132 lines

package-lock.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 74 additions & 24 deletions
@@ -2,7 +2,7 @@
   "name": "llama-vscode",
   "displayName": "llama-vscode",
   "description": "Local LLM-assisted text completion using llama.cpp",
-  "version": "0.0.9-beta-3",
+  "version": "0.0.10-beta-1",
   "publisher": "ggml-org",
   "repository": "https://github.com/ggml-org/llama.vscode",
   "engines": {
@@ -31,55 +31,55 @@
     "commands": [
       {
         "command": "extension.triggerInlineCompletion",
-        "title": "Trigger Inline Completion"
+        "title": "llama-vscode: Trigger Inline Completion"
       },
       {
         "command": "extension.triggerNoCacheCompletion",
-        "title": "Trigger No Cache Completion"
+        "title": "llama-vscode: Trigger No Cache Completion"
       },
       {
         "command": "extension.copyIntercept",
-        "title": "Copy Intercept"
+        "title": "llama-vscode: Copy Intercept"
       },
       {
         "command": "extension.cutIntercept",
-        "title": "Cut Intercept"
+        "title": "llama-vscode: Cut Intercept"
       },
       {
         "command": "extension.acceptFirstLine",
-        "title": "Accept First Line"
+        "title": "llama-vscode: Accept First Line"
       },
       {
         "command": "extension.acceptFirstWord",
-        "title": "Accept First Word"
+        "title": "llama-vscode: Accept First Word"
       },
       {
         "command": "extension.copyChunks",
-        "title": "Copy Chunks"
+        "title": "llama-vscode: Copy Chunks"
       },
       {
         "command": "extension.showMenu",
-        "title": "Show Menu"
+        "title": "llama-vscode: Show Menu"
       },
       {
         "command": "extension.askAi",
-        "title": "Ask AI"
+        "title": "llama-vscode: Ask AI"
       },
       {
         "command": "extension.askAiWithContext",
-        "title": "Ask AI With Context"
+        "title": "llama-vscode: Ask AI With Context"
       },
       {
         "command": "extension.editSelectedText",
-        "title": "Edit Selected Text with AI"
+        "title": "llama-vscode: Edit Selected Text with AI"
      },
       {
         "command": "extension.acceptTextEdit",
-        "title": "Accept Text Edit Suggestion"
+        "title": "llama-vscode: Accept Text Edit Suggestion"
       },
       {
         "command": "extension.rejectTextEdit",
-        "title": "Reject Text Edit Suggestion"
+        "title": "llama-vscode: Reject Text Edit Suggestion"
       }
     ],
     "keybindings": [
@@ -122,8 +122,7 @@
       "command": "extension.acceptFirstWord",
       "key": "ctrl+right",
       "when": "editorTextFocus && inlineSuggestionVisible"
-    }
-    ,
+    },
     {
       "command": "extension.showMenu",
       "key": "ctrl+shift+m",
@@ -169,6 +168,11 @@
       "default": "cd c:/ai ; ./llama-server.exe -m qwen2.5-coder-3b-instruct-q6_k.gguf -ngl 99 --port 8011 --path C:/llama.cpp/llama.cpp/examples/server/webui/dist",
       "description": "Shell command for starting chat llama.cpp server, executed from the menu"
     },
+    "llama-vscode.launch_embeddings": {
+      "type": "string",
+      "default": "cd c:/ai ; ./llama-server.exe -m all-MiniLM-L6-v2-Q8_0.gguf --port 8010",
+      "description": "Shell command for starting the embeddings llama.cpp server, executed from the menu"
+    },
     "llama-vscode.launch_training_completion": {
       "type": "string",
       "default": "",
@@ -199,6 +203,11 @@
       "default": "http://127.0.0.1:8011",
       "description": "The URL to be used by the extension for chat with ai."
     },
+    "llama-vscode.endpoint_embeddings": {
+      "type": "string",
+      "default": "http://127.0.0.1:8010",
+      "description": "The URL to be used by the extension for creating embeddings."
+    },
     "llama-vscode.auto": {
       "type": "boolean",
       "default": true,
@@ -274,6 +283,46 @@
       "default": 1000,
       "description": "how often to process queued chunks in normal mode"
     },
+    "llama-vscode.rag_chunk_max_chars": {
+      "type": "number",
+      "default": 2000,
+      "description": "Max number of chars per RAG chunk"
+    },
+    "llama-vscode.rag_max_lines_per_chunk": {
+      "type": "number",
+      "default": 60,
+      "description": "Max number of lines per RAG chunk"
+    },
+    "llama-vscode.rag_max_chars_per_chunk_line": {
+      "type": "number",
+      "default": 300,
+      "description": "Max chars for a chunk line; the rest of the line is cut"
+    },
+    "llama-vscode.rag_max_chunks": {
+      "type": "number",
+      "default": 30000,
+      "description": "Max chunks for the RAG search"
+    },
+    "llama-vscode.rag_max_bm25_filter_chunks": {
+      "type": "number",
+      "default": 47,
+      "description": "Max RAG chunks to filter with the BM25 algorithm"
+    },
+    "llama-vscode.rag_max_embedding_filter_chunks": {
+      "type": "number",
+      "default": 5,
+      "description": "Max RAG chunks to provide as context to the LLM"
+    },
+    "llama-vscode.rag_max_context_files": {
+      "type": "number",
+      "default": 3,
+      "description": "Max number of complete files to send as context to the LLM"
+    },
+    "llama-vscode.rag_max_context_file_chars": {
+      "type": "number",
+      "default": 5000,
+      "description": "Max chars for a context file. If the file is bigger, it will be cut to avoid an overly large context."
+    },
     "llama-vscode.language": {
       "type": "string",
       "default": "en",
@@ -285,14 +334,14 @@
       "description": "Enable/disable completions"
     },
     "llama-vscode.languageSettings": {
-        "type": "object",
-        "default": {
-          "*": true
-        },
-        "additionalProperties": {
-          "type": "boolean"
-        },
-        "description": "Enable/disable suggestions for specific languages"
+      "type": "object",
+      "default": {
+        "*": true
+      },
+      "additionalProperties": {
+        "type": "boolean"
+      },
+      "description": "Enable/disable suggestions for specific languages"
     },
     "llama-vscode.use_openai_endpoint": {
       "type": "boolean",
@@ -326,6 +375,7 @@
   },
   "dependencies": {
     "axios": "^1.1.2",
+    "ignore": "^7.0.4",
     "openai": "^4.80.1"
   },
   "devDependencies": {

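The new ignore dependency backs the improved .gitignore handling mentioned in the commit message. A small sketch of how indexing could skip ignored files with the package's add/ignores API; the extension's actual wiring is an assumption here:

import * as fs from "fs";
import ignore from "ignore";

// Build a matcher from the workspace's .gitignore, then use it to filter
// candidate files before they are chunked and indexed for RAG.
const ig = ignore().add(fs.readFileSync(".gitignore", "utf8"));

const candidates = ["src/architect.ts", "node_modules/axios/index.js", "dist/out.js"];
const indexable = candidates.filter(path => !ig.ignores(path));
// With a typical .gitignore, the node_modules/ and dist/ entries drop out here.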
src/application.ts

Lines changed: 6 additions & 0 deletions
@@ -9,6 +9,8 @@ import {Completion} from "./completion";
 import {Logger} from "./logger";
 import { ChatWithAi } from "./chat-with-ai";
 import { TextEditor } from "./text-editor";
+import { ChatContext } from "./chat-context";
+import { Prompts } from "./prompts";
 
 export class Application {
     private static instance: Application;
@@ -23,6 +25,8 @@ export class Application {
     public logger: Logger
     public askAi: ChatWithAi
     public textEditor: TextEditor
+    public chatContext: ChatContext
+    public prompts: Prompts
 
     private constructor() {
         this.extConfig = new Configuration()
@@ -36,6 +40,8 @@ export class Application {
         this.logger = new Logger(this)
         this.askAi = new ChatWithAi(this)
         this.textEditor = new TextEditor(this)
+        this.chatContext = new ChatContext(this)
+        this.prompts = new Prompts(this)
     }
 
     public static getInstance(): Application {

src/architect.ts

Lines changed: 72 additions & 2 deletions
@@ -1,4 +1,6 @@
 // TODO
+// If the embeddings server is missing, show an error so it is clear there is a problem
+//
 // If a LoRA is used for the chat server - pass it in the request from the webui
 // Ideas
 // - Use agents (?)
@@ -14,10 +16,56 @@ export class Architect {
         this.app = application;
     }
 
+    init = () => {
+        // Start indexing workspace files
+        if (this.app.extConfig.endpoint_embeddings.trim() != "") {
+            setTimeout(() => {
+                this.app.chatContext.indexWorkspaceFiles().catch(error => {
+                    console.error('Failed to index workspace files:', error);
+                });
+            }, 0);
+        }
+    }
+
+    setOnSaveDeleteFileForDb = (context: vscode.ExtensionContext) => {
+        const saveListener = vscode.workspace.onDidSaveTextDocument(async (document) => {
+            try {
+                if (!this.app.chatContext.isImageOrVideoFile(document.uri.toString())) {
+                    // Update after a delay, and only if the file has not changed in the meantime, to avoid overly frequent updates
+                    let updateTime = Date.now()
+                    let fileProperties = this.app.chatContext.getFileProperties(document.uri.toString())
+                    if (fileProperties) fileProperties.updated = updateTime;
+                    setTimeout(async () => {
+                        if (fileProperties && fileProperties.updated > updateTime) {
+                            return;
+                        }
+                        this.app.chatContext.addDocument(document.uri.toString(), document.getText());
+                    }, 5000);
+                }
+            } catch (error) {
+                console.error('Failed to add document to RAG:', error);
+            }
+        });
+        context.subscriptions.push(saveListener);
+
+        // Add file delete listener for vector RAG
+        const deleteListener = vscode.workspace.onDidDeleteFiles(async (event) => {
+            for (const file of event.files) {
+                try {
+                    await this.app.chatContext.removeDocument(file.toString());
+                } catch (error) {
+                    console.error('Failed to remove document from RAG:', error);
+                }
+            }
+        });
+        context.subscriptions.push(deleteListener);
+    }
+
     setOnChangeConfiguration = (context: vscode.ExtensionContext) => {
         let configurationChangeDisp = vscode.workspace.onDidChangeConfiguration((event) => {
             const config = vscode.workspace.getConfiguration("llama-vscode");
             this.app.extConfig.updateOnEvent(event, config);
+            if (this.app.extConfig.isRagConfigChanged(event)) this.init()
             vscode.window.showInformationMessage(this.app.extConfig.getUiText(`llama-vscode extension is updated.`) ?? "");
         });
         context.subscriptions.push(configurationChangeDisp);
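isRagConfigChanged is defined on Configuration and is not part of this diff; a plausible sketch, assuming it simply asks the change event whether any RAG-related setting was touched so that init() re-indexes only when needed:

import * as vscode from "vscode";

// Hypothetical sketch; setting names match the new package.json entries above.
function isRagConfigChanged(event: vscode.ConfigurationChangeEvent): boolean {
    const ragSettings = [
        "llama-vscode.endpoint_embeddings",
        "llama-vscode.rag_chunk_max_chars",
        "llama-vscode.rag_max_lines_per_chunk",
        "llama-vscode.rag_max_chars_per_chunk_line",
        "llama-vscode.rag_max_chunks",
        "llama-vscode.rag_max_bm25_filter_chunks",
        "llama-vscode.rag_max_embedding_filter_chunks",
    ];
    return ragSettings.some(setting => event.affectsConfiguration(setting));
}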
@@ -31,7 +79,7 @@ export class Architect {
             this.app.extraContext.pickChunkAroundCursor(previousEditor.selection.active.line, previousEditor.document);
         }, 0);
     }
-
+
     if (editor) {
         // Editor is now active in the UI, pick a chunk
         let activeDocument = editor.document;
@@ -100,6 +148,17 @@
         context.subscriptions.push(onSaveDocDisposable);
     }
 
+    setOnChangeWorkspaceFolders = (context: vscode.ExtensionContext) => {
+        // Listen for new workspace folders being added
+        context.subscriptions.push(
+            vscode.workspace.onDidChangeWorkspaceFolders(event => {
+                event.added.forEach(folder => {
+                    this.init();
+                });
+            })
+        );
+    }
+
     registerCommandManualCompletion = (context: vscode.ExtensionContext) => {
         const triggerManualCompletionDisposable = vscode.commands.registerCommand('extension.triggerInlineCompletion', async () => {
             // Manual triggering of the completion with a shortcut
@@ -147,7 +206,18 @@
             if (this.app.lruResultCache.size() > 0){
                 completionCache = Array.from(this.app.lruResultCache.getMap().entries()).reduce((accumulator, [key, value]) => accumulator + "Key: " + key + "\nCompletion:\n" + value + "\n\n" , "");
             }
-            vscode.env.clipboard.writeText("Events:\n" + eventLogsCombined + "\n\n------------------------------\n" + "Extra context: \n" + extraContext + "\n\n------------------------------\nCompletion cache: \n" + completionCache)
+            let firstChunks = ""
+            if (this.app.chatContext.entries.size > 0){
+                firstChunks = Array.from(this.app.chatContext.entries.entries()).slice(0,5).reduce((accumulator, [key, value]) => accumulator + "ID: " + key + "\nFile:\n" + value.uri +
+                    "\nfirst line:\n" + value.firstLine +
+                    "\nlast line:\n" + value.lastLine +
+                    "\nChunk:\n" + value.content + "\n\n" , "");
+            }
+            vscode.env.clipboard.writeText("Events:\n" + eventLogsCombined +
+                "\n\n------------------------------\n" +
+                "Extra context: \n" + extraContext +
+                "\n\n------------------------------\nCompletion cache: \n" + completionCache +
+                "\n\n------------------------------\nChunks: \n" + firstChunks)
         });
         context.subscriptions.push(triggerCopyChunksDisposable);
     }
