@@ -81,7 +81,23 @@ export class LlamaServer {
     };
   }
 
-  private createRequestPayload(inputPrefix: string, inputSuffix: string, chunks: any[], prompt: string, nindent?: number) {
+  private createRequestPayload(noPredict: boolean, inputPrefix: string, inputSuffix: string, chunks: any[], prompt: string, nindent?: number) {
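+    // Cache-warming mode: with n_predict set to 0 and t_max_predict_ms set to 1,
+    // the server ingests the prompt and extra context but generates no tokens.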
+    if (noPredict) {
+      return {
+        input_prefix: inputPrefix,
+        input_suffix: inputSuffix,
+        input_extra: chunks,
+        prompt,
+        n_predict: 0,
+        samplers: [],
+        cache_prompt: true,
+        t_max_prompt_ms: this.extConfig.t_max_prompt_ms,
+        t_max_predict_ms: 1,
+      };
+    }
+
     return {
       input_prefix: inputPrefix,
       input_suffix: inputSuffix,
@@ -95,7 +111,8 @@ export class LlamaServer {
     };
   }
 
-  getLlamaCompletion = async (
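+  // Fetches a fill-in-the-middle (FIM) completion for the current cursor context.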
+  getFIMCompletion = async (
     inputPrefix: string,
     inputSuffix: string,
     prompt: string,
@@ -111,14 +128,16 @@ export class LlamaServer {
     // else, default to llama.cpp
     const response = await axios.post<LlamaResponse>(
       `${this.extConfig.endpoint}/infill`,
-      this.createRequestPayload(inputPrefix, inputSuffix, chunks, prompt, nindent),
+      this.createRequestPayload(false, inputPrefix, inputSuffix, chunks, prompt, nindent),
       this.extConfig.axiosRequestConfig
     );
 
     return response.status === STATUS_OK ? response.data : undefined;
   };
 
-  prepareLlamaForNextCompletion = (chunks: any[]): void => {
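+  // Sends the extra-context chunks to the server in advance so the prompt
+  // cache is warm before the next FIM request.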
+  updateExtraContext = (chunks: any[]): void => {
     // If the server is OpenAI compatible, use the OpenAI API to prepare for the next FIM
     if (this.extConfig.use_openai_endpoint) {
       // wtg 20250207 - per @igardev ... "This makes sense only if there is a server cache"
@@ -129,7 +148,7 @@ export class LlamaServer {
129143 // else, make a request to the API to prepare for the next FIM
130144 axios . post < LlamaResponse > (
131145 `${ this . extConfig . endpoint } /infill` ,
-      this.createRequestPayload("", "", chunks, "", undefined),
+      this.createRequestPayload(true, "", "", chunks, "", undefined),
       this.extConfig.axiosRequestConfig
     );
   };