@@ -81,7 +81,23 @@ export class LlamaServer {
     };
   }
 
-  private createRequestPayload(inputPrefix: string, inputSuffix: string, chunks: any[], prompt: string, nindent?: number) {
+  private createRequestPayload(noPredict: boolean, inputPrefix: string, inputSuffix: string, chunks: any[], prompt: string, nindent?: number) {
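+    // Cache-warming mode: with n_predict set to 0 and t_max_predict_ms set to 1,
+    // the server ingests the prompt and extra context but generates no tokens.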
+    if (noPredict) {
+      return {
+        input_prefix: inputPrefix,
+        input_suffix: inputSuffix,
+        input_extra: chunks,
+        prompt,
+        n_predict: 0,
+        samplers: [],
+        cache_prompt: true,
+        t_max_prompt_ms: this.extConfig.t_max_prompt_ms,
+        t_max_predict_ms: 1,
+      };
+    }
+
     return {
       input_prefix: inputPrefix,
       input_suffix: inputSuffix,
@@ -95,7 +111,8 @@ export class LlamaServer {
     };
   }
 
-  getLlamaCompletion = async (
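+  // Fetches a fill-in-the-middle (FIM) completion for the current cursor context.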
+  getFIMCompletion = async (
     inputPrefix: string,
     inputSuffix: string,
     prompt: string,
@@ -111,14 +128,16 @@ export class LlamaServer {
     // else, default to llama.cpp
     const response = await axios.post<LlamaResponse>(
       `${this.extConfig.endpoint}/infill`,
-      this.createRequestPayload(inputPrefix, inputSuffix, chunks, prompt, nindent),
+      this.createRequestPayload(false, inputPrefix, inputSuffix, chunks, prompt, nindent),
       this.extConfig.axiosRequestConfig
     );
 
     return response.status === STATUS_OK ? response.data : undefined;
   };
 
-  prepareLlamaForNextCompletion = (chunks: any[]): void => {
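+  // Sends the extra-context chunks to the server in advance so the prompt
+  // cache is warm before the next FIM request.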
+  updateExtraContext = (chunks: any[]): void => {
     // If the server is OpenAI compatible, use the OpenAI API to prepare for the next FIM
     if (this.extConfig.use_openai_endpoint) {
       // wtg 20250207 - per @igardev ... "This makes sense only if there is a server cache"
@@ -129,7 +148,7 @@ export class LlamaServer {
129143 // else, make a request to the API to prepare for the next FIM
130144 axios . post < LlamaResponse > (
131145 `${ this . extConfig . endpoint } /infill` ,
-      this.createRequestPayload("", "", chunks, "", undefined),
+      this.createRequestPayload(true, "", "", chunks, "", undefined),
       this.extConfig.axiosRequestConfig
     );
   };