@@ -186,13 +186,19 @@ export async function * ollamaDeepseekComplete(prompt: string) : AsyncGenerator<

 export async function * devchatComplete(prompt: string) : AsyncGenerator<CodeCompletionChunk> {
     const devchatEndpoint = DevChatConfig.getInstance().get("providers.devchat.api_base");
-    const completionApiBase = devchatEndpoint + "/completions";
+    const llmApiBase = DevChatConfig.getInstance().get("complete_api_base");
+    let completionApiBase = devchatEndpoint + "/completions";
+    if (llmApiBase) {
+        completionApiBase = llmApiBase + "/completions";
+    }

     let model = DevChatConfig.getInstance().get("complete_model");
     if (!model) {
         model = "ollama/starcoder2:15b";
     }

+    const startTimeLLM = process.hrtime();
+
     const headers = {
         'Content-Type': 'application/json'
     };
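
For reference, a minimal sketch of the legacy `process.hrtime` pattern the new instrumentation relies on; everything in this block is illustrative, not part of the commit:

```typescript
// process.hrtime() returns a [seconds, nanoseconds] tuple; passing that
// tuple back in yields the elapsed time since it was taken.
const start: [number, number] = process.hrtime();
// ... timed work goes here ...
const [sec, nanosec] = process.hrtime(start);
const elapsed = sec + nanosec / 1e9; // same seconds conversion used in the diff
console.log(`took ${elapsed} seconds`);
```
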
@@ -217,7 +223,19 @@ export async function * devchatComplete(prompt: string) : AsyncGenerator<CodeCompletionChunk> {
     const stream = response.body as any;
     const decoder = new TextDecoder("utf-8");

+    const endTimeLLM = process.hrtime(startTimeLLM);
+    const durationLLM = endTimeLLM[0] + endTimeLLM[1] / 1e9;
+    logger.channel()?.debug(`LLM api post took ${durationLLM} seconds`);
+
+    let hasFirstLine = false;
+    let hasFirstChunk = false;
     for await (const chunk of stream) {
+        if (!hasFirstChunk) {
+            hasFirstChunk = true;
+            const endTimeFirstChunk = process.hrtime(startTimeLLM);
+            const durationFirstChunk = endTimeFirstChunk[0] + endTimeFirstChunk[1] / 1e9;
+            logger.channel()?.debug(`LLM first chunk took ${durationFirstChunk} seconds`);
+        }
         const chunkDataText = decoder.decode(chunk).trim();
         // split chunkText by "data: ", for example:
         // "data: 123 data: 456" will split to ["", "data: 123 ", "data: 456"]
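
The parsing in this loop assumes each SSE event keeps its `data:` prefix (hence the `substring(5)` in the next hunk). A hedged sketch of that flow with a hypothetical payload, using a lookahead split since the actual split call falls outside this excerpt:

```typescript
// Hypothetical sample: one network chunk carrying two SSE events.
const chunkDataText = 'data: {"id":"c1","choices":[{"text":"hello\\n"}]}data: {"id":"c1","choices":[{"text":"world"}]}';
// Assumption: split while keeping the "data:" prefix on each event,
// so that substring(5) can strip it afterwards.
const chunkTexts = chunkDataText.split(/(?=data: )/).filter(t => t.trim().length > 0);
for (const chunkText of chunkTexts) {
    const data = JSON.parse(chunkText.substring(5).trim());
    console.log(data.choices[0].text); // "hello\n", then "world"
}
```
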
@@ -245,6 +263,12 @@ export async function * devchatComplete(prompt: string) : AsyncGenerator<CodeCompletionChunk> {

         try {
             const data = JSON.parse(chunkText.substring(5).trim());
+            if (!hasFirstLine && data.choices[0].text.indexOf("\n") !== -1) {
+                hasFirstLine = true;
+                const endTimeLine = process.hrtime(startTimeLLM);
+                const durationLine = endTimeLine[0] + endTimeLine[1] / 1e9;
+                logger.channel()?.debug(`LLM first line took ${durationLine} seconds`);
+            }
             yield {
                 text: data.choices[0].text,
                 id: data.id
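
Finally, a hedged consumer sketch for the updated generator; `completeToString` is an illustrative driver, not part of this change:

```typescript
// Assemble a full completion from the streamed chunks; each chunk carries
// a text fragment and the response id, as in the yield above.
async function completeToString(prompt: string): Promise<string> {
    let completion = "";
    for await (const chunk of devchatComplete(prompt)) {
        completion += chunk.text;
    }
    return completion;
}
```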