@@ -92,7 +92,7 @@ async def _check_response(self, response: Response) -> Dict[str, Any]:
     async def _request(
         self,
         method: str,
-        json: Dict[str, Any],
+        json: Optional[Dict[str, Any]],
         path: str,
         stream: bool = False,
         attempt: int = 1,
@@ -291,3 +291,74 @@ async def list_models(self) -> ModelList:
             return ModelList(**response)

         raise MistralException("No response received")
+
+    async def completion(
+        self,
+        model: str,
+        prompt: str,
+        suffix: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        random_seed: Optional[int] = None,
+        stop: Optional[List[str]] = None,
+    ) -> ChatCompletionResponse:
+        """An asynchronous completion endpoint that returns a single response.
+
+        Args:
+            model (str): the name of the model to get completions with, e.g. codestral-latest
+            prompt (str): the prompt to complete
+            suffix (Optional[str]): the suffix to append to the prompt for fill-in-the-middle completion
+            temperature (Optional[float], optional): the temperature to use for sampling, e.g. 0.5. Defaults to None.
+            max_tokens (Optional[int], optional): the maximum number of tokens to generate, e.g. 100. Defaults to None.
+            top_p (Optional[float], optional): the cumulative probability of tokens to generate, e.g. 0.9.
+                Defaults to None.
+            random_seed (Optional[int], optional): the random seed to use for sampling, e.g. 42. Defaults to None.
+            stop (Optional[List[str]], optional): a list of tokens to stop generation at, e.g. ['\n\n']. Defaults to None.
+
+        Returns:
+            ChatCompletionResponse: a response object containing the generated text.
+        """
+        request = self._make_completion_request(
+            prompt, model, suffix, temperature, max_tokens, top_p, random_seed, stop
+        )
+        single_response = self._request("post", request, "v1/fim/completions")
+
+        async for response in single_response:
+            return ChatCompletionResponse(**response)
+
+        raise MistralException("No response received")
+
+    async def completion_stream(
+        self,
+        model: str,
+        prompt: str,
+        suffix: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        random_seed: Optional[int] = None,
+        stop: Optional[List[str]] = None,
+    ) -> AsyncGenerator[ChatCompletionStreamResponse, None]:
+        """An asynchronous completion endpoint that returns a streaming response.
+
+        Args:
+            model (str): the name of the model to get completions with, e.g. codestral-latest
+            prompt (str): the prompt to complete
+            suffix (Optional[str]): the suffix to append to the prompt for fill-in-the-middle completion
+            temperature (Optional[float], optional): the temperature to use for sampling, e.g. 0.5. Defaults to None.
+            max_tokens (Optional[int], optional): the maximum number of tokens to generate, e.g. 100. Defaults to None.
+            top_p (Optional[float], optional): the cumulative probability of tokens to generate, e.g. 0.9.
+                Defaults to None.
+            random_seed (Optional[int], optional): the random seed to use for sampling, e.g. 42. Defaults to None.
+            stop (Optional[List[str]], optional): a list of tokens to stop generation at, e.g. ['\n\n']. Defaults to None.
+
+        Returns:
+            AsyncGenerator[ChatCompletionStreamResponse, None]: an async generator yielding chunks of the generated text.
+        """
+        request = self._make_completion_request(
+            prompt, model, suffix, temperature, max_tokens, top_p, random_seed, stop, stream=True
+        )
+        async_response = self._request("post", request, "v1/fim/completions", stream=True)
+
+        async for json_response in async_response:
+            yield ChatCompletionStreamResponse(**json_response)
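
A minimal usage sketch of the two new endpoints, under a few assumptions not shown in this diff: that MistralAsyncClient (imported from mistralai.async_client) exposes these methods, and that the FIM response mirrors the chat-completion shape (choices[0].message.content for the single response, choices[0].delta.content for stream chunks). The MISTRAL_API_KEY environment variable and the example prompt are illustrative.

import asyncio
import os

# Assumed import path for the async client; adjust to the installed package layout.
from mistralai.async_client import MistralAsyncClient


async def main() -> None:
    client = MistralAsyncClient(api_key=os.environ["MISTRAL_API_KEY"])

    # Single-shot fill-in-the-middle completion.
    response = await client.completion(
        model="codestral-latest",
        prompt="def fibonacci(n: int):",
        suffix="return fib(n)",
        max_tokens=64,
    )
    # Assumed response shape: the generated text lives in choices[0].message.content.
    print(response.choices[0].message.content)

    # Streaming variant: chunks are yielded as they are generated.
    async for chunk in client.completion_stream(
        model="codestral-latest",
        prompt="def fibonacci(n: int):",
        suffix="return fib(n)",
        max_tokens=64,
    ):
        # Assumed chunk shape: incremental text lives in choices[0].delta.content.
        if chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="")


if __name__ == "__main__":
    asyncio.run(main())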