@@ -38,11 +38,13 @@ class Settings(BaseSettings):
         default=None,
         description="Split layers across multiple GPUs in proportion.",
     )
-    rope_freq_base: float = Field(default=10000, ge=1, description="RoPE base frequency")
-    rope_freq_scale: float = Field(default=1.0, description="RoPE frequency scaling factor")
-    seed: int = Field(
-        default=1337, description="Random seed. -1 for random."
+    rope_freq_base: float = Field(
+        default=10000, ge=1, description="RoPE base frequency"
     )
+    rope_freq_scale: float = Field(
+        default=1.0, description="RoPE frequency scaling factor"
+    )
+    seed: int = Field(default=1337, description="Random seed. -1 for random.")
     n_batch: int = Field(
         default=512, ge=1, description="The batch size to use per eval."
     )
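The `Field(...)` declarations above are plain pydantic constrained fields, so bounds like `ge=1` are enforced when the settings object is built. A minimal sketch of that behavior, assuming the pydantic v1-era `BaseSettings` import this codebase used at the time:

```python
from pydantic import BaseSettings, Field  # pydantic v1-style import assumed


class Settings(BaseSettings):
    rope_freq_base: float = Field(default=10000, ge=1, description="RoPE base frequency")
    seed: int = Field(default=1337, description="Random seed. -1 for random.")


Settings()                    # defaults pass validation
Settings(rope_freq_base=0)    # raises ValidationError: 0 violates ge=1
```

Because these are `BaseSettings` fields, each one can also be overridden through an environment variable of the same name, which is how the server is typically configured.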
@@ -186,7 +188,9 @@ def get_settings():
     yield settings


-model_field = Field(description="The model to use for generating completions.", default=None)
+model_field = Field(
+    description="The model to use for generating completions.", default=None
+)

 max_tokens_field = Field(
     default=16, ge=1, le=2048, description="The maximum number of tokens to generate."
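`model_field` and `max_tokens_field` are module-level `Field` objects shared as defaults across the request models defined further down. A sketch of how such a shared field attaches to a request model; the `CreateCompletionRequest` here is trimmed to two fields for illustration, the real model carries many more:

```python
from pydantic import BaseModel, Field

max_tokens_field = Field(
    default=16, ge=1, le=2048, description="The maximum number of tokens to generate."
)


class CreateCompletionRequest(BaseModel):
    prompt: str
    # the shared Field supplies the default, the bounds, and the
    # description that FastAPI surfaces in the OpenAPI schema
    max_tokens: int = max_tokens_field
```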
@@ -373,9 +377,11 @@ async def create_completion(
     kwargs = body.model_dump(exclude=exclude)

     if body.logit_bias is not None:
-        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
-            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
-        ])
+        kwargs["logits_processor"] = llama_cpp.LogitsProcessorList(
+            [
+                make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+            ]
+        )

     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
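`make_logit_bias_processor` produces a callable that nudges selected token logits before sampling, and `LogitsProcessorList` simply chains such callables. A rough sketch of the idea, not the project's actual implementation (which also dispatches on `logit_bias_type`); the signature is assumed to follow llama-cpp-python's logits-processor convention of `(input_ids, scores) -> scores`:

```python
import numpy as np


def biased_logits_processor(logit_bias: dict[int, float]):
    """Sketch: build a (input_ids, scores) -> scores callable that adds a
    fixed bias to each chosen token's logit before the sampler runs."""

    def processor(input_ids, scores):
        new_scores = np.array(scores, dtype=np.float32, copy=True)
        for token_id, bias in logit_bias.items():
            new_scores[token_id] += bias  # shift this token's logit
        return new_scores

    return processor
```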
@@ -402,7 +408,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):

         return EventSourceResponse(
             recv_chan, data_sender_callable=partial(event_publisher, send_chan)
-        ) # type: ignore
+        )  # type: ignore
     else:
         completion: llama_cpp.Completion = await run_in_threadpool(llama, **kwargs)  # type: ignore
         return completion
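For reference, the streaming branch couples an anyio memory-object stream to sse-starlette: the generation loop runs as a producer coroutine while `EventSourceResponse` drains the receive end. A stripped-down sketch of that wiring, omitting the disconnect and cancellation handling the real `event_publisher` carries:

```python
import json
from functools import partial

import anyio
from anyio.streams.memory import MemoryObjectSendStream
from sse_starlette.sse import EventSourceResponse


def stream_response(chunks):
    # `chunks` stands in for the completion-chunk iterator from llama(**kwargs)
    send_chan, recv_chan = anyio.create_memory_object_stream(10)

    async def event_publisher(inner_send_chan: MemoryObjectSendStream):
        async with inner_send_chan:
            for chunk in chunks:
                # one SSE event per JSON-encoded completion chunk
                await inner_send_chan.send(dict(data=json.dumps(chunk)))

    return EventSourceResponse(
        recv_chan, data_sender_callable=partial(event_publisher, send_chan)
    )
```

The bounded buffer of 10 items applies backpressure: if the client reads slowly, the producer blocks on `send` instead of buffering the whole completion in memory.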
@@ -512,9 +518,11 @@ async def create_chat_completion(
     kwargs = body.model_dump(exclude=exclude)

     if body.logit_bias is not None:
-        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
-            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
-        ])
+        kwargs["logits_processor"] = llama_cpp.LogitsProcessorList(
+            [
+                make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+            ]
+        )

     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
@@ -542,7 +550,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
         return EventSourceResponse(
             recv_chan,
             data_sender_callable=partial(event_publisher, send_chan),
-        ) # type: ignore
+        )  # type: ignore
     else:
         completion: llama_cpp.ChatCompletion = await run_in_threadpool(
             llama.create_chat_completion, **kwargs  # type: ignore
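In the non-streaming branch, the synchronous, CPU-bound llama.cpp call is handed to starlette's `run_in_threadpool` so it doesn't block the event loop; schematically (a sketch of the pattern, not the full handler):

```python
from starlette.concurrency import run_in_threadpool


async def non_streaming_chat(llama, kwargs):
    # run the blocking create_chat_completion in a worker thread and
    # await the result without stalling other in-flight requests
    return await run_in_threadpool(llama.create_chat_completion, **kwargs)
```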