Commit 0b121a7

Format
1 parent b43917c commit 0b121a7

File tree

llama_cpp/llama_types.py
llama_cpp/server/app.py

2 files changed: +23 −13 lines

llama_cpp/llama_types.py

Lines changed: 2 additions & 0 deletions
@@ -87,9 +87,11 @@ class ChatCompletion(TypedDict):
     choices: List[ChatCompletionChoice]
     usage: CompletionUsage
 
+
 class ChatCompletionChunkDeltaEmpty(TypedDict):
     pass
 
+
 class ChatCompletionChunkDelta(TypedDict):
     role: NotRequired[Literal["assistant"]]
     content: NotRequired[str]
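
The only change in this file is the extra blank line that Black and PEP 8 expect between top-level class definitions. For context, a minimal sketch of how a TypedDict such as ChatCompletionChunkDelta is declared and consumed; the typing_extensions import path is an assumption for Python versions without NotRequired in the standard library:

# Minimal sketch, not the library's file: TypedDicts are plain dicts at
# runtime, so a streamed delta that carries only some keys is still valid.
from typing_extensions import Literal, NotRequired, TypedDict


class ChatCompletionChunkDelta(TypedDict):
    role: NotRequired[Literal["assistant"]]
    content: NotRequired[str]


delta: ChatCompletionChunkDelta = {"content": "Hello"}  # "role" may be omitted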

llama_cpp/server/app.py

Lines changed: 21 additions & 13 deletions
@@ -38,11 +38,13 @@ class Settings(BaseSettings):
         default=None,
         description="Split layers across multiple GPUs in proportion.",
     )
-    rope_freq_base: float = Field(default=10000, ge=1, description="RoPE base frequency")
-    rope_freq_scale: float = Field(default=1.0, description="RoPE frequency scaling factor")
-    seed: int = Field(
-        default=1337, description="Random seed. -1 for random."
+    rope_freq_base: float = Field(
+        default=10000, ge=1, description="RoPE base frequency"
     )
+    rope_freq_scale: float = Field(
+        default=1.0, description="RoPE frequency scaling factor"
+    )
+    seed: int = Field(default=1337, description="Random seed. -1 for random.")
     n_batch: int = Field(
         default=512, ge=1, description="The batch size to use per eval."
     )
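
Since Settings extends pydantic's BaseSettings, the reformatted rope_freq_base, rope_freq_scale, and seed fields keep their defaults but can normally be overridden through environment variables. A minimal sketch, assuming pydantic's default case-insensitive field-name mapping with no env_prefix, and an illustrative model path:

# Hypothetical usage sketch; env-var names and the model path are assumptions.
import os

os.environ["ROPE_FREQ_BASE"] = "10000"   # RoPE base frequency (>= 1)
os.environ["ROPE_FREQ_SCALE"] = "0.5"    # RoPE frequency scaling factor
os.environ["SEED"] = "-1"                # -1 requests a random seed

from llama_cpp.server.app import Settings  # noqa: E402

settings = Settings(model="./models/7B/ggml-model.bin")
print(settings.rope_freq_base, settings.rope_freq_scale, settings.seed)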
@@ -186,7 +188,9 @@ def get_settings():
     yield settings
 
 
-model_field = Field(description="The model to use for generating completions.", default=None)
+model_field = Field(
+    description="The model to use for generating completions.", default=None
+)
 
 max_tokens_field = Field(
     default=16, ge=1, le=2048, description="The maximum number of tokens to generate."
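
model_field and max_tokens_field are module-level Field objects shared as defaults and metadata across the server's request models; a sketch of that pattern with an illustrative request model (the class below is an assumption, not necessarily the server's actual definition):

# Illustrative only: one Field object reused by multiple pydantic models.
from typing import Optional

from pydantic import BaseModel, Field

model_field = Field(
    description="The model to use for generating completions.", default=None
)
max_tokens_field = Field(
    default=16, ge=1, le=2048, description="The maximum number of tokens to generate."
)


class ExampleCompletionRequest(BaseModel):
    prompt: str = ""
    model: Optional[str] = model_field
    max_tokens: int = max_tokens_field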
@@ -373,9 +377,11 @@ async def create_completion(
     kwargs = body.model_dump(exclude=exclude)
 
     if body.logit_bias is not None:
-        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
-            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
-        ])
+        kwargs["logits_processor"] = llama_cpp.LogitsProcessorList(
+            [
+                make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+            ]
+        )
 
     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
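
The reformatted block only changes quoting and wrapping: a single processor built by make_logit_bias_processor is still wrapped in llama_cpp.LogitsProcessorList. A logits processor in this API is a callable that takes the token ids generated so far plus the current logits and returns adjusted logits; the sketch below shows a logit-bias processor of that shape, and is illustrative rather than the server's actual make_logit_bias_processor:

# Illustrative sketch of a logit-bias processor with the
# (input_ids, scores) -> scores shape that LogitsProcessorList expects.
from typing import Dict, List

import numpy as np


def make_token_bias_processor(logit_bias: Dict[int, float]):
    def processor(input_ids: List[int], scores: np.ndarray) -> np.ndarray:
        biased = scores.copy()
        for token_id, bias in logit_bias.items():
            biased[token_id] += bias  # shift this token's logit before sampling
        return biased

    return processor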
@@ -402,7 +408,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
 
         return EventSourceResponse(
             recv_chan, data_sender_callable=partial(event_publisher, send_chan)
-        ) # type: ignore
+        )  # type: ignore
     else:
         completion: llama_cpp.Completion = await run_in_threadpool(llama, **kwargs) # type: ignore
         return completion
@@ -512,9 +518,11 @@ async def create_chat_completion(
     kwargs = body.model_dump(exclude=exclude)
 
     if body.logit_bias is not None:
-        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
-            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
-        ])
+        kwargs["logits_processor"] = llama_cpp.LogitsProcessorList(
+            [
+                make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+            ]
+        )
 
     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
@@ -542,7 +550,7 @@ async def event_publisher(inner_send_chan: MemoryObjectSendStream):
         return EventSourceResponse(
             recv_chan,
             data_sender_callable=partial(event_publisher, send_chan),
-        ) # type: ignore
+        )  # type: ignore
     else:
         completion: llama_cpp.ChatCompletion = await run_in_threadpool(
             llama.create_chat_completion, **kwargs  # type: ignore
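
Taken together, the hunks are consistent with running the Black formatter over the package: double-quoted strings, wrapped Field(...) calls, and two spaces before inline comments. Assuming Black is installed, its Python API can be used to check that the new style matches what Black produces:

# Sketch: verify one of the reformatted lines against Black's output
# (assumes `pip install black`; not part of this commit).
import black

src = "seed: int = Field(default=1337, description='Random seed. -1 for random.')\n"
print(black.format_str(src, mode=black.Mode()), end="")
# seed: int = Field(default=1337, description="Random seed. -1 for random.")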
