Skip to content

Commit cdc7348

Browse files
committed
more logits_all params restored
1 parent b02f6df commit cdc7348

File tree

3 files changed

+5
-1
lines changed

3 files changed

+5
-1
lines changed

llama_cpp/llama.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,9 @@ def free_lora_adapter():
462462

463463
self.n_tokens = 0
464464
self.input_ids: npt.NDArray[np.intc] = np.ndarray((n_ctx,), dtype=np.intc)
465-
self.scores: npt.NDArray[np.single] = np.ndarray((n_batch, self._n_vocab), dtype=np.single)
465+
self.scores: npt.NDArray[np.single] = np.ndarray(
466+
(n_ctx if logits_all == True else n_batch, self._n_vocab), dtype=np.single
467+
)
466468

467469
self._mirostat_mu = ctypes.c_float(
468470
2.0 * 5.0

llama_cpp/server/model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
276276
yarn_orig_ctx=settings.yarn_orig_ctx,
277277
defrag_thold=settings.defrag_thold,
278278
mul_mat_q=settings.mul_mat_q,
279+
logits_all=settings.logits_all,
279280
embedding=settings.embedding,
280281
offload_kqv=settings.offload_kqv,
281282
flash_attn=settings.flash_attn,

llama_cpp/server/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class ModelSettings(BaseSettings):
9999
mul_mat_q: bool = Field(
100100
default=True, description="if true, use experimental mul_mat_q kernels"
101101
)
102+
logits_all: bool = Field(default=True, description="Whether to return logits.")
102103
embedding: bool = Field(default=False, description="Whether to use embeddings.")
103104
offload_kqv: bool = Field(
104105
default=True, description="Whether to offload kqv to the GPU."

0 commit comments

Comments (0)