
Commit 4d851b0

Hook metrics into llamacpp
1 parent 268d4a0 commit 4d851b0

File tree

1 file changed: +3 -0 lines changed


guidance/models/llama_cpp/_llama_cpp.py

Lines changed: 3 additions & 0 deletions
@@ -193,9 +193,12 @@ def get_logits(self, token_ids, forced_bytes, current_temp):
         batch.logits[n_tokens - 1] = True
 
         ret = llama_cpp.llama_decode(self.model_obj.ctx, batch)
+        self.metrics.engine_input_tokens += n_tokens
         if ret != 0:
             raise Exception(f"Call to llama_cpp.llama_decode returned {ret}.")
 
+        self.metrics.engine_output_tokens += 1
+
         # get the logits
         logits = llama_cpp.llama_get_logits(self.model_obj.ctx)
         if llama_cpp.__version__ < "0.2.58":
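
For context, the hunk above bumps two counters on the engine's metrics object: engine_input_tokens grows by the number of tokens handed to llama_cpp.llama_decode, and engine_output_tokens grows by one per get_logits call (one logits vector per forward pass). Below is a minimal, self-contained sketch of that bookkeeping; EngineMetrics and FakeEngine are hypothetical stand-ins for illustration, and only the field names and the self.metrics attribute come from the diff itself.

    # Hypothetical sketch of the metrics bookkeeping this commit hooks into.
    # Only engine_input_tokens / engine_output_tokens and self.metrics appear
    # in the diff; everything else here is an assumed stand-in.
    from dataclasses import dataclass


    @dataclass
    class EngineMetrics:
        engine_input_tokens: int = 0   # tokens sent through the decode call
        engine_output_tokens: int = 0  # logits vectors produced, one per call


    class FakeEngine:
        """Stand-in showing where get_logits would bump the counters."""

        def __init__(self):
            self.metrics = EngineMetrics()

        def get_logits(self, token_ids):
            n_tokens = len(token_ids)
            # ... build the batch and call llama_cpp.llama_decode(...) here ...
            self.metrics.engine_input_tokens += n_tokens   # count tokens fed in
            # ... check the return code and fetch the logits ...
            self.metrics.engine_output_tokens += 1         # one output per call
            return [0.0]  # placeholder logits


    engine = FakeEngine()
    engine.get_logits([1, 2, 3])
    print(engine.metrics)  # EngineMetrics(engine_input_tokens=3, engine_output_tokens=1)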
