Skip to content

Commit 65ff041

Browse files
committed
added more perf stats
1 parent ea21a9d commit 65ff041

File tree

4 files changed

+39
-2
lines changed

expose.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,9 @@ extern "C"
278278
int get_last_token_count() {
279279
return last_token_count;
280280
}
281+
int get_last_input_count() {
282+
return last_input_count;
283+
}
281284
int get_last_seed()
282285
{
283286
return last_seed;

expose.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ extern bool generation_finished;
286286
extern float last_eval_time;
287287
extern float last_process_time;
288288
extern int last_token_count;
289+
extern int last_input_count;
289290
extern int last_seed;
290291
extern int total_gens;
291292
extern int total_img_gens;

gpttype_adapter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ bool generation_finished;
5959
float last_process_time = 0;
6060
float last_eval_time = 0;
6161
int last_token_count = 0;
62+
int last_input_count = 0;
6263
int last_seed = -1;
6364
int total_gens = 0;
6465
int last_draft_success = 0;
@@ -1596,7 +1597,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
15961597
for (auto reject: llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar)) {
15971598
rejects[reject.index] = true;
15981599
}
1599-
1600+
16001601
auto first = candidates->data;
16011602
auto last = first + candidates->size;
16021603
last = std::remove_if(first, last,
@@ -4318,6 +4319,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
43184319
last_eval_time = pt2;
43194320
last_process_time = pt1;
43204321
last_token_count = realnpredict;
4322+
last_input_count = (finaltokcount<0?0:finaltokcount);
43214323
last_seed = kcpp_data->seed;
43224324
last_draft_failed = draft_failures;
43234325
last_draft_success = draft_successes;

koboldcpp.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ def init_library():
525525
handle.get_last_eval_time.restype = ctypes.c_float
526526
handle.get_last_process_time.restype = ctypes.c_float
527527
handle.get_last_token_count.restype = ctypes.c_int
528+
handle.get_last_input_count.restype = ctypes.c_int
528529
handle.get_last_seed.restype = ctypes.c_int
529530
handle.get_last_draft_success.restype = ctypes.c_int
530531
handle.get_last_draft_failed.restype = ctypes.c_int
@@ -3017,6 +3018,7 @@ def do_GET(self):
30173018
lastp = handle.get_last_process_time()
30183019
laste = handle.get_last_eval_time()
30193020
lastc = handle.get_last_token_count()
3021+
lastic = handle.get_last_input_count()
30203022
totalgens = handle.get_total_gens()
30213023
totalimggens = handle.get_total_img_gens()
30223024
totalttsgens = handle.get_total_tts_gens()
@@ -3025,10 +3027,39 @@ def do_GET(self):
30253027
lastseed = handle.get_last_seed()
30263028
lastdraftsuccess = handle.get_last_draft_success()
30273029
lastdraftfailed = handle.get_last_draft_failed()
3030+
t_pp = float(lastp)*float(lastic)*0.001
3031+
t_gen = float(laste)*float(lastc)*0.001
3032+
s_pp = float(lastic)/t_pp if t_pp>0 else 0
3033+
s_gen = float(lastc)/t_gen if t_gen>0 else 0
30283034
uptime = time.time() - start_time
30293035
idletime = time.time() - last_req_time
30303036
is_quiet = True if (args.quiet and args.debugmode != 1) else False
3031-
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "last_seed":lastseed, "last_draft_success":lastdraftsuccess, "last_draft_failed":lastdraftfailed, "total_gens":totalgens, "stop_reason":stopreason, "total_img_gens":totalimggens, "total_tts_gens":totalttsgens, "total_transcribe_gens":totaltranscribegens, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1), "hordeexitcounter":exitcounter, "uptime":uptime, "idletime":idletime, "quiet":is_quiet}).encode())
3037+
response_body = json.dumps(
3038+
{
3039+
"last_process": lastp,
3040+
"last_eval": laste,
3041+
"last_token_count": lastc,
3042+
"last_input_count": lastic,
3043+
"last_process_time": t_pp,
3044+
"last_eval_time": t_gen,
3045+
"last_process_speed": s_pp,
3046+
"last_eval_speed": s_gen,
3047+
"last_seed": lastseed,
3048+
"last_draft_success": lastdraftsuccess,
3049+
"last_draft_failed": lastdraftfailed,
3050+
"total_gens": totalgens,
3051+
"stop_reason": stopreason,
3052+
"total_img_gens": totalimggens,
3053+
"total_tts_gens": totalttsgens,
3054+
"total_transcribe_gens": totaltranscribegens,
3055+
"queue": requestsinqueue,
3056+
"idle": (0 if modelbusy.locked() else 1),
3057+
"hordeexitcounter": exitcounter,
3058+
"uptime": uptime,
3059+
"idletime": idletime,
3060+
"quiet": is_quiet,
3061+
}
3062+
).encode()
30323063

30333064
elif self.path.endswith('/api/extra/generate/check'):
30343065
if not self.secure_endpoint():

0 commit comments

Comments (0)