Skip to content

Commit 65ff041

Browse files
committed
added more perf stats
1 parent ea21a9d commit 65ff041

File tree

4 files changed

+39
-2
lines changed

expose.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,9 @@ extern "C"
278278
int get_last_token_count() {
279279
return last_token_count;
280280
}
281+
int get_last_input_count() {
282+
return last_input_count;
283+
}
281284
int get_last_seed()
282285
{
283286
return last_seed;

expose.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ extern bool generation_finished;
286286
extern float last_eval_time;
287287
extern float last_process_time;
288288
extern int last_token_count;
289+
extern int last_input_count;
289290
extern int last_seed;
290291
extern int total_gens;
291292
extern int total_img_gens;

gpttype_adapter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ bool generation_finished;
5959
float last_process_time = 0;
6060
float last_eval_time = 0;
6161
int last_token_count = 0;
62+
int last_input_count = 0;
6263
int last_seed = -1;
6364
int total_gens = 0;
6465
int last_draft_success = 0;
@@ -1596,7 +1597,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
15961597
for (auto reject: llama_grammar_reject_candidates(grammar->rules, grammar->stacks, candidates_grammar)) {
15971598
rejects[reject.index] = true;
15981599
}
1599-
1600+
16001601
auto first = candidates->data;
16011602
auto last = first + candidates->size;
16021603
last = std::remove_if(first, last,
@@ -4318,6 +4319,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
43184319
last_eval_time = pt2;
43194320
last_process_time = pt1;
43204321
last_token_count = realnpredict;
4322+
last_input_count = (finaltokcount<0?0:finaltokcount);
43214323
last_seed = kcpp_data->seed;
43224324
last_draft_failed = draft_failures;
43234325
last_draft_success = draft_successes;

koboldcpp.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ def init_library():
525525
handle.get_last_eval_time.restype = ctypes.c_float
526526
handle.get_last_process_time.restype = ctypes.c_float
527527
handle.get_last_token_count.restype = ctypes.c_int
528+
handle.get_last_input_count.restype = ctypes.c_int
528529
handle.get_last_seed.restype = ctypes.c_int
529530
handle.get_last_draft_success.restype = ctypes.c_int
530531
handle.get_last_draft_failed.restype = ctypes.c_int
@@ -3017,6 +3018,7 @@ def do_GET(self):
30173018
lastp = handle.get_last_process_time()
30183019
laste = handle.get_last_eval_time()
30193020
lastc = handle.get_last_token_count()
3021+
lastic = handle.get_last_input_count()
30203022
totalgens = handle.get_total_gens()
30213023
totalimggens = handle.get_total_img_gens()
30223024
totalttsgens = handle.get_total_tts_gens()
@@ -3025,10 +3027,39 @@ def do_GET(self):
30253027
lastseed = handle.get_last_seed()
30263028
lastdraftsuccess = handle.get_last_draft_success()
30273029
lastdraftfailed = handle.get_last_draft_failed()
3030+
t_pp = float(lastp)*float(lastic)*0.001
3031+
t_gen = float(laste)*float(lastc)*0.001
3032+
s_pp = float(lastic)/t_pp if t_pp>0 else 0
3033+
s_gen = float(lastc)/t_gen if t_gen>0 else 0
30283034
uptime = time.time() - start_time
30293035
idletime = time.time() - last_req_time
30303036
is_quiet = True if (args.quiet and args.debugmode != 1) else False
3031-
response_body = (json.dumps({"last_process":lastp,"last_eval":laste,"last_token_count":lastc, "last_seed":lastseed, "last_draft_success":lastdraftsuccess, "last_draft_failed":lastdraftfailed, "total_gens":totalgens, "stop_reason":stopreason, "total_img_gens":totalimggens, "total_tts_gens":totalttsgens, "total_transcribe_gens":totaltranscribegens, "queue":requestsinqueue, "idle":(0 if modelbusy.locked() else 1), "hordeexitcounter":exitcounter, "uptime":uptime, "idletime":idletime, "quiet":is_quiet}).encode())
3037+
response_body = json.dumps(
3038+
{
3039+
"last_process": lastp,
3040+
"last_eval": laste,
3041+
"last_token_count": lastc,
3042+
"last_input_count": lastic,
3043+
"last_process_time": t_pp,
3044+
"last_eval_time": t_gen,
3045+
"last_process_speed": s_pp,
3046+
"last_eval_speed": s_gen,
3047+
"last_seed": lastseed,
3048+
"last_draft_success": lastdraftsuccess,
3049+
"last_draft_failed": lastdraftfailed,
3050+
"total_gens": totalgens,
3051+
"stop_reason": stopreason,
3052+
"total_img_gens": totalimggens,
3053+
"total_tts_gens": totalttsgens,
3054+
"total_transcribe_gens": totaltranscribegens,
3055+
"queue": requestsinqueue,
3056+
"idle": (0 if modelbusy.locked() else 1),
3057+
"hordeexitcounter": exitcounter,
3058+
"uptime": uptime,
3059+
"idletime": idletime,
3060+
"quiet": is_quiet,
3061+
}
3062+
).encode()
30323063

30333064
elif self.path.endswith('/api/extra/generate/check'):
30343065
if not self.secure_endpoint():

0 commit comments

Comments (0)