Skip to content

Commit 78981b8

Browse files
committed
Remove debug-associated and other miscellaneous changes
1 parent 8783f42 commit 78981b8

File tree

6 files changed

+2
-15
lines changed

6 files changed

+2
-15
lines changed

common/arg.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,13 +1951,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
19511951
params.kv_unified = true;
19521952
}
19531953
).set_env("LLAMA_ARG_KV_SPLIT"));
1954-
add_opt(common_arg(
1955-
{"--dump-cache"},
1956-
"dump cache statistics after each token generation",
1957-
[](common_params & params) {
1958-
params.dump_cache = true;
1959-
}
1960-
).set_examples({LLAMA_EXAMPLE_MAIN}));
19611954
add_opt(common_arg(
19621955
{"--no-context-shift"},
19631956
string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),

common/common.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,6 @@ struct common_params {
399399
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
400400
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
401401

402-
bool dump_cache = false; // dump cache statistics after each token
403-
404402
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
405403

406404
// multimodal models (see tools/mtmd)

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
154154

155155
if (!ggml_is_quantized(t->type)) {
156156
uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
157-
ggml_print_tensor(data, t->type, t->ne, t->nb, 8);
157+
ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
158158
}
159159

160160
return true;

examples/model-conversion/qwen3stories.sh

Lines changed: 0 additions & 3 deletions
This file was deleted.

pyrightconfig.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
"reportDuplicateImport": "error",
77
"reportDeprecated": "warning",
88
"reportUnnecessaryTypeIgnoreComment": "information",
9-
"reportAttributeAccessIssue": "warning",
109
"disableBytesTypePromotions": false, // TODO: change once Python 3.12 is the minimum
1110
"executionEnvironments": [
1211
{

src/llama-context.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ void llama_context::output_reorder() {
13621362
//
13631363

13641364
uint32_t llama_context::graph_max_nodes() const {
1365-
return std::max<uint32_t>(16384, 512u*model.n_tensors());
1365+
return std::max<uint32_t>(8192, 128u*model.n_tensors());
13661366
}
13671367

13681368
llm_graph_result * llama_context::get_gf_res_reserve() const {

0 commit comments

Comments
 (0)