ggml-org
diff --git a/common/arg.cpp b/common/arg.cpp
Lines changed: 0 additions & 7 deletions
diff --git a/common/common.h b/common/common.h
Lines changed: 0 additions & 2 deletions
diff --git a/comp.sh b/comp.sh
Lines changed: 0 additions & 8 deletions
diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/examples/model-conversion/qwen3stories.sh b/examples/model-conversion/qwen3stories.sh
Lines changed: 0 additions & 3 deletions
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
Lines changed: 6 additions & 4 deletions
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
Lines changed: 3 additions & 8 deletions
@@ -1951,13 +1951,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.kv_unified = true;
         }
     ).set_env("LLAMA_ARG_KV_SPLIT"));
-    add_opt(common_arg(
-        {"--dump-cache"},
-        "dump cache statistics after each token generation",
-        [](common_params & params) {
-            params.dump_cache = true;
-        }
-    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--no-context-shift"},
         string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
 
@@ -399,8 +399,6 @@ struct common_params {
     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
 
-    bool dump_cache = false; // dump cache statistics after each token
-
     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
 
     // multimodal models (see tools/mtmd)
 
@@ -154,7 +154,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
 
     if (!ggml_is_quantized(t->type)) {
         uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
-        ggml_print_tensor(data, t->type, t->ne, t->nb, 8);
+        ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
     }
 
     return true;
 
@@ -245,8 +245,6 @@
 
 #define GGML_MROPE_SECTIONS   4
 
-#define GGML_DELTA_NET_CHUNK    64
-
 #define GGML_UNUSED(x) (void)(x)
 #ifdef __CUDACC__
 template<typename... Args>
@@ -545,8 +543,7 @@ extern "C" {
         GGML_OP_RWKV_WKV6,
         GGML_OP_GATED_LINEAR_ATTN,
         GGML_OP_RWKV_WKV7,
-        GGML_OP_DELTA_NET,
-        GGML_OP_DELTA_NET_RECURRENT,
+        GGML_OP_SOLVE_TRI,
 
         GGML_OP_UNARY,
 
@@ -2498,6 +2495,11 @@ extern "C" {
         bool                  use_qk_l2norm,
         float                 eps_norm);
 
+    GGML_API struct ggml_tensor * ggml_solve_tri(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * x);
+
     // custom operators
 
     typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
 
@@ -2011,13 +2011,9 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_rwkv_wkv7(params, tensor);
             } break;
-        case GGML_OP_DELTA_NET:
+        case GGML_OP_SOLVE_TRI:
             {
-                ggml_compute_forward_delta_net_f32(params, tensor);
-            } break;
-        case GGML_OP_DELTA_NET_RECURRENT:
-            {
-                ggml_compute_forward_delta_net_recurrent_f32(params, tensor);
+                ggml_compute_forward_solve_tri(params, tensor);
             } break;
         case GGML_OP_MAP_CUSTOM1:
             {
@@ -2180,6 +2176,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = 1;
             } break;
         case GGML_OP_COUNT_EQUAL:
+        case GGML_OP_SOLVE_TRI:
             {
                 n_tasks = n_threads;
             } break;
@@ -2304,8 +2301,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
         case GGML_OP_POOL_1D:
         case GGML_OP_POOL_2D:
         case GGML_OP_POOL_2D_BACK:
-        case GGML_OP_DELTA_NET_RECURRENT:
-        case GGML_OP_DELTA_NET:
             {
                 n_tasks = 1;
             } break;
Original file line number	Diff line number	Diff line change
`@@ -154,7 +154,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {`
`154`	`154`
`155`	`155`	`if (!ggml_is_quantized(t->type)) {`
`156`	`156`	`uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();`
`157`		`- ggml_print_tensor(data, t->type, t->ne, t->nb, 8);`
	`157`	`+ ggml_print_tensor(data, t->type, t->ne, t->nb, 3);`
`158`	`158`	`}`
`159`	`159`
`160`	`160`	`return true;`