
Commit 9f01a13

Remove debug comments
1 parent 0f5636a commit 9f01a13

File tree

3 files changed: +7 -41 lines changed

    ggml/src/ggml-backend.cpp
    src/llama-graph.cpp
    src/llama-model.cpp

ggml/src/ggml-backend.cpp

Lines changed: 0 additions & 19 deletions
@@ -244,9 +244,6 @@ void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor *
 
 void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     if (tensor->data == NULL) {
-        fprintf(stderr, "ERROR: Tensor '%s' data is NULL - cannot read tensor\n",
-                tensor->name ? tensor->name : "unnamed");
-
         // For output tensors that may not have been properly allocated
         if (tensor->flags & GGML_TENSOR_FLAG_OUTPUT) {
             fprintf(stderr, "  Output tensor detected - this may indicate scheduling issue\n");
@@ -280,16 +277,6 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, siz
         //         tensor->name ? tensor->name : "unnamed");
         return;
     }
-
-    // Enhanced error message with tensor information
-    fprintf(stderr, "ERROR: Tensor buffer not set for tensor '%s' (op: %s, type: %s)\n",
-            tensor->name ? tensor->name : "unnamed",
-            ggml_op_name(tensor->op),
-            ggml_type_name(tensor->type));
-    if (tensor->view_src) {
-        fprintf(stderr, "  This is a view tensor with view_src: '%s'\n",
-                tensor->view_src->name ? tensor->view_src->name : "unnamed");
-    }
 }
 GGML_ASSERT(buf != NULL && "tensor buffer not set");
 GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
@@ -1679,12 +1666,6 @@ void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct gg
 ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
     int backend_index = tensor_backend_id(node);
     if (backend_index == -1) {
-        // Enhanced debugging for unassigned tensors
-        fprintf(stderr, "ERROR: Tensor '%s' (op: %s, flags: 0x%x) has no backend assigned (backend_id = -1)\n",
-                node->name ? node->name : "unnamed",
-                ggml_op_name(node->op),
-                node->flags);
-
         // Try to assign to CPU backend as fallback for output tensors
         if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
             fprintf(stderr, "  Attempting to assign output tensor to CPU backend\n");

src/llama-graph.cpp

Lines changed: 0 additions & 15 deletions
@@ -853,11 +853,6 @@ ggml_tensor * llm_graph_context::build_inp_pos() const {
 
     cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, (int64_t)n_tokens*hparams.n_pos_per_embd());
     ggml_set_input(cur);
-
-    // Ensure input tensor has a name for debugging
-    if (!cur->name || strlen(cur->name) == 0) {
-        ggml_set_name(cur, "inp_pos");
-    }
 
     res->add_input(std::move(inp));
 
@@ -956,11 +951,6 @@ ggml_tensor * llm_graph_context::build_inp_pos_bucket_enc() const {
 
     cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_tokens);
     ggml_set_input(cur);
-
-    // Ensure input tensor has a name for debugging
-    if (!cur->name || strlen(cur->name) == 0) {
-        ggml_set_name(cur, "inp_pos_bucket_enc");
-    }
 
     res->add_input(std::move(inp));
 
@@ -978,11 +968,6 @@ ggml_tensor * llm_graph_context::build_inp_pos_bucket_dec() const {
 
     cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_kv, n_tokens);
     ggml_set_input(cur);
-
-    // Ensure input tensor has a name for debugging
-    if (!cur->name || strlen(cur->name) == 0) {
-        ggml_set_name(cur, "inp_pos_bucket_dec");
-    }
 
     res->add_input(std::move(inp));
 
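The same five-line guard is removed from all three builders, and it was partly dead code: ggml_tensor::name is a fixed char array, so !cur->name can never be true and only the strlen check did anything. If the names are still wanted, the unconditional form below would do; this is a sketch of an alternative, not something this commit adds:

    // Sketch (alternative, not in this commit): name the input directly.
    // ggml_set_name copies into the tensor's fixed-size name buffer,
    // so there is no NULL case to guard against.
    cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, (int64_t)n_tokens*hparams.n_pos_per_embd());
    ggml_set_input(cur);
    ggml_set_name(cur, "inp_pos");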
src/llama-model.cpp

Lines changed: 7 additions & 7 deletions
@@ -8219,8 +8219,8 @@ struct llm_build_plamo2 : public llm_graph_context {
     ggml_tensor * inpL;
 
     // key variables used in PLaMo-2 attention
-    const int64_t n_embd_head = hparams.n_embd_head_v;
-    ggml_tensor * inp_pos = build_inp_pos();
+    // const int64_t n_embd_head = hparams.n_embd_head_v;
+    // ggml_tensor * inp_pos = build_inp_pos();
 
     // {n_embd, n_tokens}
     inpL = build_inp_embd(model.tok_embd);
@@ -8272,7 +8272,7 @@ struct llm_build_plamo2 : public llm_graph_context {
     ggml_tensor * inpSA = inpL;
 
     // attention layer specific variables
-    const int64_t n_embd_head = hparams.n_embd_head_v;
+    // const int64_t n_embd_head = hparams.n_embd_head_v;
     ggml_tensor * inp_pos = build_inp_pos();
 
     // norm
@@ -8282,9 +8282,9 @@ struct llm_build_plamo2 : public llm_graph_context {
     // self-attention
     {
         // For PLaMo-2 hybrid architecture, get the correct attention context
-        const auto * mctx_hybrid = static_cast<const llama_memory_hybrid_context *>(mctx);
-        const auto * unified_ctx = mctx_hybrid->get_attn();
-        auto inp = std::make_unique<llm_graph_input_attn_kv_unified>(hparams, cparams, unified_ctx);
+        // const auto * mctx_hybrid = static_cast<const llama_memory_hybrid_context *>(mctx);
+        // const auto * unified_ctx = mctx_hybrid->get_attn();
+        auto inp = std::make_unique<llm_graph_input_attn_kv_unified>(hparams, cparams, attn_ctx);
         auto * inp_attn = inp.release();
 
         // PLaMo-2 uses combined QKV tensor
@@ -8341,7 +8341,7 @@ struct llm_build_plamo2 : public llm_graph_context {
 
         // PLaMo-2 GQA: expand K and V heads to match Q heads (equivalent to _expand_kv)
         if (n_head_kv < n_head) {
-            const int n_group = n_head / n_head_kv;
+            // const int n_group = n_head / n_head_kv;
 
             // manually expand K and V tensors to repeat each head n_group times
             // create expanded tensors with target dimensions
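Most hunks in this file only comment out now-unused locals, but the self-attention hunk also rewires the input: the per-layer static_cast of mctx is replaced by attn_ctx, whose definition sits outside this diff. A plausible sketch, assuming attn_ctx is the same cast hoisted above the layer loop:

    // Assumed sketch; the actual definition of attn_ctx is not shown in
    // this diff. Presumably the hybrid-context cast is done once and the
    // result reused by every layer:
    const auto * mctx_hybrid = static_cast<const llama_memory_hybrid_context *>(mctx);
    const auto * attn_ctx    = mctx_hybrid->get_attn();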
