imatrix: allow specifying the name of the output tensor

Nexesenex · Nexesenex · commit b39aa8e307f9 · 2024-08-15T02:36:22.000+02:00
Cherry-picked from ik_llama.cpp, a llama.cpp fork maintained by Iwan Kawrakow.
diff --git a/common/common.cpp b/common/common.cpp
@@ -1296,6 +1296,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.process_output = true;
         return true;
     }
+    if (arg == "--output-tensor-name") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        params.output_tensor_name = argv[i];
+        return true;
+    }
     if (arg == "--no-ppl") {
         params.compute_ppl = false;
         return true;
diff --git a/common/common.h b/common/common.h
@@ -246,6 +246,8 @@ struct gpt_params {
 
     // imatrix params
     std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file
+	
+    std::string output_tensor_name = "output.weight"; // name of the output tensor
 
     int32_t n_out_freq  = 10; // output the imatrix every n_out_freq iterations
     int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
@@ -83,7 +83,8 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         if (t->op != GGML_OP_MUL_MAT) return false;
         // why are small batches ignored (<16 tokens)?
         if (src1->ne[1] < 16 || src1->type != GGML_TYPE_F32) return false;
-        if (!(wname.substr(0, 4) == "blk." || (m_params.process_output && wname == "output.weight"))) return false;
+        printf("wname = %s\n", wname.c_str());
+        if (!(wname.substr(0, 4) == "blk." || (m_params.process_output && wname == m_params.output_tensor_name))) return false;
         return true;
     }
 

Original file line number	Diff line number	Diff line change
`@@ -83,7 +83,8 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *`
`83`	`83`	`if (t->op != GGML_OP_MUL_MAT) return false;`
`84`	`84`	`// why are small batches ignored (<16 tokens)?`
`85`	`85`	`if (src1->ne[1] < 16 \|\| src1->type != GGML_TYPE_F32) return false;`
`86`		`- if (!(wname.substr(0, 4) == "blk." \|\| (m_params.process_output && wname == "output.weight"))) return false;`
	`86`	`+ printf("wname = %s\n", wname.c_str());`
	`87`	`+ if (!(wname.substr(0, 4) == "blk." \|\| (m_params.process_output && wname == m_params.output_tensor_name))) return false;`
`87`	`88`	`return true;`
`88`	`89`	`}`
`89`	`90`