Improve regex matching whilst still validating allowed tensors

EAddario · EAddario · commit 3e031bc7047d · 2025-04-17T09:58:09.000+01:00
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
@@ -261,7 +261,6 @@ static bool parse_tensor_type(const char * data, std::vector<tensor_quantization
         printf("\n%s: missing tensor name\n\n", __func__);
         return false;
     }
-
     if (const size_t qt_len = strlen(sep); qt_len == 1) {
         printf("\n%s: missing quantization type\n\n", __func__);
         return false;
@@ -270,37 +269,15 @@ static bool parse_tensor_type(const char * data, std::vector<tensor_quantization
     std::string tn(data, tn_len);
     std::transform(tn.begin(), tn.end(), tn.begin(), tolower);
     sep++;
-    const std::string qt(sep);
-
-    bool found = false;
-    for (const auto & allowed : ALLOWED_TENSOR_TYPE) {
-        std::string tensor;
-        tensor = tn.rfind('.') != std::string::npos ? tn.substr(tn.rfind('.') + 1) : tn;
-        // handle special case of cls.output
-        std::string cls_output = "cls.output";
-        if (tn.find(cls_output) != std::string::npos) {
-            tensor = "cls.output";
-        }
-        // check if an allowed tensor exists and it's at the end of the kv string
-        if (tensor == allowed) {
-            found = true;
-            break;
-        }
-    }
-    if (!found) {
-        printf("\n%s: invalid tensor name '%s'\n\n", __func__, tn.c_str());
-        return false;
-    }
-
-    if (parse_ggml_type(qt.c_str()) == GGML_TYPE_COUNT) {
-        printf("\n%s: invalid quantization type '%s'\n\n", __func__, qt.c_str());
-        return false;
-    }
-
     tensor_quantization tqz;
     tqz.name = tn;
-    tqz.quant = parse_ggml_type(qt.c_str());
+    tqz.quant = parse_ggml_type(sep);
     tensor_type.emplace_back(std::move(tqz));
+    if (tqz.quant == GGML_TYPE_COUNT) {
+        printf("\n%s: invalid quantization type '%s'\n\n", __func__, sep);
+        return false;
+    }
+
     return true;
 }
 
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
@@ -790,17 +790,25 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
                 // unless the user specifies a type
                 if (params->tensor_types) {
                     const std::vector<tensor_quantization> & tensor_types = *static_cast<const std::vector<tensor_quantization> *>(params->tensor_types);
+                    const std::string tensor_name(tensor->name);
                     for (const auto & [tname, qtype] : tensor_types) {
-                        if (std::regex pattern(tname); std::regex_search(tensor->name, pattern)) {
-                            if (qtype != new_type) {
-                                LLAMA_LOG_DEBUG("(overriding %s -> %s), ", ggml_type_name(new_type), ggml_type_name(qtype));
+                        if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
+                            for (const auto & allowed : ALLOWED_TENSOR_TYPE) {
+                                if (tensor_name.find(allowed) != std::string::npos) {
+                                    if  (qtype != new_type) {
+                                        LLAMA_LOG_DEBUG("(overriding %s), ", ggml_type_name(new_type));
+                                        new_type = qtype;
+                                        break;
+                                    }
+                                }
                             }
-                            new_type = qtype;
-                            break;
+                            goto loop_exit; // if two or more types are specified for the tensor, first match wins
                         }
                     }
                 }
+                loop_exit:;
             }
+
             if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {
                 new_type = params->token_embedding_type;
             }

Original file line number	Diff line number	Diff line change
`@@ -790,17 +790,25 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::`
`790`	`790`	`// unless the user specifies a type`
`791`	`791`	`if (params->tensor_types) {`
`792`	`792`	`const std::vector<tensor_quantization> & tensor_types = *static_cast<const std::vector<tensor_quantization> *>(params->tensor_types);`
	`793`	`+ const std::string tensor_name(tensor->name);`
`793`	`794`	`for (const auto & [tname, qtype] : tensor_types) {`
`794`		`- if (std::regex pattern(tname); std::regex_search(tensor->name, pattern)) {`
`795`		`- if (qtype != new_type) {`
`796`		`- LLAMA_LOG_DEBUG("(overriding %s -> %s), ", ggml_type_name(new_type), ggml_type_name(qtype));`
	`795`	`+ if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {`
	`796`	`+ for (const auto & allowed : ALLOWED_TENSOR_TYPE) {`
	`797`	`+ if (tensor_name.find(allowed) != std::string::npos) {`
	`798`	`+ if (qtype != new_type) {`
	`799`	`+ LLAMA_LOG_DEBUG("(overriding %s), ", ggml_type_name(new_type));`
	`800`	`+ new_type = qtype;`
	`801`	`+ break;`
	`802`	`+ }`
	`803`	`+ }`
`797`	`804`	`}`
`798`		`- new_type = qtype;`
`799`		`- break;`
	`805`	`+ goto loop_exit; // if two or more types are specified for the tensor, first match wins`
`800`	`806`	`}`
`801`	`807`	`}`
`802`	`808`	`}`
	`809`	`+ loop_exit:;`
`803`	`810`	`}`
	`811`	`+`
`804`	`812`	`if (params->token_embedding_type < GGML_TYPE_COUNT && strcmp(tensor->name, "token_embd.weight") == 0) {`
`805`	`813`	`new_type = params->token_embedding_type;`
`806`	`814`	`}`