
Commit 803ee23

Merge branch 'ggml-org:master' into mradermacher
2 parents d43c493 + 5fce5f9 commit 803ee23

23 files changed, +682 -52 lines

common/chat-parser.cpp

Lines changed: 5 additions & 0 deletions
@@ -49,6 +49,7 @@ bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::
 
     // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
     result_.tool_calls.emplace_back(tool_call);
+
     return true;
 }
 bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
@@ -378,3 +379,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
         /* .is_partial = */ found_healing_marker,
     };
 }
+
+void common_chat_msg_parser::clear_tools() {
+    result_.tool_calls.clear();
+}

common/chat-parser.h

Lines changed: 2 additions & 0 deletions
@@ -115,4 +115,6 @@ class common_chat_msg_parser {
         const std::vector<std::vector<std::string>> & args_paths = {},
         const std::vector<std::vector<std::string>> & content_paths = {}
     );
+
+    void clear_tools();
 };

common/chat.cpp

Lines changed: 3 additions & 1 deletion
@@ -1921,7 +1921,9 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
     } catch (const common_chat_msg_partial_exception & ex) {
         LOG_DBG("Partial parse: %s\n", ex.what());
         if (!is_partial) {
-            throw std::runtime_error(ex.what());
+            builder.clear_tools();
+            builder.move_to(0);
+            common_chat_parse_content_only(builder);
         }
     }
     auto msg = builder.result();
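Taken together with the clear_tools() helper added above, this chat.cpp change means a fully received (non-partial) message whose tool-call syntax fails to parse is no longer rejected with an exception: any partially collected tool calls are discarded, the parser rewinds to the start of the input, and the message is re-parsed as plain content. Below is a rough Python sketch of that control flow; it is purely illustrative, not the project's actual API, and the helper names only loosely mirror the C++ above.

# Illustrative sketch of the new fallback behaviour in common_chat_parse (not the real C++).
class MessageBuilder:
    def __init__(self, text: str):
        self.text = text
        self.pos = 0
        self.tool_calls: list[dict] = []
        self.content = ""

    def clear_tools(self) -> None:          # mirrors common_chat_msg_parser::clear_tools()
        self.tool_calls.clear()

    def move_to(self, pos: int) -> None:
        self.pos = pos


def parse_message(text: str, is_partial: bool) -> MessageBuilder:
    builder = MessageBuilder(text)
    try:
        # Stand-in for the format-specific tool-call parsing, which raises on malformed syntax.
        raise ValueError("tool-call syntax did not parse")
    except ValueError:
        if not is_partial:
            # Old behaviour: re-raise and fail the request.
            # New behaviour: drop any partially collected tool calls, rewind to the
            # start, and treat the whole message as plain content (the stand-in for
            # common_chat_parse_content_only below).
            builder.clear_tools()
            builder.move_to(0)
            builder.content = builder.text
    return builder


print(parse_message("<tool_call>{broken json", is_partial=False).content)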

convert_hf_to_gguf.py

Lines changed: 28 additions & 0 deletions
@@ -5262,6 +5262,34 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@ModelBase.register("Dots1ForCausalLM")
+class Dots1Model(Qwen2MoeModel):
+    model_arch = gguf.MODEL_ARCH.DOTS1
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.hparams["num_experts"] = self.hparams["n_routed_experts"]
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_leading_dense_block_count(self.hparams["first_k_dense_replace"])
+        self.gguf_writer.add_expert_shared_count(self.hparams["n_shared_experts"])
+        self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
+        self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
+
+        if self.hparams["scoring_func"] == "noaux_tc":
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+        else:
+            raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.endswith("e_score_correction_bias"):
+            name = name.replace("e_score_correction_bias", "e_score_correction.bias")
+        if "shared_experts" in name:
+            return [(self.map_tensor_name(name), data_torch)]
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("PLMForCausalLM")
 class PLMModel(TextModel):
     model_arch = gguf.MODEL_ARCH.PLM
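The only tensor-name adjustment Dots1Model makes before deferring to the Qwen2MoeModel logic is renaming the expert-gating bias so it lines up with the GGUF tensor mapping. A minimal standalone sketch of that rename follows, using a hypothetical checkpoint tensor name for illustration:

# Standalone illustration of the rename done in Dots1Model.modify_tensors.
# The example tensor name is hypothetical; real names come from the HF checkpoint.
def rename_gate_bias(name: str) -> str:
    if name.endswith("e_score_correction_bias"):
        name = name.replace("e_score_correction_bias", "e_score_correction.bias")
    return name

print(rename_gate_bias("model.layers.3.mlp.gate.e_score_correction_bias"))
# -> model.layers.3.mlp.gate.e_score_correction.bias, i.e. the base name
#    "model.layers.{bid}.mlp.gate.e_score_correction" registered for
#    MODEL_TENSOR.FFN_EXP_PROBS_B in gguf-py/gguf/tensor_mapping.py (see below)
#    plus a ".bias" suffix, which is how the mapping machinery matches it.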

docs/function-calling.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll
 - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2
 - Functionary v3.1 / v3.2
 - Hermes 2/3, Qwen 2.5
-- Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034)
+- Qwen 2.5 Coder
 - Mistral Nemo
 - Firefunction v2
 - Command R7B

gguf-py/gguf/constants.py

Lines changed: 26 additions & 0 deletions
@@ -343,6 +343,7 @@ class MODEL_ARCH(IntEnum):
     WAVTOKENIZER_DEC = auto()
     PLM              = auto()
     BAILINGMOE       = auto()
+    DOTS1            = auto()
 
 
 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -623,6 +624,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
     MODEL_ARCH.PLM:              "plm",
     MODEL_ARCH.BAILINGMOE:       "bailingmoe",
+    MODEL_ARCH.DOTS1:            "dots1"
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -2044,6 +2046,30 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.FFN_DOWN_SHEXP,
         MODEL_TENSOR.FFN_UP_SHEXP,
     ],
+    MODEL_ARCH.DOTS1: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     # TODO
 }
 
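As a quick way to see what these registrations buy, here is a small hedged check. It assumes the gguf-py package from this tree is importable and that the two dictionaries being extended above are MODEL_ARCH_NAMES and MODEL_TENSORS, the names they carry in constants.py:

# Sanity check of the new DOTS1 registrations (run with gguf-py on PYTHONPATH).
import gguf

# The architecture string written into GGUF metadata for converted dots1 models:
print(gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.DOTS1])                                    # dots1
# The expert-gating bias tensor is part of the allowed tensor set for this arch:
print(gguf.MODEL_TENSOR.FFN_EXP_PROBS_B in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.DOTS1])  # True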

gguf-py/gguf/tensor_mapping.py

Lines changed: 1 addition & 1 deletion
@@ -305,7 +305,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.FFN_EXP_PROBS_B: (
-            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
+            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
         ),
 
         # Feed-forward up

include/llama.h

Lines changed: 2 additions & 2 deletions
@@ -243,14 +243,14 @@ extern "C" {
 
     typedef bool (*llama_progress_callback)(float progress, void * user_data);
 
-    // Input data for llama_decode
+    // Input data for llama_encode/llama_decode
     // A llama_batch object can contain input about one or many sequences
     // The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens
     //
     // - token  : the token ids of the input (used when embd is NULL)
     // - embd   : token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
     // - pos    : the positions of the respective token in the sequence
-    //            (if set to NULL, the token position will be tracked automatically by llama_decode)
+    //            (if set to NULL, the token position will be tracked automatically by llama_encode/llama_decode)
     // - seq_id : the sequence to which the respective token belongs
     //            (if set to NULL, the sequence ID will be assumed to be 0)
     // - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output
requirements/requirements-compare-llama-bench.txt

Lines changed: 1 addition & 0 deletions
@@ -1,2 +1,3 @@
 tabulate~=0.9.0
 GitPython~=3.1.43
+matplotlib~=3.10.0

scripts/compare-llama-bench.py

Lines changed: 168 additions & 1 deletion
@@ -19,6 +19,7 @@
     print("the following Python libraries are required: GitPython, tabulate.")  # noqa: NP100
     raise e
 
+
 logger = logging.getLogger("compare-llama-bench")
 
 # All llama-bench SQL fields
@@ -122,11 +123,15 @@
 parser.add_argument("--check", action="store_true", help="check if all required Python libraries are installed")
 parser.add_argument("-s", "--show", help=help_s)
 parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
+parser.add_argument("--plot", help="generate a performance comparison plot and save to specified file (e.g., plot.png)")
+parser.add_argument("--plot_x", help="parameter to use as x axis for plotting (default: n_depth)", default="n_depth")
+parser.add_argument("--plot_log_scale", action="store_true", help="use log scale for x axis in plots (off by default)")
 
 known_args, unknown_args = parser.parse_known_args()
 
 logging.basicConfig(level=logging.DEBUG if known_args.verbose else logging.INFO)
 
+
 if known_args.check:
     # Check if all required Python libraries are installed. Would have failed earlier if not.
     sys.exit(0)
@@ -499,7 +504,6 @@ def valid_format(data_files: list[str]) -> bool:
 
 name_compare = bench_data.get_commit_name(hexsha8_compare)
 
-
 # If the user provided columns to group the results by, use them:
 if known_args.show is not None:
     show = known_args.show.split(",")
@@ -544,6 +548,14 @@ def valid_format(data_files: list[str]) -> bool:
             show.remove(prop)
         except ValueError:
             pass
+
+    # Add plot_x parameter to parameters to show if it's not already present:
+    if known_args.plot:
+        for k, v in PRETTY_NAMES.items():
+            if v == known_args.plot_x and k not in show:
+                show.append(k)
+                break
+
     rows_show = bench_data.get_rows(show, hexsha8_baseline, hexsha8_compare)
 
 if not rows_show:
@@ -600,6 +612,161 @@ def valid_format(data_files: list[str]) -> bool:
 headers = [PRETTY_NAMES[p] for p in show]
 headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
 
+if known_args.plot:
+    def create_performance_plot(table_data: list[list[str]], headers: list[str], baseline_name: str, compare_name: str, output_file: str, plot_x_param: str, log_scale: bool = False):
+        try:
+            import matplotlib.pyplot as plt
+            import matplotlib
+            matplotlib.use('Agg')
+        except ImportError as e:
+            logger.error("matplotlib is required for --plot.")
+            raise e
+
+        data_headers = headers[:-4]  # Exclude the last 4 columns (Test, baseline t/s, compare t/s, Speedup)
+        plot_x_index = None
+        plot_x_label = plot_x_param
+
+        if plot_x_param not in ["n_prompt", "n_gen", "n_depth"]:
+            pretty_name = PRETTY_NAMES.get(plot_x_param, plot_x_param)
+            if pretty_name in data_headers:
+                plot_x_index = data_headers.index(pretty_name)
+                plot_x_label = pretty_name
+            elif plot_x_param in data_headers:
+                plot_x_index = data_headers.index(plot_x_param)
+                plot_x_label = plot_x_param
+            else:
+                logger.error(f"Parameter '{plot_x_param}' not found in current table columns. Available columns: {', '.join(data_headers)}")
+                return
+
+        grouped_data = {}
+
+        for i, row in enumerate(table_data):
+            group_key_parts = []
+            test_name = row[-4]
+
+            base_test = ""
+            x_value = None
+
+            if plot_x_param in ["n_prompt", "n_gen", "n_depth"]:
+                for j, val in enumerate(row[:-4]):
+                    header_name = data_headers[j]
+                    if val is not None and str(val).strip():
+                        group_key_parts.append(f"{header_name}={val}")
+
+                if plot_x_param == "n_prompt" and "pp" in test_name:
+                    base_test = test_name.split("@")[0]
+                    x_value = base_test
+                elif plot_x_param == "n_gen" and "tg" in test_name:
+                    x_value = test_name.split("@")[0]
+                elif plot_x_param == "n_depth" and "@d" in test_name:
+                    base_test = test_name.split("@d")[0]
+                    x_value = int(test_name.split("@d")[1])
+                else:
+                    base_test = test_name
+
+                if base_test.strip():
+                    group_key_parts.append(f"Test={base_test}")
+            else:
+                for j, val in enumerate(row[:-4]):
+                    if j != plot_x_index:
+                        header_name = data_headers[j]
+                        if val is not None and str(val).strip():
+                            group_key_parts.append(f"{header_name}={val}")
+                    else:
+                        x_value = val
+
+                group_key_parts.append(f"Test={test_name}")
+
+            group_key = tuple(group_key_parts)
+
+            if group_key not in grouped_data:
+                grouped_data[group_key] = []
+
+            grouped_data[group_key].append({
+                'x_value': x_value,
+                'baseline': float(row[-3]),
+                'compare': float(row[-2]),
+                'speedup': float(row[-1])
+            })
+
+        if not grouped_data:
+            logger.error("No data available for plotting")
+            return
+
+        def make_axes(num_groups, max_cols=2, base_size=(8, 4)):
+            from math import ceil
+            cols = 1 if num_groups == 1 else min(max_cols, num_groups)
+            rows = ceil(num_groups / cols)
+
+            # Scale figure size by grid dimensions
+            w, h = base_size
+            fig, ax_arr = plt.subplots(rows, cols,
+                                       figsize=(w * cols, h * rows),
+                                       squeeze=False)
+
+            axes = ax_arr.flatten()[:num_groups]
+            return fig, axes
+
+        num_groups = len(grouped_data)
+        fig, axes = make_axes(num_groups)
+
+        plot_idx = 0
+
+        for group_key, points in grouped_data.items():
+            if plot_idx >= len(axes):
+                break
+            ax = axes[plot_idx]
+
+            try:
+                points_sorted = sorted(points, key=lambda p: float(p['x_value']) if p['x_value'] is not None else 0)
+                x_values = [float(p['x_value']) if p['x_value'] is not None else 0 for p in points_sorted]
+            except ValueError:
+                points_sorted = sorted(points, key=lambda p: group_key)
+                x_values = [p['x_value'] for p in points_sorted]
+
+            baseline_vals = [p['baseline'] for p in points_sorted]
+            compare_vals = [p['compare'] for p in points_sorted]
+
+            ax.plot(x_values, baseline_vals, 'o-', color='skyblue',
+                    label=f'{baseline_name}', linewidth=2, markersize=6)
+            ax.plot(x_values, compare_vals, 's--', color='lightcoral', alpha=0.8,
+                    label=f'{compare_name}', linewidth=2, markersize=6)
+
+            if log_scale:
+                ax.set_xscale('log', base=2)
+                unique_x = sorted(set(x_values))
+                ax.set_xticks(unique_x)
+                ax.set_xticklabels([str(int(x)) for x in unique_x])
+
+            title_parts = []
+            for part in group_key:
+                if '=' in part:
+                    key, value = part.split('=', 1)
+                    title_parts.append(f"{key}: {value}")
+
+            title = ', '.join(title_parts) if title_parts else "Performance comparison"
+
+            ax.set_xlabel(plot_x_label, fontsize=12, fontweight='bold')
+            ax.set_ylabel('Tokens per second (t/s)', fontsize=12, fontweight='bold')
+            ax.set_title(title, fontsize=12, fontweight='bold')
+            ax.legend(loc='best', fontsize=10)
+            ax.grid(True, alpha=0.3)
+
+            plot_idx += 1
+
+        for i in range(plot_idx, len(axes)):
+            axes[i].set_visible(False)
+
+        fig.suptitle(f'Performance comparison: {compare_name} vs. {baseline_name}',
+                     fontsize=14, fontweight='bold')
+        fig.subplots_adjust(top=1)
+
+        plt.tight_layout()
+        plt.savefig(output_file, dpi=300, bbox_inches='tight')
+        plt.close()
+
+    create_performance_plot(table, headers, name_baseline, name_compare, known_args.plot, known_args.plot_x, known_args.plot_log_scale)
+
 print(tabulate(  # noqa: NP100
     table,
     headers=headers,
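The bulk of the new code is create_performance_plot. Its core idea: each table row becomes one data point, and points are bucketed into subplots keyed by every displayed parameter except the x-axis one (when plotting against n_depth, the test name also has its @d<depth> suffix stripped, so pp512@d1024 and pp512@d2048 land in the same subplot). Below is a toy, self-contained sketch of just that grouping step, using made-up rows in place of the real llama-bench table:

# Illustrative only: how the plotting code partitions table rows into subplots.
# Column names and row values here are invented; the real table comes from llama-bench results.
rows = [
    # (model, GPU layers, test, t/s baseline, t/s compare, speedup)
    ["llama 7B Q4_0", "99", "pp512@d1024", "1200.0", "1320.0", "1.10"],
    ["llama 7B Q4_0", "99", "pp512@d2048", "1100.0", "1250.0", "1.14"],
    ["llama 7B Q8_0", "99", "pp512@d1024", "900.0", "950.0", "1.06"],
]
headers = ["Model", "GPU layers", "Test", "t/s base", "t/s cmp", "Speedup"]

groups: dict[tuple, list[tuple[int, float, float]]] = {}
for row in rows:
    params, test = row[:-4], row[-4]
    depth = int(test.split("@d")[1])          # x value when plotting against n_depth
    key = tuple(f"{h}={v}" for h, v in zip(headers[:-4], params)) + (f"Test={test.split('@d')[0]}",)
    groups.setdefault(key, []).append((depth, float(row[-3]), float(row[-2])))

for key, points in groups.items():
    print(key, sorted(points))                # one subplot per key in the real script

From the command line the feature is driven by the new flags shown above, e.g. something like scripts/compare-llama-bench.py --plot plot.png --plot_x n_depth --plot_log_scale on top of the script's usual baseline/compare selection; matplotlib~=3.10.0 is the matching new requirement added above.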
