diff --git a/README.md b/README.md index 1c0742370de39..e373611051e44 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32) - [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38) - [x] [Hunyuan models](https://huggingface.co/collections/tencent/hunyuan-dense-model-6890632cda26b19119c9c5e7) +- [x] [BailingMoeV2 (Ring/Ling 2.0) models](https://huggingface.co/collections/inclusionAI/ling-v2-68bf1dd2fc34c306c1fa6f86) #### Multimodal @@ -187,6 +188,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift) - Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama) - Delphi [Embarcadero/llama-cpp-delphi](https://github.com/Embarcadero/llama-cpp-delphi) +- Go (no CGo needed): [hybridgroup/yzma](https://github.com/hybridgroup/yzma) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 8c5132193e0e0..ed99dc8477231 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -892,8 +892,8 @@ def get_vocab_base_pre(self, tokenizer) -> str: # ref: https://huggingface.co/JetBrains/Mellum-4b-base res = "mellum" if chkhsh == "9b1be57e70d20d9501b2b3186e792d81181ae36ada3903c26f9fea418cf87206": - # ref: https://huggingface.co/inclusionAI/LLaDA-MoE-7B-A1B-Base - res = "llada-moe" + # ref: https://huggingface.co/inclusionAI/Ling-mini-base-2.0 + res = "bailingmoe2" if chkhsh == "53e325976a6e142379c19b09afcae354f2f496f147afa8f9e189a33fe4e3024e": # ref: https://huggingface.co/ibm-granite/granite-docling-258M res = "granite-docling" @@ -8055,6 +8055,103 @@ def prepare_tensors(self): raise ValueError(f"Unprocessed experts: {experts}") +@ModelBase.register("BailingMoeV2ForCausalLM") +class BailingMoeV2Model(TextModel): + model_arch = gguf.MODEL_ARCH.BAILINGMOE2 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + if nextn_layers := self.hparams.get("num_nextn_predict_layers", 0): + self.block_count = self.hparams["num_hidden_layers"] + nextn_layers + self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) + + def set_vocab(self): + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + hparams = self.hparams + if (rope_dim := hparams.get("head_dim")) is None: + rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] + + self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5))) + rope_scaling = self.hparams.get("rope_scaling") or {} + if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling: + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"]) + self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"]) + else: + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE) + self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"]) + self.gguf_writer.add_vocab_size(hparams["vocab_size"]) + self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"]) + self.gguf_writer.add_expert_shared_feed_forward_length(hparams.get("moe_shared_expert_intermediate_size", hparams["moe_intermediate_size"] * 
hparams["num_shared_experts"])) + self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"]) + self.gguf_writer.add_expert_count(hparams["num_experts"]) + self.gguf_writer.add_expert_shared_count(hparams["num_shared_experts"]) + self.gguf_writer.add_expert_group_count(hparams["n_group"]) + self.gguf_writer.add_expert_group_used_count(hparams["topk_group"]) + self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"]) + + if hparams["score_function"] == "sigmoid": + self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) + elif hparams["score_function"] == "softmax": + self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX) + else: + raise ValueError(f"Unsupported score_function value: {hparams['score_function']}") + + if (nextn_layers := self.hparams.get("num_nextn_predict_layers")) is not None: + self.gguf_writer.add_nextn_predict_layers(nextn_layers) + + _experts: list[dict[str, Tensor]] | None = None + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + if "mlp.experts" in name: + n_experts = self.hparams["num_experts"] + assert bid is not None + + tensors: list[tuple[str, Tensor]] = [] + + if self._experts is None: + self._experts = [{} for _ in range(self.block_count)] + + self._experts[bid][name] = data_torch + + if len(self._experts[bid]) >= n_experts * 3: + # merge the experts into a single 3d tensor + for w_name in ["down_proj", "gate_proj", "up_proj"]: + datas: list[Tensor] = [] + + for xid in range(n_experts): + ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight" + datas.append(self._experts[bid][ename]) + del self._experts[bid][ename] + + data_torch = torch.stack(datas, dim=0) + + merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight" + + new_name = self.map_tensor_name(merged_name) + + tensors.append((new_name, data_torch)) + + return tensors + + if name.endswith(".expert_bias"): + name = name.replace(".expert_bias", ".expert_bias.bias") + + return [(self.map_tensor_name(name), data_torch)] + + def prepare_tensors(self): + super().prepare_tensors() + + if self._experts is not None: + # flatten `list[dict[str, Tensor]]` into `list[str]` + experts = [k for d in self._experts for k in d.keys()] + if len(experts) > 0: + raise ValueError(f"Unprocessed experts: {experts}") + + @ModelBase.register("GroveMoeForCausalLM", "modeling_grove_moe.GroveMoeForCausalLM") class GroveMoeModel(TextModel): model_arch = gguf.MODEL_ARCH.GROVEMOE diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 28002f766e23b..0ebc1b160f603 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -139,7 +139,7 @@ class TOKENIZER_TYPE(IntEnum): {"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"}, {"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", }, {"name": "mellum", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum-4b-base", }, - {"name": "llada-moe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/LLaDA-MoE-7B-A1B-Base", }, + {"name": "bailingmoe2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-mini-base-2.0", }, {"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", }, ] diff --git a/docs/ops.md b/docs/ops.md index 938efac815fc0..dfd1cfab6a8b2 100644 --- a/docs/ops.md 
+++ b/docs/ops.md @@ -22,7 +22,7 @@ Legend: | ARANGE | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | -| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| CEIL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | CLAMP | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | 🟡 | ✅ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ❌ | @@ -42,7 +42,7 @@ Legend: | ELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | EXP | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | | FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | -| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| FLOOR | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | GATED_LINEAR_ATTN | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | | GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | GEGLU_ERF | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | @@ -72,7 +72,7 @@ Legend: | OPT_STEP_SGD | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | OUT_PROD | 🟡 | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | | PAD | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | -| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | +| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | | POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | 🟡 | ❌ | | RELU | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | @@ -84,7 +84,7 @@ Legend: | ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | @@ -111,6 +111,6 @@ Legend: | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | TOPK_MOE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | | XIELU | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index d7efa43cdf3da..fe6876357f359 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -31,6 +31,14 @@ "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","XIELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" "SYCL0","XIELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" +"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" @@ -95,6 +103,14 @@ "SYCL0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","XIELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","SYCL" "SYCL0","XIELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","SYCL" 
+"SYCL0","FLOOR","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","FLOOR","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","CEIL","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" @@ -9363,8 +9379,8 @@ "SYCL0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","yes","SYCL" "SYCL0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","yes","SYCL" "SYCL0","PAD","type=f32,ne_a=[512,512,3,1],lp0=1,rp0=1,lp1=1,rp1=1,lp2=1,rp2=1,lp3=1,rp3=1,v=0","support","1","yes","SYCL" -"SYCL0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","no","SYCL" -"SYCL0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","0","no","SYCL" +"SYCL0","PAD_REFLECT_1D","type=f32,ne_a=[3000,384,4,1],pad_0=10,pad_1=9","support","0","yes","SYCL" +"SYCL0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","yes","SYCL" "SYCL0","ROLL","shift0=3,shift1=-2,shift3=1,shift4=-1","support","0","no","SYCL" "SYCL0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","0","no","SYCL" "SYCL0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","yes","SYCL" diff --git a/docs/ops/Vulkan.csv b/docs/ops/Vulkan.csv index ea252577280d5..298c2a6ccd5fc 100644 --- a/docs/ops/Vulkan.csv +++ b/docs/ops/Vulkan.csv @@ -3263,27 +3263,27 @@ "Vulkan0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=0","support","1","yes","Vulkan" "Vulkan0","RMS_NORM_MUL_ADD","type=f32,ne=[64,5,4,3],eps=1.000000,broadcast=1","support","1","yes","Vulkan" "Vulkan0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","yes","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","0","no","Vulkan" 
-"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","0","no","Vulkan" -"Vulkan0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","0","no","Vulkan" -"Vulkan0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","no","Vulkan" -"Vulkan0","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","no","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[3,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[3,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[3,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[3,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[3,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[3,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1024,1,1],ne_b=[4,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1024,4,1],ne_b=[4,1024,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[8,2048,1,1],ne_b=[4,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_CONV","type=f32,ne_a=[4,2048,4,1],ne_b=[4,2048,1,1]","support","1","yes","Vulkan" +"Vulkan0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" +"Vulkan0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" +"Vulkan0","SSM_SCAN","type=f32,d_state=256,head_dim=64,n_head=8,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" "Vulkan0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","yes","Vulkan" "Vulkan0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","yes","Vulkan" "Vulkan0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","yes","Vulkan" diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index 929bc4488156f..c830c09655fec 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -598,6 +598,26 @@ 
static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor
     return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
 }
 
+// free the extra space at the end if the new tensor is smaller
+static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_tensor * node, struct ggml_tensor * parent) {
+    struct hash_node * hn   = ggml_gallocr_hash_get(galloc, node);
+    struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent);
+
+    size_t parent_size = ggml_backend_buft_get_alloc_size(galloc->bufts[p_hn->buffer_id], parent);
+    size_t node_size   = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], node);
+
+    GGML_ASSERT(parent_size >= node_size);
+
+    if (parent_size > node_size) {
+        struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
+        struct buffer_address p_addr = p_hn->addr;
+        p_addr.offset += node_size;
+        size_t extra_size = parent_size - node_size;
+        AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
+        ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
+    }
+}
+
 static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
     GGML_ASSERT(buffer_id >= 0);
     struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
@@ -643,6 +663,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
                 hn->addr = p_hn->addr;
                 p_hn->allocated = false; // avoid freeing the parent
                 view_src_hn->allocated = false;
+                ggml_gallocr_free_extra_space(galloc, node, view_src);
                 return;
             }
         } else {
@@ -650,6 +671,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
                 hn->buffer_id = p_hn->buffer_id;
                 hn->addr = p_hn->addr;
                 p_hn->allocated = false; // avoid freeing the parent
+                ggml_gallocr_free_extra_space(galloc, node, parent);
                 return;
             }
         }
diff --git a/ggml/src/ggml-sycl/backend.hpp b/ggml/src/ggml-sycl/backend.hpp
index 6ff3215d5a439..b1575b8145138 100644
--- a/ggml/src/ggml-sycl/backend.hpp
+++ b/ggml/src/ggml-sycl/backend.hpp
@@ -37,5 +37,7 @@
 #include "softmax.hpp"
 #include "tsembd.hpp"
 #include "wkv.hpp"
+#include "pad_reflect_1d.hpp"
+
 
 #endif // GGML_SYCL_BACKEND_HPP
diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp
index 58f5125c9cf6e..810995d0cbf74 100644
--- a/ggml/src/ggml-sycl/element_wise.cpp
+++ b/ggml/src/ggml-sycl/element_wise.cpp
@@ -150,6 +150,26 @@ static __dpct_inline__ T op_clamp(T x, float min_val, float max_val) {
     return x < static_cast<T>(min_val) ? static_cast<T>(min_val) : (x > static_cast<T>(max_val) ? static_cast<T>(max_val) : x);
 }
 
+template<typename T>
+static __dpct_inline__ T op_floor(T x) {
+    return sycl::floor(x);
+}
+
+template<typename T>
+static __dpct_inline__ T op_ceil(T x) {
+    return sycl::ceil(x);
+}
+
+template<typename T>
+static __dpct_inline__ T op_round(T x) {
+    return sycl::round(x);
+}
+
+template<typename T>
+static __dpct_inline__ T op_trunc(T x) {
+    return sycl::trunc(x);
+}
+
 template<typename T>
 static void unary_op_sgn_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
     SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
@@ -304,6 +324,34 @@ static void unary_op_clamp_kernel(const T * x, T * dst, const int k, const sycl
     }
 }
 
+template<typename T>
+static void unary_op_floor_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_floor(x[i]);
+    }
+}
+
+template<typename T>
+static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_ceil(x[i]);
+    }
+}
+
+template<typename T>
+static void unary_op_round_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_round(x[i]);
+    }
+}
+
+template<typename T>
+static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
+    SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
+        dst[i] = op_trunc(x[i]);
+    }
+}
+
 template<typename T>
 static void upscale(const T *x, T *dst, const int nb00, const int nb01,
                     const int nb02, const int nb03, const int ne10, const int ne11,
@@ -897,6 +945,58 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
         }, min_val, max_val);
 }
 
+static inline void ggml_sycl_op_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_floor_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
+static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_ceil_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
+static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_round_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
+static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
+        [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
+            const int num_blocks = ceil_div(k_elements, 256);
+            stream->parallel_for(
+                sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
+                                  sycl::range<1>(256)),
+                [=](sycl::nd_item<1> item_ct1) {
+                    unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1);
+                });
+        });
+}
+
 static inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
     GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32);
@@ -1122,3 +1222,23 @@ void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/0);
     ggml_sycl_detail::ggml_sycl_op_arange(ctx, dst);
 }
+
+void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_floor(ctx, dst);
+}
+
+void ggml_sycl_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_ceil(ctx, dst);
+}
+
+void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_round(ctx, dst);
+}
+
+void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
+    ggml_sycl_op_trunc(ctx, dst);
+}
diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp
index ed96c55f75a7a..fcf93295cb215 100644
--- a/ggml/src/ggml-sycl/element_wise.hpp
+++ b/ggml/src/ggml-sycl/element_wise.hpp
@@ -80,6 +80,10 @@ void ggml_sycl_reglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 void ggml_sycl_arange(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index a7e077ec8ebe0..33f9035075ba7 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -3698,6 +3698,18 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
         case GGML_UNARY_OP_ELU:
             ggml_sycl_elu(ctx, dst);
             break;
+        case GGML_UNARY_OP_FLOOR:
+            ggml_sycl_floor(ctx, dst);
+            break;
+        case GGML_UNARY_OP_CEIL:
+            ggml_sycl_ceil(ctx, dst);
+            break;
+        case GGML_UNARY_OP_ROUND:
+            ggml_sycl_round(ctx, dst);
+            break;
+        case GGML_UNARY_OP_TRUNC:
+            ggml_sycl_trunc(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -3732,6 +3744,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
         case GGML_OP_CONCAT:
             ggml_sycl_op_concat(ctx, dst);
             break;
+        case GGML_OP_PAD_REFLECT_1D:
+            ggml_sycl_op_pad_reflect_1d(ctx, dst);
+            break;
         case GGML_OP_UPSCALE:
             ggml_sycl_upscale(ctx, dst);
             break;
@@ -4262,6 +4277,10 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_UNARY_OP_SGN:
         case GGML_UNARY_OP_ABS:
         case GGML_UNARY_OP_ELU:
+        case GGML_UNARY_OP_FLOOR:
+        case GGML_UNARY_OP_CEIL:
+        case GGML_UNARY_OP_ROUND:
+        case GGML_UNARY_OP_TRUNC:
 #if defined (GGML_SYCL_F16)
             return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
 #else
@@ -4439,6 +4458,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_DIV:
         case GGML_OP_REPEAT:
             return true;
+        case GGML_OP_PAD_REFLECT_1D:
+            return ggml_is_contiguous(op->src[0]) && op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
         case GGML_OP_SQR:
         case GGML_OP_SQRT:
         case GGML_OP_SIN:
diff --git a/ggml/src/ggml-sycl/pad_reflect_1d.cpp b/ggml/src/ggml-sycl/pad_reflect_1d.cpp
new file mode 100644
index 0000000000000..e56655a98a106
--- /dev/null
+++ b/ggml/src/ggml-sycl/pad_reflect_1d.cpp
@@ -0,0 +1,72 @@
+#include "pad_reflect_1d.hpp"
+
+// nb0..nb3 are the source byte strides, nb00..nb03 the destination byte strides
+void pad_reflect_1d_f32(const float * src, float * dst,
+    const int64_t ne0, const int64_t ne02, const int p0, const int p1,
+    const int64_t nb0, const int64_t nb1, const int64_t nb2, const int64_t nb3,
+    const int64_t nb00, const int64_t nb01, const int64_t nb02, const int64_t nb03,
+    const sycl::nd_item<3> & item_ct1) {
+
+    const int i0 = item_ct1.get_group(0) * SYCL_CONCAT_BLOCK_SIZE + item_ct1.get_local_id(0);
+    const int i1 = item_ct1.get_group(1);
+    const int g2 = item_ct1.get_group(2);
+    const int i2 = g2 % ne02;
+    const int i3 = g2 / ne02;
+
+    if (i0 >= p0 + ne0 + p1) return;
+
+    // reflect i0 back into [0, ne0) without repeating the edge elements
+    const int t      = i0 - p0;
+    const int period = 2 * ne0 - 2;
+    int m = t % period;
+    m += (m < 0) * period;
+    const int center = ne0 - 1;
+    const int srci0  = center - abs(center - m);
+
+    // the strides are in bytes, so address the tensors through char pointers
+    const int64_t offset_src = i3*nb3  + i2*nb2  + i1*nb1  + srci0*nb0;
+    const int64_t offset_dst = i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00;
+    *(float *)((char *) dst + offset_dst) = *(const float *)((const char *) src + offset_src);
+}
+
+void ggml_sycl_op_pad_reflect_1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+
+    const ggml_tensor * src0 = dst->src[0];
+    queue_ptr stream = ctx.stream();
+
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    const int32_t * opts = (const int32_t *) dst->op_params;
+    const int p0 = opts[0];
+    const int p1 = opts[1];
+
+    const int64_t ne0 = src0->ne[0];
+
+    const int64_t ne00 = dst->ne[0];
+    const int64_t ne01 = dst->ne[1];
+    const int64_t ne02 = dst->ne[2];
+    const int64_t ne03 = dst->ne[3];
+
+    const int64_t nb00 = dst->nb[0];
+    const int64_t nb01 = dst->nb[1];
+    const int64_t nb02 = dst->nb[2];
+    const int64_t nb03 = dst->nb[3];
+    const int64_t nb0  = src0->nb[0];
+    const int64_t nb1  = src0->nb[1];
+    const int64_t nb2  = src0->nb[2];
+    const int64_t nb3  = src0->nb[3];
+
+    const int num_blocks = (ne00 + SYCL_CONCAT_BLOCK_SIZE - 1) / SYCL_CONCAT_BLOCK_SIZE;
+    sycl::range<3> global(num_blocks * SYCL_CONCAT_BLOCK_SIZE, ne01, ne02*ne03);
+    sycl::range<3> local(SYCL_CONCAT_BLOCK_SIZE, 1, 1);
+
+    stream->parallel_for(
+        sycl::nd_range<3>(global, local),
+        [=](sycl::nd_item<3> item_ct1) {
+            pad_reflect_1d_f32(
+                (const float *) src0->data, (float *) dst->data,
+                ne0, ne02, p0, p1,
+                nb0, nb1, nb2, nb3,
+                nb00, nb01, nb02, nb03,
+                item_ct1);
+        });
+}
diff --git a/ggml/src/ggml-sycl/pad_reflect_1d.hpp b/ggml/src/ggml-sycl/pad_reflect_1d.hpp
new file mode 100644
index 0000000000000..a24509dea6384
--- /dev/null
+++ b/ggml/src/ggml-sycl/pad_reflect_1d.hpp
@@ -0,0 +1,8 @@
+#ifndef GGML_SYCL_PAD_REFLECT_1D_HPP
+#define GGML_SYCL_PAD_REFLECT_1D_HPP
+
+#include "common.hpp"
+
+void ggml_sycl_op_pad_reflect_1d(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
+
+#endif // GGML_SYCL_PAD_REFLECT_1D_HPP
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index f5e5fba8008bd..1b71fb3749aaa 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -102,6 +102,8 @@ class LLM:
     EXPERT_COUNT                         = "{arch}.expert_count"
     EXPERT_USED_COUNT                    = "{arch}.expert_used_count"
    EXPERT_SHARED_COUNT                  = 
"{arch}.expert_shared_count" + EXPERT_GROUP_COUNT = "{arch}.expert_group_count" + EXPERT_GROUP_USED_COUNT = "{arch}.expert_group_used_count" EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale" EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm" EXPERT_GATING_FUNC = "{arch}.expert_gating_func" @@ -400,6 +402,7 @@ class MODEL_ARCH(IntEnum): WAVTOKENIZER_DEC = auto() PLM = auto() BAILINGMOE = auto() + BAILINGMOE2 = auto() DOTS1 = auto() ARCEE = auto() ERNIE4_5 = auto() @@ -744,6 +747,7 @@ class MODEL_TENSOR(IntEnum): MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec", MODEL_ARCH.PLM: "plm", MODEL_ARCH.BAILINGMOE: "bailingmoe", + MODEL_ARCH.BAILINGMOE2: "bailingmoe2", MODEL_ARCH.DOTS1: "dots1", MODEL_ARCH.ARCEE: "arcee", MODEL_ARCH.ERNIE4_5: "ernie4_5", @@ -2533,6 +2537,35 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_DOWN_SHEXP, MODEL_TENSOR.FFN_UP_SHEXP, ], + MODEL_ARCH.BAILINGMOE2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q_NORM, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_QKV, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_GATE_INP, + MODEL_TENSOR.FFN_EXP_PROBS_B, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_UP_EXP, + MODEL_TENSOR.FFN_GATE_SHEXP, + MODEL_TENSOR.FFN_DOWN_SHEXP, + MODEL_TENSOR.FFN_UP_SHEXP, + MODEL_TENSOR.NEXTN_EH_PROJ, + MODEL_TENSOR.NEXTN_EMBED_TOKENS, + MODEL_TENSOR.NEXTN_ENORM, + MODEL_TENSOR.NEXTN_HNORM, + MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD, + MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM, + MODEL_TENSOR.LAYER_OUT_NORM, + ], MODEL_ARCH.DOTS1: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 306679e21834b..d52d4f40f7884 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -755,6 +755,12 @@ def add_expert_used_count(self, count: int) -> None: def add_expert_shared_count(self, count: int) -> None: self.add_uint32(Keys.LLM.EXPERT_SHARED_COUNT.format(arch=self.arch), count) + def add_expert_group_count(self, count: int) -> None: + self.add_uint32(Keys.LLM.EXPERT_GROUP_COUNT.format(arch=self.arch), count) + + def add_expert_group_used_count(self, count: int) -> None: + self.add_uint32(Keys.LLM.EXPERT_GROUP_USED_COUNT.format(arch=self.arch), count) + def add_expert_weights_scale(self, value: float) -> None: self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value) diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index c05aa6cc488de..d7dcd8efb8426 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -174,6 +174,7 @@ class TensorNameMap: "h.{bid}.self_attention.query_key_value", # bloom "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon "model.layers.{bid}.self_attn.query_key_value", # persimmon + "model.layers.{bid}.attention.query_key_value", # bailingmoe2 "h.{bid}.attn.c_attn", # gpt2 "transformer.h.{bid}.mixer.Wqkv", # phi2 "encoder.layers.{bid}.attn.Wqkv", # nomic-bert @@ -260,6 +261,7 @@ class TensorNameMap: "transformer.h.{bid}.attn.out_proj", # gpt-j "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon "model.layers.{bid}.self_attn.dense", # persimmon + "model.layers.{bid}.attention.dense", # bailingmoe2 "h.{bid}.attn.c_proj", # gpt2 "transformer.h.{bid}.mixer.out_proj", # phi2 "model.layers.layers.{bid}.self_attn.o_proj", # plamo @@ 
-373,6 +375,7 @@ class TensorNameMap:
 
     MODEL_TENSOR.FFN_EXP_PROBS_B: (
         "model.layers.{bid}.mlp.gate.e_score_correction",         # deepseek-v3 dots1
         "model.layers.{bid}.mlp.moe_statics.e_score_correction",  # ernie4.5-moe
+        "model.layers.{bid}.mlp.gate.expert_bias",                # bailingmoe2
         "model.layers.{bid}.feed_forward.expert_bias",            # lfm2moe
     ),
 
@@ -549,6 +552,7 @@ class TensorNameMap:
         "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
         "model.layers.{bid}.self_attn.q_layernorm",               # persimmon
         "model.layers.{bid}.self_attn.query_layernorm",           # hunyuan
+        "model.layers.{bid}.attention.query_layernorm",           # bailingmoe2
         "model.layers.{bid}.self_attn.q_norm",                    # cohere olmoe chameleon olmo2
         "layers.{bid}.self_attn.q_norm",                          # embeddinggemma
         "transformer.blocks.{bid}.attn.q_ln",                     # sea-lion
@@ -563,6 +567,7 @@ class TensorNameMap:
         "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
         "model.layers.{bid}.self_attn.k_layernorm",               # persimmon
         "model.layers.{bid}.self_attn.key_layernorm",             # hunyuan
+        "model.layers.{bid}.attention.key_layernorm",             # bailingmoe2
         "model.layers.{bid}.self_attn.k_norm",                    # cohere olmoe chameleon olmo2
         "layers.{bid}.self_attn.k_norm",                          # embeddinggemma
         "transformer.blocks.{bid}.attn.k_ln",                     # sea-lion
@@ -584,6 +589,7 @@ class TensorNameMap:
         "transformer.decoder_layer.{bid}.rms_norm_3",             # Grok
         "encoder.layer.{bid}.mlp.layernorm",                      # jina-bert-v2
         "encoder.layer.{bid}.layer_norm_2",                       # jina-v2-code
+        "model.layers.{bid}.final_layernorm",                     # bailingmoe2
     ),
 
     MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index b7e00b275b6f7..8ca769c5fd2ef 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -85,6 +85,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
     { LLM_ARCH_PLM,              "plm"              },
     { LLM_ARCH_BAILINGMOE,       "bailingmoe"       },
+    { LLM_ARCH_BAILINGMOE2,      "bailingmoe2"      },
     { LLM_ARCH_DOTS1,            "dots1"            },
     { LLM_ARCH_ARCEE,            "arcee"            },
     { LLM_ARCH_ERNIE4_5,         "ernie4_5"         },
@@ -135,6 +136,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_COUNT,                       "%s.expert_count"                       },
     { LLM_KV_EXPERT_USED_COUNT,                  "%s.expert_used_count"                  },
     { LLM_KV_EXPERT_SHARED_COUNT,                "%s.expert_shared_count"                },
+    { LLM_KV_EXPERT_GROUP_COUNT,                 "%s.expert_group_count"                 },
+    { LLM_KV_EXPERT_GROUP_USED_COUNT,            "%s.expert_group_used_count"            },
     { LLM_KV_EXPERT_WEIGHTS_SCALE,               "%s.expert_weights_scale"               },
     { LLM_KV_EXPERT_WEIGHTS_NORM,                "%s.expert_weights_norm"                },
     { LLM_KV_EXPERT_GATING_FUNC,                 "%s.expert_gating_func"                 },
@@ -1946,6 +1949,38 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
         },
     },
+    {
+        LLM_ARCH_BAILINGMOE2,
+        {
+            { LLM_TENSOR_TOKEN_EMBD,             "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM,            "output_norm" },
+            { LLM_TENSOR_OUTPUT,                 "output" },
+            { LLM_TENSOR_ATTN_NORM,              "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q_NORM,            "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K_NORM,            "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_QKV,               "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_OUT,               "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_GATE_INP,           "blk.%d.ffn_gate_inp" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,        "blk.%d.exp_probs_b" },
+            { LLM_TENSOR_FFN_NORM,               "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE,               "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN,               "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP,                 "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_GATE_EXPS,          "blk.%d.ffn_gate_exps" },
+            { LLM_TENSOR_FFN_DOWN_EXPS,          "blk.%d.ffn_down_exps" },
+            { LLM_TENSOR_FFN_UP_EXPS,            "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_GATE_SHEXP,         "blk.%d.ffn_gate_shexp" },
+            { LLM_TENSOR_FFN_DOWN_SHEXP,         "blk.%d.ffn_down_shexp" },
+            { LLM_TENSOR_FFN_UP_SHEXP,           "blk.%d.ffn_up_shexp" },
+            { LLM_TENSOR_NEXTN_EH_PROJ,          "blk.%d.nextn.eh_proj" },
+            { LLM_TENSOR_NEXTN_EMBED_TOKENS,     "blk.%d.nextn.embed_tokens" },
+            { LLM_TENSOR_NEXTN_ENORM,            "blk.%d.nextn.enorm" },
+            { LLM_TENSOR_NEXTN_HNORM,            "blk.%d.nextn.hnorm" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "blk.%d.nextn.shared_head_head" },
+            { LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "blk.%d.nextn.shared_head_norm" },
+            { LLM_TENSOR_LAYER_OUT_NORM,         "blk.%d.layer_output_norm" },
+        },
+    },
     {
         LLM_ARCH_DOTS1,
         {
diff --git a/src/llama-arch.h b/src/llama-arch.h
index c41de89859d5c..dea725c1a753a 100644
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -89,6 +89,7 @@ enum llm_arch {
     LLM_ARCH_WAVTOKENIZER_DEC,
     LLM_ARCH_PLM,
     LLM_ARCH_BAILINGMOE,
+    LLM_ARCH_BAILINGMOE2,
     LLM_ARCH_DOTS1,
     LLM_ARCH_ARCEE,
     LLM_ARCH_ERNIE4_5,
@@ -139,6 +140,8 @@ enum llm_kv {
     LLM_KV_EXPERT_COUNT,
     LLM_KV_EXPERT_USED_COUNT,
     LLM_KV_EXPERT_SHARED_COUNT,
+    LLM_KV_EXPERT_GROUP_COUNT,
+    LLM_KV_EXPERT_GROUP_USED_COUNT,
     LLM_KV_EXPERT_WEIGHTS_SCALE,
     LLM_KV_EXPERT_WEIGHTS_NORM,
     LLM_KV_EXPERT_GATING_FUNC,
diff --git a/src/llama-batch.h b/src/llama-batch.h
index d563adc66aaf5..0dc8cebd2a7b3 100644
--- a/src/llama-batch.h
+++ b/src/llama-batch.h
@@ -123,7 +123,7 @@ class llama_batch_allocr {
     uint32_t n_seq_max;
     uint32_t n_outputs;
 
-    std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
+    std::array<llama_seq_id, 1> seq_id_0 = {{ 0 }}; // default sequence id
 
     std::vector<llama_pos>      pos;
     std::vector<int32_t>        n_seq_id;
diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp
index 956c4e085e5b6..0285006d73caa 100644
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -63,6 +63,8 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
    { "yandex",            LLM_CHAT_TEMPLATE_YANDEX            },
    { "bailing",           LLM_CHAT_TEMPLATE_BAILING           },
+    { "bailing-think",     LLM_CHAT_TEMPLATE_BAILING_THINK     },
+    { "bailing2",          LLM_CHAT_TEMPLATE_BAILING2          },
    { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
    { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
@@ -191,6 +193,10 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_YANDEX;
     } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
         return LLM_CHAT_TEMPLATE_BAILING;
+    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
+        return LLM_CHAT_TEMPLATE_BAILING_THINK;
+    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("HUMAN") && tmpl_contains("<|role_end|>")) {
+        return LLM_CHAT_TEMPLATE_BAILING2;
     } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
         return LLM_CHAT_TEMPLATE_LLAMA4;
     } else if (tmpl_contains("<|endofuserprompt|>")) {
@@ -644,8 +650,8 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << " Ассистент:[SEP]";
         }
-    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING) {
-        // Bailing (Ling) template
+    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
+        // Bailing (Ling/Ring) template
         for (auto message : chat) {
             std::string role(message->role);
 
@@ -658,6 +664,33 @@ int32_t llm_chat_apply_template(
             ss << "<role>" << role << "</role>" << message->content;
         }
 
+        if (add_ass) {
+            ss << "<role>ASSISTANT</role>";
+
+            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
+                ss << "<think>";
+            }
+        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
+        // Bailing2 (Ling 2.0) template
+        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
+
+        if (!has_system) {
+            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
+        }
+
+        for (auto message : chat) {
+            std::string role(message->role);
+
+            if (role == "user") {
+                role = "HUMAN";
+            } else {
+                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
+            }
+
+            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
+        }
+
+        if (add_ass) {
+            ss << "<role>ASSISTANT</role>";
+        }
     }
diff --git a/src/llama-chat.h b/src/llama-chat.h
index 5a87d9ab627bc..da1b7c47997ca 100644
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -42,6 +42,8 @@ enum llm_chat_template {
     LLM_CHAT_TEMPLATE_MEGREZ,
     LLM_CHAT_TEMPLATE_YANDEX,
     LLM_CHAT_TEMPLATE_BAILING,
+    LLM_CHAT_TEMPLATE_BAILING_THINK,
+    LLM_CHAT_TEMPLATE_BAILING2,
     LLM_CHAT_TEMPLATE_LLAMA4,
     LLM_CHAT_TEMPLATE_SMOLVLM,
     LLM_CHAT_TEMPLATE_DOTS1,
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index e7526e7d0a557..bd348bcad370a 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2346,7 +2346,8 @@ llama_context * llama_init_from_model(
         return nullptr;
     }
 
-    if (params.pooling_type != model->hparams.pooling_type) {
+    if (params.pooling_type != LLAMA_POOLING_TYPE_UNSPECIFIED &&
+        params.pooling_type != model->hparams.pooling_type) {
         // user-specified pooling-type is different from the model default
         LLAMA_LOG_WARN("%s: model default pooling_type is [%d], but [%d] was specified\n",
                 __func__, model->hparams.pooling_type, params.pooling_type);
diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index f29a1e98c9103..41fa6894377ea 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -950,6 +950,31 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
         cb(selection_probs, "ffn_moe_probs_biased", il);
     }
 
+    // select top n_group_used expert groups
+    // https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/e815299b0bcbac849fa540c768ef21845365c9eb/modeling_deepseek.py#L440-L457
+    if (hparams.n_expert_groups > 1 && n_tokens > 0) {
+        const int64_t n_exp_per_group = n_expert / hparams.n_expert_groups;
+
+        // organize experts into n_expert_groups
+        ggml_tensor * selection_groups = ggml_reshape_3d(ctx0, selection_probs, n_exp_per_group, hparams.n_expert_groups, n_tokens); // [n_exp_per_group, n_expert_groups, n_tokens]
+
+        ggml_tensor * group_scores = ggml_top_k(ctx0, selection_groups, 2); // [2, n_expert_groups, n_tokens]
+        group_scores = ggml_get_rows(ctx0, ggml_reshape_4d(ctx0, selection_groups, 1, selection_groups->ne[0], selection_groups->ne[1], selection_groups->ne[2]), group_scores); // [1, 2, n_expert_groups, n_tokens]
+
+        // get top n_group_used expert groups
+        group_scores = ggml_sum_rows(ctx0, ggml_reshape_3d(ctx0, group_scores, group_scores->ne[1], group_scores->ne[2], group_scores->ne[3])); // [1, n_expert_groups, n_tokens]
+        group_scores = ggml_reshape_2d(ctx0, group_scores, group_scores->ne[1], group_scores->ne[2]); // [n_expert_groups, n_tokens]
+
+        ggml_tensor * expert_groups = ggml_top_k(ctx0, group_scores, hparams.n_group_used); // [n_group_used, n_tokens]
+        cb(expert_groups, "ffn_moe_group_topk", il);
+
+        // mask out the other groups
+        selection_probs = ggml_get_rows(ctx0, selection_groups, expert_groups); // [n_exp_per_group, n_group_used, n_tokens]
+        selection_probs = ggml_set_rows(ctx0, ggml_scale_bias(ctx0, selection_groups, 0.0f, -INFINITY), selection_probs, expert_groups); // [n_exp_per_group, n_expert_groups, n_tokens]
+        selection_probs = ggml_reshape_2d(ctx0, selection_probs, n_expert, n_tokens); // [n_expert, n_tokens]
+        cb(selection_probs, "ffn_moe_probs_masked", il);
+    }
+
     // select experts
     ggml_tensor * selected_experts = ggml_top_k(ctx0, selection_probs, n_expert_used); // 
[n_expert_used, n_tokens] cb(selected_experts->src[0], "ffn_moe_argsort", il); @@ -981,6 +1006,11 @@ ggml_tensor * llm_graph_context::build_moe_ffn( ggml_tensor * weights_sum = ggml_sum_rows(ctx0, weights); // [1, n_tokens] cb(weights_sum, "ffn_moe_weights_sum", il); + if (arch == LLM_ARCH_BAILINGMOE2) { + weights_sum = ggml_scale_bias(ctx0, weights_sum, 1.0, 1e-20); + cb(weights_sum, "ffn_moe_weights_sum_biased", il); + } + weights = ggml_div(ctx0, weights, weights_sum); // [n_expert_used, n_tokens] cb(weights, "ffn_moe_weights_norm", il); diff --git a/src/llama-hparams.h b/src/llama-hparams.h index 4e7f73ec234c3..6fcf91b7daa47 100644 --- a/src/llama-hparams.h +++ b/src/llama-hparams.h @@ -72,6 +72,8 @@ struct llama_hparams { uint32_t n_ff_chexp = 0; uint32_t n_expert_shared = 0; uint32_t n_norm_groups = 0; + uint32_t n_expert_groups = 0; + uint32_t n_group_used = 0; uint32_t n_group_experts = 0; float expert_group_scale = 0.05f; diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 909b49e8e6450..e460996330080 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -116,8 +116,10 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_A13B: return "A13B"; case LLM_TYPE_7B_A1B: return "7B.A1B"; case LLM_TYPE_8B_A1B: return "8B.A1B"; + case LLM_TYPE_16B_A1B: return "16B.A1B"; case LLM_TYPE_21B_A3B: return "21B.A3B"; case LLM_TYPE_30B_A3B: return "30B.A3B"; + case LLM_TYPE_100B_A6B: return "100B.A6B"; case LLM_TYPE_106B_A12B: return "106B.A12B"; case LLM_TYPE_235B_A22B: return "235B.A22B"; case LLM_TYPE_300B_A47B: return "300B.A47B"; @@ -481,11 +483,13 @@ void llama_model::load_hparams(llama_model_loader & ml) { return; } - ml.get_key(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train); - ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd); - ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer); - ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false); - ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false); + ml.get_key(LLM_KV_CONTEXT_LENGTH, hparams.n_ctx_train); + ml.get_key(LLM_KV_EMBEDDING_LENGTH, hparams.n_embd); + ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer); + ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false); + ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false); + ml.get_key(LLM_KV_EXPERT_GROUP_COUNT, hparams.n_expert_groups, false); + ml.get_key(LLM_KV_EXPERT_GROUP_USED_COUNT, hparams.n_group_used, false); if (arch == LLM_ARCH_WAVTOKENIZER_DEC) { ml.get_key(LLM_KV_FEATURES_LENGTH, hparams.n_embd_features); @@ -501,8 +505,15 @@ void llama_model::load_hparams(llama_model_loader & ml) { GGML_ASSERT(hparams.n_expert_used <= hparams.n_expert); if (hparams.n_expert > 0) { GGML_ASSERT(hparams.n_expert_used > 0); + GGML_ASSERT(hparams.n_expert_groups < hparams.n_expert); + if (hparams.n_expert_groups > 1) { + GGML_ASSERT(hparams.n_expert % hparams.n_expert_groups == 0); + GGML_ASSERT(hparams.n_group_used > 0); + GGML_ASSERT(hparams.n_group_used < hparams.n_expert_groups); + } } else { GGML_ASSERT(hparams.n_expert_used == 0); + GGML_ASSERT(hparams.n_expert_groups == 0); } std::fill(hparams.n_head_arr.begin(), hparams.n_head_arr.end(), 0); @@ -1888,6 +1899,29 @@ void llama_model::load_hparams(llama_model_loader & ml) { default: type = LLM_TYPE_UNKNOWN; } } break; + case LLM_ARCH_BAILINGMOE2: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead); + ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp); + 
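The keys loaded in this block (expert_group_count, expert_group_used_count, the gating function and the NextN depth) parameterize the group-limited routing that the build_moe_ffn change above implements: expert scores are reshaped into groups, each group is ranked by the sum of its top-2 expert scores, everything outside the top n_group_used groups is masked to -inf, and the usual top-k then runs over the survivors. A minimal per-token NumPy sketch of that selection rule follows — an illustration under assumed names and shapes, not the ggml implementation:

    # Illustrative sketch of DeepSeek-V3-style group-limited top-k routing, as
    # wired into build_moe_ffn when n_expert_groups > 1. Hypothetical helper.
    import numpy as np

    def group_limited_topk(scores, n_expert_groups, n_group_used, n_expert_used):
        # scores: [n_expert] biased selection probabilities for one token
        groups = scores.reshape(n_expert_groups, -1)   # [n_groups, n_exp_per_group]
        # rank each group by the sum of its top-2 expert scores
        group_scores = np.sort(groups, axis=-1)[:, -2:].sum(axis=-1)
        keep = np.argsort(-group_scores)[:n_group_used]
        # mask every expert outside the kept groups with -inf
        masked = np.full_like(groups, -np.inf)
        masked[keep] = groups[keep]
        return np.argsort(-masked.reshape(-1))[:n_expert_used]  # selected expert ids

    rng = np.random.default_rng(0)
    print(group_limited_topk(rng.standard_normal(16), 4, 2, 4))  # 16 experts, 4 groups, keep 2

The ggml version expresses the same masking with reshape/top_k/get_rows/set_rows so that the whole selection stays on the compute graph.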
ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp);
+            ml.get_key(LLM_KV_EXPERT_SHARED_COUNT,               hparams.n_expert_shared);
+            ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE,              hparams.expert_weights_scale);
+            ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM,               hparams.expert_weights_norm, false);
+            ml.get_key(LLM_KV_EXPERT_GATING_FUNC,                hparams.expert_gating_func);
+            ml.get_key(LLM_KV_NEXTN_PREDICT_LAYERS,              hparams.nextn_predict_layers, false);
+
+            // TODO: when MTP is implemented, this should probably be updated if needed
+            hparams.n_layer_kv_from_start = hparams.n_layer - hparams.nextn_predict_layers;
+
+            switch (hparams.n_layer) {
+                case 20: type = LLM_TYPE_16B_A1B; break;
+                case 21: type = LLM_TYPE_16B_A1B; break;
+                case 32: type = LLM_TYPE_100B_A6B; break;
+                case 33: type = LLM_TYPE_100B_A6B; break;
+                default: type = LLM_TYPE_UNKNOWN;
+            }
+        } break;
         case LLM_ARCH_DOTS1:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
@@ -5498,6 +5532,70 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_up_shexp   = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP,   "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0);
                     }
                 } break;
+            case LLM_ARCH_BAILINGMOE2:
+                {
+                    const int64_t n_ff_exp        = hparams.n_ff_exp;
+                    const int64_t n_expert_shared = hparams.n_expert_shared;
+
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+
+                    // output
+                    output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, 0);
+
+                    GGML_ASSERT(n_expert      > 0 && "n_expert must be > 0 for bailingmoe2");
+                    GGML_ASSERT(n_expert_used > 0 && "n_expert_used must be > 0 for bailingmoe2");
+
+                    for (int i = 0; i < n_layer; ++i) {
+                        int flags = 0;
+                        if (hparams.nextn_predict_layers > 0 && static_cast<uint32_t>(i) >= n_layer - hparams.nextn_predict_layers) {
+                            // skip all tensors in the NextN layers
+                            flags |= TENSOR_SKIP;
+                        }
+
+                        auto & layer = layers[i];
+
+                        layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, flags);
+
+                        layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, flags);
+                        layer.wo   = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, flags);
+
+                        layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, flags);
+                        layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, flags);
+
+                        layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, flags);
+
+                        if (static_cast<uint32_t>(i) >= hparams.n_layer_dense_lead) { // MoE layers
+                            const int64_t n_ff_shexp = (hparams.n_ff_shexp ? hparams.n_ff_shexp : n_ff_exp) * n_expert_shared;
+
+                            layer.ffn_gate_inp    = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP,    "weight", i), {n_embd, n_expert}, flags);
+                            layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias",   i), {n_expert}, TENSOR_NOT_REQUIRED | flags);
+
+                            layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {  n_embd, n_ff_exp, n_expert}, flags);
+                            layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp,   n_embd, n_expert}, flags);
+                            layer.ffn_up_exps   = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS,   "weight", i), {  n_embd, n_ff_exp, n_expert}, flags);
+
+                            layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), {n_embd, n_ff_shexp}, flags);
+                            layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), {n_ff_shexp, n_embd}, flags);
+                            layer.ffn_up_shexp   = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP,   "weight", i), {n_embd, n_ff_shexp}, flags);
+                        } else { // Dense layers
+                            layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, flags);
+                            layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {  n_ff, n_embd}, flags);
+                            layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff}, flags);
+                        }
+
+                        // NextN/MTP tensors (preserved but unused) - conditionally load for last nextn_predict_layers
+                        if (hparams.nextn_predict_layers > 0 && static_cast<uint32_t>(i) >= n_layer - hparams.nextn_predict_layers) {
+                            layer.nextn.eh_proj          = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ,          "weight", i), { 2 * n_embd, n_embd }, flags);
+                            layer.nextn.embed_tokens     = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS,     "weight", i), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED | flags);
+                            layer.nextn.enorm            = create_tensor(tn(LLM_TENSOR_NEXTN_ENORM,            "weight", i), { n_embd }, flags);
+                            layer.nextn.hnorm            = create_tensor(tn(LLM_TENSOR_NEXTN_HNORM,            "weight", i), { n_embd }, flags);
+                            layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, TENSOR_NOT_REQUIRED | flags);
+                            layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, TENSOR_NOT_REQUIRED | flags);
+                            layer.layer_out_norm         = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM,         "weight", i), {n_embd}, flags);
+                        }
+                    }
+                } break;
             case LLM_ARCH_DOTS1:
                 {
                     const int64_t n_ff_exp = hparams.n_ff_exp;
@@ -6353,6 +6451,19 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: expert_weights_norm  = %d\n",     __func__, hparams.expert_weights_norm);
     }
 
+    if (arch == LLM_ARCH_BAILINGMOE2) {
+        LLAMA_LOG_INFO("%s: n_layer_dense_lead   = %d\n",     __func__, hparams.n_layer_dense_lead);
+        LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n",     __func__, hparams.n_ff_exp);
+        LLAMA_LOG_INFO("%s: n_ff_shexp           = %d\n",     __func__, hparams.n_ff_shexp);
+        LLAMA_LOG_INFO("%s: n_expert_shared      = %d\n",     __func__, hparams.n_expert_shared);
+        LLAMA_LOG_INFO("%s: n_expert_groups      = %d\n",     __func__, hparams.n_expert_groups);
+        LLAMA_LOG_INFO("%s: n_group_used         = %d\n",     __func__, hparams.n_group_used);
+        LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n",   __func__, hparams.expert_weights_scale);
+        LLAMA_LOG_INFO("%s: expert_weights_norm  = %d\n",     __func__, hparams.expert_weights_norm);
+        LLAMA_LOG_INFO("%s: expert_gating_func   = %s\n",     __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
+        LLAMA_LOG_INFO("%s: nextn_predict_layers = %d\n",     __func__, hparams.nextn_predict_layers);
+    }
+
     if (arch == LLM_ARCH_SMALLTHINKER || arch == LLM_ARCH_LFM2MOE) {
         LLAMA_LOG_INFO("%s: n_ff_exp             = %d\n", __func__, hparams.n_ff_exp);
         LLAMA_LOG_INFO("%s: expert_gating_func   = %s\n", __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
@@ -17042,6 +17153,150 @@ struct llm_build_bailingmoe : public llm_graph_context {
     }
 };
 
+struct llm_build_bailingmoe2 : public llm_graph_context {
+    llm_build_bailingmoe2(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
+        const int64_t n_embd_head = hparams.n_embd_head_v;
+        const int64_t n_embd_gqa  = hparams.n_embd_v_gqa();
+
+        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+
+        ggml_tensor * cur;
+        ggml_tensor * inpL;
+
+        inpL = build_inp_embd(model.tok_embd);
+
+        // inp_pos - contains the positions
+        ggml_tensor * inp_pos = build_inp_pos();
+
+        auto * inp_attn = build_attn_inp_kv();
+
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
+
+        const int n_transformer_layers = n_layer - hparams.nextn_predict_layers;
+        for (int il = 0; il < n_transformer_layers; ++il) {
+            ggml_tensor * inpSA = inpL;
+
+            // norm
+            cur = build_norm(inpL,
+                    model.layers[il].attn_norm, NULL,
+                    LLM_NORM_RMS, il);
+            cb(cur, "attn_norm", il);
+
+            // self_attention
+            {
+                cur = build_lora_mm(model.layers[il].wqkv, cur);
+                cb(cur, "wqkv", il);
+
+                ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head,    n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd));
+                ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd));
+                ggml_tensor * Vcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa));
+
+                Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il);
+                cb(Qcur, "Qcur_normed", il);
+
+                Qcur = ggml_rope_ext(
+                        ctx0, Qcur, inp_pos, nullptr,
+                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                        ext_factor, attn_factor, beta_fast, beta_slow
+                        );
+
+                Kcur = build_norm(Kcur, model.layers[il].attn_k_norm, NULL, LLM_NORM_RMS, il);
+                cb(Kcur, "Kcur_normed", il);
+
+                Kcur = ggml_rope_ext(
+                        ctx0, Kcur, inp_pos, nullptr,
+                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                        ext_factor, attn_factor, beta_fast, beta_slow
+                        );
+
+                cb(Qcur, "Qcur", il);
+                cb(Kcur, "Kcur", il);
+                cb(Vcur, "Vcur", il);
+
+                cur = build_attn(inp_attn,
+                        model.layers[il].wo, model.layers[il].bo,
+                        Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
+            }
+
+            if (il == n_transformer_layers - 1 && inp_out_ids) {
+                cur   = ggml_get_rows(ctx0, cur,   inp_out_ids);
+                inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
+            }
+
+            ggml_tensor * sa_out = ggml_add(ctx0, cur, inpSA);
+            cb(sa_out, "sa_out", il);
+
+            // MoE branch
+            cur = build_norm(sa_out,
+                    model.layers[il].ffn_norm, NULL,
+                    LLM_NORM_RMS, il);
+            cb(cur, "ffn_norm", il);
+
+            if (static_cast<uint32_t>(il) < hparams.n_layer_dense_lead) {
+                cur = build_ffn(cur,
+                        model.layers[il].ffn_up,   NULL, NULL,
+                        model.layers[il].ffn_gate, NULL, NULL,
+                        model.layers[il].ffn_down, NULL, NULL,
+                        NULL,
+                        LLM_FFN_SILU, LLM_FFN_PAR, il);
+                cb(cur, "ffn_out", il);
+            } else {
+                ggml_tensor * moe_out =
+                    build_moe_ffn(cur,
+                            model.layers[il].ffn_gate_inp,
+                            model.layers[il].ffn_up_exps,
+                            model.layers[il].ffn_gate_exps,
+                            model.layers[il].ffn_down_exps,
+                            model.layers[il].ffn_exp_probs_b,
+                            n_expert, n_expert_used,
+                            LLM_FFN_SILU, hparams.expert_weights_norm,
+                            true, hparams.expert_weights_scale,
+                            (llama_expert_gating_func_type) hparams.expert_gating_func,
+                            il);
+                cb(moe_out, "ffn_moe_out", il);
+
+                {
+                    ggml_tensor * ffn_shexp = build_ffn(cur,
+                            model.layers[il].ffn_up_shexp,   NULL, NULL,
+                            model.layers[il].ffn_gate_shexp, NULL, NULL,
+                            model.layers[il].ffn_down_shexp, NULL, NULL,
+                            NULL,
+                            LLM_FFN_SILU, LLM_FFN_PAR, il);
+                    cb(ffn_shexp, "ffn_shexp", il);
+
+                    cur = ggml_add(ctx0, moe_out, ffn_shexp);
+                    cb(cur, "ffn_out", il);
+                }
+            }
+
+            cur = ggml_add(ctx0, cur, sa_out);
+
+            cur = build_cvec(cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
+        }
+
+        cur = inpL;
+
+        cur = build_norm(cur,
+                model.output_norm, NULL,
+                LLM_NORM_RMS, -1);
+
+        cb(cur, "result_norm", -1);
+        res->t_embd = cur;
+
+        // lm_head
+        cur = build_lora_mm(model.output, cur);
+
+        cb(cur, "result_output", -1);
+        res->t_logits = cur;
+
+        ggml_build_forward_expand(gf, cur);
+    }
+};
+
 struct llm_build_dots1 : public llm_graph_context {
     llm_build_dots1(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
         const int64_t n_embd_head = hparams.n_embd_head_v;
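In the attention block of `llm_build_bailingmoe2` above, Q, K and V are carved out of a single fused `wqkv` projection with three `ggml_view_3d` calls: each token's projected row is laid out as `[ Q : n_embd | K : n_embd_gqa | V : n_embd_gqa ]`, the per-token stride is `cur->nb[1]`, and the view byte offsets are `0`, `n_embd*sizeof(float)` and `(n_embd + n_embd_gqa)*sizeof(float)`. A minimal standalone sketch of that offset arithmetic (illustrative only; `split_qkv` and the example head counts are made up):

```cpp
// Byte offsets of Q/K/V inside one fused QKV row (illustrative sketch).
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct qkv_offsets {
    size_t q, k, v; // byte offsets into a single token's fused row
};

qkv_offsets split_qkv(int64_t n_embd_head, int64_t n_head, int64_t n_head_kv) {
    const int64_t n_embd     = n_embd_head * n_head;    // query width
    const int64_t n_embd_gqa = n_embd_head * n_head_kv; // key/value width (GQA: fewer KV heads)
    return {
        0,                                              // Q starts the row
        (size_t) n_embd * sizeof(float),                // K follows Q
        (size_t) (n_embd + n_embd_gqa) * sizeof(float), // V follows K
    };
}

int main() {
    // e.g. 16 query heads and 4 KV heads of size 128 (example values)
    const qkv_offsets off = split_qkv(128, 16, 4);
    std::printf("q=%zu k=%zu v=%zu\n", off.q, off.k, off.v);
}
```

Note also that the layer loop above runs only `n_layer - hparams.nextn_predict_layers` iterations, so the NextN/MTP layers whose tensors are loaded earlier are deliberately never executed by this graph.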
@@ -19838,6 +20093,10 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_llada_moe>(*this, params);
             } break;
+        case LLM_ARCH_BAILINGMOE2:
+            {
+                llm = std::make_unique<llm_build_bailingmoe2>(*this, params);
+            } break;
         case LLM_ARCH_SEED_OSS:
             {
                 llm = std::make_unique<llm_build_seed_oss>(*this, params);
             } break;
@@ -20104,6 +20363,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_EXAONE:
         case LLM_ARCH_EXAONE4:
         case LLM_ARCH_MINICPM3:
+        case LLM_ARCH_BAILINGMOE2:
         case LLM_ARCH_DOTS1:
         case LLM_ARCH_HUNYUAN_MOE:
         case LLM_ARCH_OPENAI_MOE:
diff --git a/src/llama-model.h b/src/llama-model.h
index 05701e7d70c84..248f854101cd7 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -109,8 +109,10 @@ enum llm_type {
     LLM_TYPE_A13B,
     LLM_TYPE_7B_A1B,
     LLM_TYPE_8B_A1B, // lfm2moe
+    LLM_TYPE_16B_A1B,
     LLM_TYPE_21B_A3B, // Ernie MoE small
     LLM_TYPE_30B_A3B,
+    LLM_TYPE_100B_A6B,
     LLM_TYPE_106B_A12B, // GLM-4.5-Air
     LLM_TYPE_235B_A22B,
     LLM_TYPE_300B_A47B, // Ernie MoE big
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index 7fffd171491aa..639fecbd31745 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -1968,6 +1968,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 clean_spaces = false;
             } else if (
                     tokenizer_pre == "bailingmoe"  ||
+                    tokenizer_pre == "bailingmoe2" ||
                     tokenizer_pre == "llada-moe") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
                 clean_spaces = false;
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 82bb55ea0e184..fa98db2982ce7 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3759,6 +3759,130 @@ struct test_clamp : public test_case {
     }
 };

+// GGML_OP_FLOOR
+struct test_floor : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_floor(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_floor(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
+// GGML_OP_CEIL
+struct test_ceil : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_ceil(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_ceil(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
+// GGML_OP_ROUND
+struct test_round : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_round(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_round(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
+// GGML_OP_TRUNC
+struct test_trunc : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    test_trunc(ggml_type type = GGML_TYPE_F32,
+            std::array<int64_t, 4> ne = {10, 2, 2, 2})
+        : type(type), ne(ne) {}
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_param(a);
+        ggml_set_name(a, "a");
+
+        ggml_tensor * out = ggml_trunc(ctx, a);
+        ggml_set_name(out, "out");
+
+        return out;
+    }
+
+    void initialize_tensors(ggml_context * ctx) override {
+        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+            init_tensor_uniform(t, -10.0f, 10.0f);
+        }
+    }
+};
+
 // GGML_OP_DIAG_MASK_INF
 struct test_diag_mask_inf : public test_case {
     const ggml_type type;
@@ -6585,6 +6709,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_cos       (type));
         test_cases.emplace_back(new test_clamp     (type));
         test_cases.emplace_back(new test_leaky_relu(type));
+        test_cases.emplace_back(new test_floor     (type));
+        test_cases.emplace_back(new test_ceil      (type));
+        test_cases.emplace_back(new test_round     (type));
+        test_cases.emplace_back(new test_trunc     (type));
         test_cases.emplace_back(new test_sqr       (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_sqrt      (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_log       (type, {7, 1, 5, 3}));
@@ -6592,6 +6720,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_cos       (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_clamp     (type, {7, 1, 5, 3}));
         test_cases.emplace_back(new test_leaky_relu(type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_floor     (type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_ceil      (type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_round     (type, {7, 1, 5, 3}));
+        test_cases.emplace_back(new test_trunc     (type, {7, 1, 5, 3}));
     }

     test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));
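The four structs above all follow the same `test_case` pattern, only swapping the op under test. For reference, a minimal CPU-only sketch of driving the same four ops through the public ggml API (a sketch, assuming a ggml build that includes these new ops; `ggml_graph_compute_with_ctx` is taken from the CPU backend header):

```cpp
// Evaluate floor/ceil/round/trunc on a tiny F32 tensor (illustrative sketch).
#include <cstdio>

#include "ggml.h"
#include "ggml-cpu.h"

int main() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false, // allocate tensor data inside the context
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    float * x = (float *) a->data;
    x[0] = -1.7f; x[1] = -0.5f; x[2] = 0.5f; x[3] = 1.7f;

    struct ggml_tensor * f = ggml_floor(ctx, a);
    struct ggml_tensor * c = ggml_ceil (ctx, a);
    struct ggml_tensor * r = ggml_round(ctx, a);
    struct ggml_tensor * t = ggml_trunc(ctx, a);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, f);
    ggml_build_forward_expand(gf, c);
    ggml_build_forward_expand(gf, r);
    ggml_build_forward_expand(gf, t);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    for (int i = 0; i < 4; ++i) {
        printf("x=% .1f  floor=% .1f  ceil=% .1f  round=% .1f  trunc=% .1f\n",
               x[i], ((float *) f->data)[i], ((float *) c->data)[i],
               ((float *) r->data)[i], ((float *) t->data)[i]);
    }

    ggml_free(ctx);
    return 0;
}
```

diff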
--git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index c76f5778be8fe..08450a93cb3f4 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/package-lock.json b/tools/server/webui/package-lock.json index 9cd6ef9138c95..f86b9282c9bb6 100644 --- a/tools/server/webui/package-lock.json +++ b/tools/server/webui/package-lock.json @@ -50,6 +50,7 @@ "eslint-plugin-svelte": "^3.0.0", "fflate": "^0.8.2", "globals": "^16.0.0", + "http-server": "^14.1.1", "mdast": "^3.0.0", "mdsvex": "^0.12.3", "playwright": "^1.53.0", @@ -2979,6 +2980,13 @@ "node": ">=4" } }, + "node_modules/async": { + "version": "3.2.6", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.6.tgz", + "integrity": "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==", + "dev": true, + "license": "MIT" + }, "node_modules/axe-core": { "version": "4.10.3", "resolved": "https://registry.npmjs.org/axe-core/-/axe-core-4.10.3.tgz", @@ -3015,6 +3023,19 @@ "dev": true, "license": "MIT" }, + "node_modules/basic-auth": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/basic-auth/-/basic-auth-2.0.1.tgz", + "integrity": "sha512-NF+epuEdnUYVlGuhaxbbq+dvJttwLnGY+YixlXlME5KpQ5W3CnXA5cVTneY3SPbPDRkcjMbifrwmFYcClgOZeg==", + "dev": true, + "license": "MIT", + "dependencies": { + "safe-buffer": "5.1.2" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/better-opn": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/better-opn/-/better-opn-3.0.2.tgz", @@ -3125,6 +3146,37 @@ "node": ">=8" } }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/callsites": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", @@ -3335,6 +3387,16 @@ "node": ">= 0.6" } }, + "node_modules/corser": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/corser/-/corser-2.0.1.tgz", + "integrity": "sha512-utCYNzRSQIZNPIcGZdQc92UVJYAhtGAteCFg0yRaFm8f0P+CPtyGyHXJcGXnffjCybUCEx3FQ2G7U3/o9eIkVQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, "node_modules/cross-spawn": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", @@ -3520,6 +3582,21 @@ "dev": true, "license": "MIT" }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dev": true, + "license": "MIT", + "dependencies": { + 
"call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/enhanced-resolve": { "version": "5.18.2", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.2.tgz", @@ -3547,6 +3624,26 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-module-lexer": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", @@ -3554,6 +3651,19 @@ "dev": true, "license": "MIT" }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-toolkit": { "version": "1.39.7", "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.39.7.tgz", @@ -3885,6 +3995,13 @@ "node": ">=0.10.0" } }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "dev": true, + "license": "MIT" + }, "node_modules/expect-type": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz", @@ -4058,6 +4175,27 @@ "dev": true, "license": "ISC" }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/fsevents": { "version": "2.3.2", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", @@ -4073,6 +4211,55 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": 
"sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dev": true, + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", @@ -4099,6 +4286,19 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -4123,6 +4323,32 @@ "node": ">=8" } }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/hast-util-from-dom": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.1.tgz", @@ -4363,6 +4589,16 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true, + "license": "MIT", + "bin": { + "he": "bin/he" + } + }, "node_modules/highlight.js": { "version": "11.11.1", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-11.11.1.tgz", @@ -4372,6 +4608,19 @@ "node": ">=12.0.0" } }, + "node_modules/html-encoding-sniffer": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-3.0.0.tgz", + "integrity": "sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==", + "dev": true, + "license": "MIT", + "dependencies": { + "whatwg-encoding": "^2.0.0" + }, + "engines": { + "node": ">=12" 
+ } + }, "node_modules/html-void-elements": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", @@ -4382,6 +4631,62 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/http-server": { + "version": "14.1.1", + "resolved": "https://registry.npmjs.org/http-server/-/http-server-14.1.1.tgz", + "integrity": "sha512-+cbxadF40UXd9T01zUHgA+rlo2Bg1Srer4+B4NwIHdaGxAGGv59nYRnGGDJ9LBk7alpS0US+J+bLLdQOOkJq4A==", + "dev": true, + "license": "MIT", + "dependencies": { + "basic-auth": "^2.0.1", + "chalk": "^4.1.2", + "corser": "^2.0.1", + "he": "^1.2.0", + "html-encoding-sniffer": "^3.0.0", + "http-proxy": "^1.18.1", + "mime": "^1.6.0", + "minimist": "^1.2.6", + "opener": "^1.5.1", + "portfinder": "^1.0.28", + "secure-compare": "3.0.1", + "union": "~0.5.0", + "url-join": "^4.0.1" + }, + "bin": { + "http-server": "bin/http-server" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dev": true, + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -5008,6 +5313,16 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/mdast": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/mdast/-/mdast-3.0.0.tgz", @@ -5976,6 +6291,19 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "dev": true, + "license": "MIT", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/min-indent": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", @@ -6009,6 +6337,16 @@ "node": "*" } }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/minipass": { "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", @@ -6124,6 +6462,19 @@ "tslib": "^2.0.3" } }, + "node_modules/object-inspect": { + "version": 
"1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/open": { "version": "8.4.2", "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz", @@ -6142,6 +6493,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/opener": { + "version": "1.5.2", + "resolved": "https://registry.npmjs.org/opener/-/opener-1.5.2.tgz", + "integrity": "sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==", + "dev": true, + "license": "(WTFPL OR MIT)", + "bin": { + "opener": "bin/opener-bin.js" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -6330,6 +6691,20 @@ "node": ">=18" } }, + "node_modules/portfinder": { + "version": "1.0.38", + "resolved": "https://registry.npmjs.org/portfinder/-/portfinder-1.0.38.tgz", + "integrity": "sha512-rEwq/ZHlJIKw++XtLAO8PPuOQA/zaPJOZJ37BVuN97nLpMJeuDVLVGRwbFoBgLudgdTMP2hdRJP++H+8QOA3vg==", + "dev": true, + "license": "MIT", + "dependencies": { + "async": "^3.2.6", + "debug": "^4.3.6" + }, + "engines": { + "node": ">= 10.12" + } + }, "node_modules/postcss": { "version": "8.5.6", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz", @@ -6680,6 +7055,22 @@ "node": ">=6" } }, + "node_modules/qs": { + "version": "6.14.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz", + "integrity": "sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==", + "dev": true, + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", @@ -6959,6 +7350,13 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", + "dev": true, + "license": "MIT" + }, "node_modules/resolve-from": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", @@ -7072,6 +7470,20 @@ "node": ">=6" } }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true, + "license": "MIT" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "dev": true, + "license": "MIT" + }, "node_modules/scheduler": { "version": "0.26.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", @@ -7079,6 +7491,13 @@ "dev": true, "license": "MIT" }, + "node_modules/secure-compare": { + "version": "3.0.1", + "resolved": 
"https://registry.npmjs.org/secure-compare/-/secure-compare-3.0.1.tgz", + "integrity": "sha512-AckIIV90rPDcBcglUwXPF3kg0P0qmPsPXAj6BBEENQE1p5yA1xfmDJzfi1Tappj37Pv2mVbKpL3Z1T+Nn7k1Qw==", + "dev": true, + "license": "MIT" + }, "node_modules/semver": { "version": "7.7.2", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.2.tgz", @@ -7122,6 +7541,82 @@ "node": ">=8" } }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dev": true, + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dev": true, + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/siginfo": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", @@ -7904,6 +8399,18 @@ "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", "license": "MIT" }, + "node_modules/union": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/union/-/union-0.5.0.tgz", + "integrity": "sha512-N6uOhuW6zO95P3Mel2I2zMsbsanvvtgn6jVqJv4vbVcz/JN0OkL9suomjQGmWtxJQXOCqUJvquc1sMeNz/IwlA==", + "dev": true, + "dependencies": { + "qs": "^6.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/unist-util-find-after": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-5.0.0.tgz", @@ -8073,6 +8580,13 @@ "punycode": "^2.1.0" } }, + "node_modules/url-join": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-4.0.1.tgz", + "integrity": 
"sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==", + "dev": true, + "license": "MIT" + }, "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", @@ -8447,6 +8961,19 @@ "dev": true, "license": "MIT" }, + "node_modules/whatwg-encoding": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-2.0.0.tgz", + "integrity": "sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==", + "dev": true, + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/tools/server/webui/package.json b/tools/server/webui/package.json index e073cd32f07e1..376f69015261b 100644 --- a/tools/server/webui/package.json +++ b/tools/server/webui/package.json @@ -52,6 +52,7 @@ "eslint-plugin-svelte": "^3.0.0", "fflate": "^0.8.2", "globals": "^16.0.0", + "http-server": "^14.1.1", "mdast": "^3.0.0", "mdsvex": "^0.12.3", "playwright": "^1.53.0", diff --git a/tools/server/webui/playwright.config.ts b/tools/server/webui/playwright.config.ts index 90ca19b09f3ed..51688b394106a 100644 --- a/tools/server/webui/playwright.config.ts +++ b/tools/server/webui/playwright.config.ts @@ -2,8 +2,10 @@ import { defineConfig } from '@playwright/test'; export default defineConfig({ webServer: { - command: 'npm run build && npx http-server ../public -p 8181', - port: 8181 + command: 'npm run build && http-server ../public -p 8181', + port: 8181, + timeout: 120000, + reuseExistingServer: false }, testDir: 'e2e' }); diff --git a/tools/server/webui/src/app.d.ts b/tools/server/webui/src/app.d.ts index e9bb140939886..eb14d6fe45143 100644 --- a/tools/server/webui/src/app.d.ts +++ b/tools/server/webui/src/app.d.ts @@ -31,7 +31,8 @@ import type { DatabaseMessageExtraAudioFile, DatabaseMessageExtraImageFile, DatabaseMessageExtraTextFile, - DatabaseMessageExtraPdfFile + DatabaseMessageExtraPdfFile, + DatabaseMessageExtraLegacyContext } from '$lib/types/database'; import type { @@ -73,6 +74,7 @@ declare global { DatabaseMessageExtraImageFile, DatabaseMessageExtraTextFile, DatabaseMessageExtraPdfFile, + DatabaseMessageExtraLegacyContext, SettingsConfigValue, SettingsFieldConfig, SettingsConfigType, diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte index 0007c4c0b4597..e378139d1b626 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte @@ -94,6 +94,17 @@ attachmentIndex: index, textContent: attachment.content }); + } else if (attachment.type === 'context') { + // Legacy format from old webui - treat as text file + items.push({ + id: `attachment-${index}`, + name: attachment.name, + type: 'text', + isImage: false, + attachment, + attachmentIndex: index, + textContent: attachment.content + }); } else if (attachment.type === 'audioFile') { items.push({ id: `attachment-${index}`, diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte index 6a7c0dd366e40..67a7fff54cb6b 100644 --- 
a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte
@@ -26,6 +26,7 @@
 		MimeTypeImage,
 		MimeTypeText
 	} from '$lib/enums/files';
+	import { isIMEComposing } from '$lib/utils/is-ime-composing';

 	interface Props {
 		class?: string;
@@ -97,7 +98,7 @@
 	}

 	async function handleKeydown(event: KeyboardEvent) {
-		if (event.key === 'Enter' && !event.shiftKey) {
+		if (event.key === 'Enter' && !event.shiftKey && !isIMEComposing(event)) {
 			event.preventDefault();

 			if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
index fed0cf712695f..7ade6bc61f333 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte
@@ -1,6 +1,7 @@
[hunk body lost in extraction: the Svelte markup for this one-line addition was stripped]
[new-file diff lost in extraction: a conversation-selector dialog component whose diff header and markup were stripped. Recoverable content: a dialog titled "Select Conversations to {mode === 'export' ? 'Export' : 'Import'}" with mode-specific descriptions ("Selected conversations will be downloaded as a JSON file." / "Selected conversations will be merged with your existing conversations."), a search input with a clear button shown while searchQuery is set, a "{selectedIds.size} of {conversations.length} selected" counter plus "({filteredConversations.length} shown)" while filtering, a selectable table with "Conversation Name" and "Messages" columns whose rows and checkboxes call toggleConversation(conv.id, e.shiftKey), row names falling back to 'Untitled conversation', per-row counts from messageCountMap.get(conv.id) ?? 0, empty states for no matches / no conversations, and Cancel/confirm footer buttons]
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ImportExportTab.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ImportExportTab.svelte
new file mode 100644
index 0000000000000..19c982c7b45ea
--- /dev/null
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ImportExportTab.svelte
@@ -0,0 +1,255 @@
[file body lost in extraction: the Svelte markup was stripped. Recoverable content: an "Export Conversations" section ("Download all your conversations as a JSON file. This includes all messages, attachments, and conversation history.") and an "Import Conversations" section ("Import one or more conversations from a previously exported JSON file. This will merge with your existing conversations."), each followed by a summary panel that reports "Exported/Imported {n} conversation(s)", lists the first 10 conversation names (falling back to 'Untitled conversation'), and appends "... and N more" beyond 10; the tab mounts two conversation-selector dialogs bound to showExportDialog/showImportDialog with handleExportConfirm/handleImportConfirm callbacks]
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte
index e91673e98b036..30d1f9d4b7e98 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSidebar/ChatSidebarActions.svelte
@@ -1,9 +1,8 @@
[hunk body lost in extraction: the Svelte markup was stripped]
diff --git a/tools/server/webui/svelte.config.js b/tools/server/webui/svelte.config.js
index c24f879ddaf42..f25494236bddd 100644
--- a/tools/server/webui/svelte.config.js
+++ b/tools/server/webui/svelte.config.js
@@ -7,6 +7,7 @@ const config = {
 	// Consult https://svelte.dev/docs/kit/integrations
 	// for more information about preprocessors
 	preprocess: [vitePreprocess(), mdsvex()],
+
 	kit: {
 		paths: {
 			relative: true
@@ -23,6 +24,7 @@ const config = {
 			bundleStrategy: 'inline'
 		}
 	},
+
 	extensions: ['.svelte', '.svx']
 };
diff --git a/tools/server/webui/vite.config.ts b/tools/server/webui/vite.config.ts
index 7f7ce3bed3fcc..b077e232ab043 100644
--- a/tools/server/webui/vite.config.ts
+++ b/tools/server/webui/vite.config.ts
@@ -75,7 +75,12 @@ function llamaCppBuildPlugin() {
 }

 export default defineConfig({
+	build: {
+		chunkSizeWarningLimit: 3072
+	},
+
 	plugins: [tailwindcss(), sveltekit(), devtoolsJson(), llamaCppBuildPlugin()],
+
 	test: {
 		projects: [
 			{
@@ -123,6 +128,7 @@ export default defineConfig({
 				}
 			]
 		},
+
 	server: {
 		proxy: {
 			'/v1': 'http://localhost:8080',