Skip to content

Commit 417b3d1

Browse files
add qwen3 and qwen3_moe (#875)
Co-authored-by: baishihao <baishihao@sensetime.com> Co-authored-by: wangzaijun <wzjhelloworld@qq.com>
1 parent 66a5e8a commit 417b3d1

File tree

17 files changed

+389
-19
lines changed

17 files changed

+389
-19
lines changed

docs/CN/source/models/supported_models.rst

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,6 @@ lightllm 支持大多数的主流的开源大语言模型以及多模态模型
3232
- :code:`--eos_id 151643 --trust_remote_code`
3333
* - `ChatGLM2-6b <https://github.com/THUDM/ChatGLM2-6B>`_
3434
- :code:`--trust_remote_code`
35-
* - `Baichuan-7b <https://github.com/baichuan-inc/Baichuan-7B>`_
36-
- :code:`--trust_remote_code`
37-
* - `Baichuan-13b <https://github.com/baichuan-inc/Baichuan-13B>`_
38-
- :code:`--trust_remote_code`
39-
* - `Baichuan2-7b <https://github.com/baichuan-inc/Baichuan2>`_
40-
- :code:`--trust_remote_code`
41-
* - `Baichuan2-13b <https://github.com/baichuan-inc/Baichuan2>`_
42-
- :code:`--trust_remote_code`
4335
* - `InternLM-7b <https://github.com/InternLM/InternLM>`_
4436
- :code:`--trust_remote_code`
4537
* - `Yi-34b <https://huggingface.co/01-ai/Yi-34B>`_
@@ -58,6 +50,12 @@ lightllm 支持大多数的主流的开源大语言模型以及多模态模型
5850
- :code:`--data_type bfloat16`
5951
* - `DeepSeek-V2 <https://huggingface.co/deepseek-ai/DeepSeek-V2>`_
6052
- :code:`--data_type bfloat16`
53+
* - `DeepSeek-V3 <https://huggingface.co/deepseek-ai/DeepSeek-V3>`_
54+
-
55+
* - `Qwen3 <https://github.com/QwenLM/Qwen3>`_
56+
-
57+
* - `Qwen3-Moe <https://github.com/QwenLM/Qwen3>`_
58+
-
6159

6260

6361
多模态模型

docs/EN/source/models/supported_models.rst

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,6 @@ LLM
3131
- :code:`--eos_id 151643 --trust_remote_code`
3232
* - `ChatGLM2-6b <https://github.com/THUDM/ChatGLM2-6B>`_
3333
- :code:`--trust_remote_code`
34-
* - `Baichuan-7b <https://github.com/baichuan-inc/Baichuan-7B>`_
35-
- :code:`--trust_remote_code`
36-
* - `Baichuan-13b <https://github.com/baichuan-inc/Baichuan-13B>`_
37-
- :code:`--trust_remote_code`
38-
* - `Baichuan2-7b <https://github.com/baichuan-inc/Baichuan2>`_
39-
- :code:`--trust_remote_code`
40-
* - `Baichuan2-13b <https://github.com/baichuan-inc/Baichuan2>`_
41-
- :code:`--trust_remote_code`
4234
* - `InternLM-7b <https://github.com/InternLM/InternLM>`_
4335
- :code:`--trust_remote_code`
4436
* - `Yi-34b <https://huggingface.co/01-ai/Yi-34B>`_
@@ -57,6 +49,11 @@ LLM
5749
- :code:`--data_type bfloat16`
5850
* - `DeepSeek-V2 <https://huggingface.co/deepseek-ai/DeepSeek-V2>`_
5951
- :code:`--data_type bfloat16`
52+
* - `Qwen3 <https://github.com/QwenLM/Qwen3>`_
53+
-
54+
* - `Qwen3-Moe <https://github.com/QwenLM/Qwen3>`_
55+
-
56+
6057

6158

6259
VLM

lightllm/common/basemodel/layer_weights/meta_weights/fused_moe_weight_tp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __init__(
4545
self.e_score_correction_bias = None
4646
self.w2_list = [None] * self.n_routed_experts
4747
self.w2_scale_list = [None] * self.n_routed_experts
48-
self.scoring_func = network_config["scoring_func"]
48+
self.scoring_func = network_config.get("scoring_func", "softmax")
4949
self.w1 = [None, None] # weight, weight_scale
5050
self.w2 = [None, None] # weight, weight_scale
5151
self.lock = threading.Lock()

lightllm/models/llama/model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,13 @@ def _verify_params(self):
8181
return
8282

8383
def _init_mem_manager(self):
    """Allocate the KV-cache memory manager for this model.

    Some configs (e.g. Qwen3-style) declare an explicit "head_dim" that can
    differ from hidden_size // num_attention_heads, so prefer the explicit
    value when present and fall back to the derived one otherwise.
    """
    head_dim = self.config.get(
        "head_dim", self.config["hidden_size"] // self.config["num_attention_heads"]
    )
    self.mem_manager = select_mem_manager_class(self.mode)(
        self.max_total_token_num,
        dtype=self.data_type,
        head_num=self.config["num_key_value_heads"] // self.tp_world_size_,
        head_dim=head_dim,
        layer_num=self.config["num_hidden_layers"],
        mem_fraction=self.mem_fraction,
    )

lightllm/models/qwen3/__init__.py

Whitespace-only changes.

lightllm/models/qwen3/layer_infer/__init__.py

Whitespace-only changes.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import os
import torch
import torch.nn.functional as F  # fix: was "torch.functional", which is not the nn functional API
import torch.distributed as dist
import numpy as np
import triton
from typing import Tuple
from functools import partial

from lightllm.models.qwen3.layer_weights.transformer_layer_weight import Qwen3TransformerLayerWeight
from lightllm.models.llama.layer_infer.transformer_layer_infer import LlamaTransformerLayerInfer
from lightllm.models.llama.infer_struct import LlamaInferStateInfo
from lightllm.models.llama.triton_kernel.rmsnorm import rmsnorm_forward
from lightllm.models.llama.triton_kernel.rotary_emb import rotary_emb_fwd
from lightllm.models.llama.triton_kernel.silu_and_mul import silu_and_mul_fwd
from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)
18+
19+
20+
class Qwen3TransformerLayerInfer(LlamaTransformerLayerInfer):
    """Transformer-layer inference for Qwen3.

    Extends the llama layer inference with Qwen3's per-head q/k RMSNorm:
    q and k are normalized with dedicated norm weights before rotary
    position embedding is applied.
    """

    def __init__(self, layer_num, network_config, mode=None):
        # Fix: upstream used a mutable default argument (mode=[]); use a None
        # sentinel so one list is not shared across every instance.
        super().__init__(layer_num, network_config, [] if mode is None else mode)
        # Qwen3 configs declare head_dim explicitly; it may differ from
        # hidden_size // num_attention_heads, so take it from the config.
        self.head_dim_ = network_config["head_dim"]
        return

    def _get_qkv(
        self,
        input: torch.Tensor,
        cache_kv,
        infer_state: LlamaInferStateInfo,
        layer_weight: Qwen3TransformerLayerWeight,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Project the hidden states to q and packed kv, then apply q/k RMSNorm and RoPE.

        Returns the pair (q, cache_kv).  The annotation was corrected from
        ``torch.Tensor``: the method returns a 2-tuple.
        """
        input = input.view(-1, self.embed_dim_)
        q = layer_weight.q_proj.mm(input)
        # kv projection writes directly into the provided cache buffer.
        cache_kv = layer_weight.kv_proj.mm(
            input, out=cache_kv.view(-1, (self.tp_k_head_num_ + self.tp_v_head_num_) * self.head_dim_)
        ).view(-1, (self.tp_k_head_num_ + self.tp_v_head_num_), self.head_dim_)
        # Qwen3 applies RMSNorm per attention head to q and k (v is untouched).
        rmsnorm_forward(
            q.reshape(-1, self.head_dim_),
            weight=layer_weight.q_norm_weight_.weight,
            eps=self.eps_,
            out=q.reshape(-1, self.head_dim_),
        )
        # NOTE(review): cache_kv[:, :k, :] is generally non-contiguous, so
        # reshape() here may produce a copy — verify the normalized k values
        # actually land in cache_kv (TODO confirm against the triton kernel).
        rmsnorm_forward(
            cache_kv[:, : self.tp_k_head_num_, :].reshape(-1, self.head_dim_),
            weight=layer_weight.k_norm_weight_.weight,
            eps=self.eps_,
            out=cache_kv[:, : self.tp_k_head_num_, :].reshape(-1, self.head_dim_),
        )
        # Rotary embedding is applied after the norms, to q and the k part of kv.
        rotary_emb_fwd(
            q.view(-1, self.tp_q_head_num_, self.head_dim_),
            cache_kv[:, : self.tp_k_head_num_, :],
            infer_state.position_cos,
            infer_state.position_sin,
        )
        return q, cache_kv

lightllm/models/qwen3/layer_weights/__init__.py

Whitespace-only changes.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import os
2+
import torch
3+
import math
4+
import numpy as np
5+
from lightllm.common.basemodel import TransformerLayerWeight
6+
from lightllm.models.llama.layer_weights.transformer_layer_weight import LlamaTransformerLayerWeight
7+
from lightllm.utils.envs_utils import enable_env_vars
8+
from lightllm.common.basemodel.layer_weights.meta_weights import (
9+
ROWMMWeight,
10+
MultiROWMMWeight,
11+
COLMMWeight,
12+
NormWeight,
13+
FusedMoeWeightTP,
14+
FusedMoeWeightEP,
15+
ROWBMMWeight,
16+
)
17+
from functools import partial
18+
19+
20+
class Qwen3TransformerLayerWeight(LlamaTransformerLayerWeight):
    """Per-layer weights for Qwen3: llama weights plus per-head q/k RMSNorm weights.

    Also derives MoE routing metadata (``n_routed_experts``, ``is_moe``) so the
    MoE variant can share this class.
    """

    def __init__(self, layer_num, data_type, network_config, mode=None, quant_cfg=None):
        # Fix: dense Qwen3 configs do not carry the MoE keys, so indexing
        # network_config["num_experts"] would raise KeyError — default the MoE
        # fields instead (num_experts=0 marks every layer as a plain MLP layer).
        self.n_routed_experts = network_config.get("num_experts", 0)
        self.is_moe = (
            self.n_routed_experts > 0
            and layer_num not in network_config.get("mlp_only_layers", [])
            and (layer_num + 1) % network_config.get("decoder_sparse_step", 1) == 0
        )
        # Fix: upstream used a mutable default argument (mode=[]); use a None
        # sentinel so one list is not shared across every instance.
        super().__init__(layer_num, data_type, network_config, [] if mode is None else mode, quant_cfg)
        return

    def _init_weight_names(self):
        """Extend the llama weight names with Qwen3's q/k norm weight names."""
        super()._init_weight_names()
        self._q_norm_name = f"model.layers.{self.layer_num_}.self_attn.q_norm.weight"
        self._k_norm_name = f"model.layers.{self.layer_num_}.self_attn.k_norm.weight"

    def _init_norm(self):
        """Create the llama norm weights plus the per-head q/k RMSNorm weights."""
        super()._init_norm()
        self.q_norm_weight_ = NormWeight(weight_name=self._q_norm_name, data_type=self.data_type_)
        self.k_norm_weight_ = NormWeight(weight_name=self._k_norm_name, data_type=self.data_type_)

lightllm/models/qwen3/model.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import torch
2+
from typing import final
3+
from lightllm.models.qwen3.layer_infer.transformer_layer_infer import Qwen3TransformerLayerInfer
4+
from lightllm.models.qwen3.layer_weights.transformer_layer_weight import Qwen3TransformerLayerWeight
5+
from lightllm.models.llama.model import LlamaTpPartModel
6+
from lightllm.utils.log_utils import init_logger
7+
8+
9+
logger = init_logger(__name__)
10+
11+
12+
class Qwen3TpPartModel(LlamaTpPartModel):
    """Tensor-parallel Qwen3 model.

    Reuses the llama model skeleton, swapping in the Qwen3 layer-weight and
    layer-inference classes (which add the per-head q/k RMSNorm).
    """

    # weight class
    transformer_weight_class = Qwen3TransformerLayerWeight

    # infer class
    transformer_layer_infer_class = Qwen3TransformerLayerInfer

    def __init__(self, kvargs):
        # No Qwen3-specific initialization beyond the llama base.
        super().__init__(kvargs)

0 commit comments

Comments
 (0)