ModelTC
diff --git a/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎lightllm/models/__init__.py‎
Lines changed: 38 additions & 0 deletions b/‎lightllm/models/__init__.py‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎lightllm/models/bloom/model.py‎
Lines changed: 2 additions & 0 deletions b/‎lightllm/models/bloom/model.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎lightllm/models/chatglm2/model.py‎
Lines changed: 2 additions & 0 deletions b/‎lightllm/models/chatglm2/model.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎lightllm/models/cohere/model.py‎
Lines changed: 2 additions & 0 deletions b/‎lightllm/models/cohere/model.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎lightllm/models/deepseek2/model.py‎
Lines changed: 2 additions & 0 deletions b/‎lightllm/models/deepseek2/model.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎lightllm/models/gemma3/model.py‎
Lines changed: 3 additions & 0 deletions b/‎lightllm/models/gemma3/model.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎lightllm/models/gemma_2b/model.py‎
Lines changed: 2 additions & 0 deletions b/‎lightllm/models/gemma_2b/model.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎lightllm/models/internlm/model.py‎
Lines changed: 2 additions & 1 deletion b/‎lightllm/models/internlm/model.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎lightllm/models/internlm2/model.py‎
Lines changed: 4 additions & 3 deletions b/‎lightllm/models/internlm2/model.py‎
Lines changed: 4 additions & 3 deletions
@@ -1,5 +1,5 @@
 FROM nvcr.io/nvidia/tritonserver:24.04-py3-min as base
-ARG PYTORCH_VERSION=2.5.1
+ARG PYTORCH_VERSION=2.6.0
 ARG PYTHON_VERSION=3.9
 ARG CUDA_VERSION=12.4
 ARG MAMBA_VERSION=23.1.0-1
 
@@ -0,0 +1,38 @@
+from lightllm.models.cohere.model import CohereTpPartModel
+from lightllm.models.mixtral.model import MixtralTpPartModel
+from lightllm.models.bloom.model import BloomTpPartModel
+from lightllm.models.llama.model import LlamaTpPartModel
+from lightllm.models.starcoder.model import StarcoderTpPartModel
+from lightllm.models.starcoder2.model import Starcoder2TpPartModel
+from lightllm.models.qwen.model import QWenTpPartModel
+from lightllm.models.qwen2.model import Qwen2TpPartModel
+from lightllm.models.qwen3.model import Qwen3TpPartModel
+from lightllm.models.qwen3_moe.model import Qwen3MOEModel
+from lightllm.models.chatglm2.model import ChatGlm2TpPartModel
+from lightllm.models.internlm.model import InternlmTpPartModel
+from lightllm.models.stablelm.model import StablelmTpPartModel
+from lightllm.models.internlm2.model import Internlm2TpPartModel
+from lightllm.models.internlm2_reward.model import Internlm2RewardTpPartModel
+from lightllm.models.mistral.model import MistralTpPartModel
+from lightllm.models.minicpm.model import MiniCPMTpPartModel
+from lightllm.models.llava.model import LlavaTpPartModel
+from lightllm.models.qwen_vl.model import QWenVLTpPartModel
+from lightllm.models.gemma_2b.model import Gemma_2bTpPartModel
+from lightllm.models.phi3.model import Phi3TpPartModel
+from lightllm.models.deepseek2.model import Deepseek2TpPartModel
+from lightllm.models.internvl.model import (
+    InternVLLlamaTpPartModel,
+    InternVLPhi3TpPartModel,
+    InternVLQwen2TpPartModel,
+    InternVLDeepSeek2TpPartModel,
+)
+from lightllm.models.internvl.model import InternVLInternlm2TpPartModel
+from lightllm.models.qwen2_vl.model import Qwen2VLTpPartModel
+from lightllm.models.qwen2_reward.model import Qwen2RewardTpPartModel
+from lightllm.models.gemma3.model import Gemma3TpPartModel
+from lightllm.models.tarsier2.model import (
+    Tarsier2Qwen2TpPartModel,
+    Tarsier2Qwen2VLTpPartModel,
+    Tarsier2LlamaTpPartModel,
+)
+from .registry import get_model
@@ -1,6 +1,7 @@
 import os
 import json
 import torch
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.bloom.layer_infer.pre_layer_infer import BloomPreLayerInfer
 from lightllm.models.bloom.layer_infer.post_layer_infer import BloomPostLayerInfer
 from lightllm.models.bloom.layer_infer.transformer_layer_infer import BloomTransformerLayerInfer
@@ -12,6 +13,7 @@
 from lightllm.common.build_utils import repair_config
 
 
+@ModelRegistry("bloom")
 class BloomTpPartModel(TpPartBaseModel):
     # weight class
     pre_and_post_weight_class = BloomPreAndPostLayerWeight
 
@@ -2,6 +2,7 @@
 import json
 import torch
 
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.chatglm2.layer_infer.transformer_layer_infer import ChatGLM2TransformerLayerInfer
 from lightllm.models.chatglm2.layer_weights.transformer_layer_weight import ChatGLM2TransformerLayerWeight
 from lightllm.models.chatglm2.layer_weights.pre_and_post_layer_weight import ChatGLM2PreAndPostLayerWeight
@@ -12,6 +13,7 @@
 logger = init_logger(__name__)
 
 
+@ModelRegistry("chatglm")
 class ChatGlm2TpPartModel(LlamaTpPartModel):
     # Please use the fast tokenizer from:
     # [THUDM/chatglm3-6b PR #12](https://huggingface.co/THUDM/chatglm3-6b/discussions/12).
 
@@ -5,6 +5,7 @@
     TransformerLayerCohereInferTpl,
 )
 from lightllm.common.mem_manager import MemoryManager
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.cohere.infer_struct import CohereInferStateInfo
 from lightllm.models.cohere.layer_infer.post_layer_infer import CoherePostLayerInfer
 from lightllm.models.cohere.layer_infer.transformer_layer_infer import CohereTransformerLayerInfer
@@ -17,6 +18,7 @@
 logger = init_logger(__name__)
 
 
+@ModelRegistry("cohere")
 class CohereTpPartModel(LlamaTpPartModel):
     pre_and_post_weight_class = CoherePreAndPostLayerWeight
     transformer_weight_class = CohereTransformerLayerWeight
 
@@ -1,5 +1,6 @@
 import torch
 from typing import final
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.deepseek2.layer_infer.transformer_layer_infer import Deepseek2TransformerLayerInfer
 from lightllm.models.deepseek2.layer_weights.transformer_layer_weight import Deepseek2TransformerLayerWeight
 from lightllm.models.deepseek2.infer_struct import Deepseek2InferStateInfo
@@ -49,6 +50,7 @@ def __init__(self, model):
                 self.softmax_scale = self.softmax_scale * mscale * mscale
 
 
+@ModelRegistry(["deepseek_v2", "deepseek_v3"])
 class Deepseek2TpPartModel(LlamaTpPartModel):
     # weight class
     transformer_weight_class = Deepseek2TransformerLayerWeight
 
@@ -3,6 +3,7 @@
 import json
 import numpy as np
 import torch
+from lightllm.models.registry import ModelRegistry
 from lightllm.common.basemodel.multimodal_tokenizer import BaseMultiModalTokenizer
 from lightllm.common.mem_utils import select_mem_manager_class
 from lightllm.models.gemma3.infer_struct import Gemma3InferStateInfo
@@ -22,6 +23,7 @@
 
 logger = init_logger(__name__)
 
+
 # Wrapper around the original tokenizer
 class Gemma3Tokenizer(BaseMultiModalTokenizer):
     def __init__(self, tokenizer, model_cfg):
@@ -77,6 +79,7 @@ def encode(self, prompt, multimodal_params: MultimodalParams = None, add_special
         return input_ids
 
 
+@ModelRegistry("gemma3")
 class Gemma3TpPartModel(LlamaTpPartModel):
     # weight class
     pre_and_post_weight_class = Gemma3PreAndPostLayerWeight
 
@@ -1,3 +1,4 @@
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.gemma_2b.layer_weights.transformer_layer_weight import Gemma_2bTransformerLayerWeight
 from lightllm.models.gemma_2b.layer_weights.pre_and_post_layer_weight import Gemma_2bPreAndPostLayerWeight
 from lightllm.models.gemma_2b.layer_infer.pre_layer_infer import Gemma_2bPreLayerInfer
@@ -8,6 +9,7 @@
 from lightllm.common.mem_utils import select_mem_manager_class
 
 
+@ModelRegistry("gemma")
 class Gemma_2bTpPartModel(LlamaTpPartModel):
     # weight class
     pre_and_post_weight_class = Gemma_2bPreAndPostLayerWeight
 
@@ -1,11 +1,12 @@
 import os
 import json
 import torch
-
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.internlm.layer_weights.transformer_layer_weight import InternlmTransformerLayerWeight
 from lightllm.models.llama.model import LlamaTpPartModel
 
 
+@ModelRegistry("internlm")
 class InternlmTpPartModel(LlamaTpPartModel):
     # weight class
     transformer_weight_class = InternlmTransformerLayerWeight
 
@@ -2,16 +2,17 @@
 import json
 import torch
 
+from lightllm.models.registry import ModelRegistry
 from lightllm.models.internlm2.layer_weights.transformer_layer_weight import Internlm2TransformerLayerWeight
-from lightllm.models.internlm2.layer_weights.pre_and_post_layer_weight import Internlm2PreAndPostLayerWeight 
+from lightllm.models.internlm2.layer_weights.pre_and_post_layer_weight import Internlm2PreAndPostLayerWeight
 from lightllm.models.internlm.model import InternlmTpPartModel
 
 
+@ModelRegistry("internlm2")
 class Internlm2TpPartModel(InternlmTpPartModel):
     # weight class
-    pre_and_post_weight_class = Internlm2PreAndPostLayerWeight 
+    pre_and_post_weight_class = Internlm2PreAndPostLayerWeight
     transformer_weight_class = Internlm2TransformerLayerWeight
 
     def __init__(self, kvargs):
         super().__init__(kvargs)
-