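Fix InternVL image token length: read it from INTERNVL_IMAGE_LENGTH (default 256) and assert the image embedding dimension matches the text hidden size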

sangchengmeng@sensetime.com · sangchengmeng@sensetime.com · commit fc646129c961 · 2025-03-06T21:26:14.000+08:00
diff --git a/lightllm/models/internvl/model.py b/lightllm/models/internvl/model.py
@@ -31,7 +31,7 @@ def __init__(self, tokenizer, model_cfg, **kwargs):
 
         self.llm_model_type = model_cfg.get("llm_config").get("model_type")
         self.tokenizer = tokenizer
-        self.image_length = 256
+        self.image_length = int(os.environ.get("INTERNVL_IMAGE_LENGTH", 256))
 
         self.image_start_tag = IMG_START_TOKEN
         self.image_start_id = tokenizer.convert_tokens_to_ids(self.image_start_tag)
diff --git a/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py b/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py
@@ -58,6 +58,10 @@ def context_forward(self, input_ids, infer_state: LlamaInferStateInfo, layer_wei
             img_weight = torch.cat(img_weight, dim=0).to(device=device, dtype=dtype)
         else:
             img_weight = torch.empty((0, hidden_size), device=device, dtype=dtype)
+        assert img_weight.shape[1] == hidden_size, (
+            f"Dimension mismatch: text weight dimension is {hidden_size}, "
+            f"but image weight dimension is {img_weight.shape[1]}"
+        )
         # each tp will fill the img embeds, should divide by world_size
         img_weight = img_weight / self.world_size_
         img_start_token_ids = torch.Tensor(img_start_token_ids).to(device=device, dtype=torch.long)