Commit 8b528a9

Author: wangzaijun (committed)

remove tensor2bytes byte2tensor
1 parent 832b445 commit 8b528a9

File tree

5 files changed: 1 addition, 49 deletions

lightllm/models/gemma3/layer_infer/pre_layer_infer.py

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 from lightllm.common.basemodel.triton_kernel.multimodal_emb import multimodal_emb
 from lightllm.distributed.communication_op import all_reduce
 from lightllm.models.qwen_vl.layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
-from lightllm.server.embed_cache.utils import bytes2tensor, get_shm_name_embed, read_shm
 
 
 class Gemma3PreLayerInfer(LlamaMultimodalPreLayerInfer):

lightllm/models/qwen3_vl/layer_infer/pre_layer_infer.py

Lines changed: 0 additions & 8 deletions
@@ -2,17 +2,9 @@
 import torch.distributed as dist
 
 from lightllm.models.llama.layer_weights.pre_and_post_layer_weight import LlamaPreAndPostLayerWeight
-from lightllm.models.llama.infer_struct import LlamaInferStateInfo
 from lightllm.models.qwen3_vl.infer_struct import Qwen3VLInferStateInfo
-
-from lightllm.server.embed_cache.utils import (
-    bytes2tensor,
-    read_shm,
-    get_shm_name_embed,
-)
 from lightllm.common.basemodel.triton_kernel.multimodal_emb import multimodal_emb
 from lightllm.distributed.communication_op import all_reduce
-
 from lightllm.models.qwen_vl.layer_infer.pre_layer_infer import LlamaMultimodalPreLayerInfer
 
 

lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py

Lines changed: 0 additions & 3 deletions
@@ -3,10 +3,7 @@
 
 from lightllm.models.llama.layer_weights.pre_and_post_layer_weight import LlamaPreAndPostLayerWeight
 from lightllm.models.llama.infer_struct import LlamaInferStateInfo
-
 from lightllm.models.llama.layer_infer.pre_layer_infer import LlamaPreLayerInfer
-from lightllm.utils.infer_utils import mark_cost_time
-from lightllm.server.embed_cache.utils import bytes2tensor, read_shm, get_shm_name_embed
 from lightllm.common.basemodel.triton_kernel.multimodal_emb import multimodal_emb
 from lightllm.distributed.communication_op import all_reduce
 

lightllm/models/qwen_vl/qwen_visual.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 from PIL import Image
 from typing import Callable, Optional, Sequence, Tuple, List, Union
 import numpy as np
-from lightllm.server.embed_cache.utils import tensor2bytes, read_shm, create_shm, get_shm_name_data, get_shm_name_embed
+from lightllm.server.embed_cache.utils import read_shm, get_shm_name_data
 import torch
 from torch import nn
 from torch.nn import functional as F

lightllm/server/embed_cache/utils.py

Lines changed: 0 additions & 36 deletions
@@ -1,36 +1,4 @@
-import torch
-import numpy as np
-from io import BytesIO
 import multiprocessing.shared_memory as shm
-import time
-
-
-def tensor2bytes(t: torch.Tensor):
-    # convert to a numpy array; use a contiguous copy to keep the memory layout contiguous
-    print(f"tensor2bytes shape: {t.shape} {t.is_contiguous()}")
-    memory_size = t.numel() * t.element_size()
-    out = torch.empty(memory_size, dtype=torch.uint8, device="cpu", pin_memory=True).copy_(t.view(torch.uint8).view(-1))
-    return out.numpy().tobytes()
-
-
-def bytes2tensor(b):
-    # return the binary data directly as a uint8 tensor; the caller converts dtype and view itself,
-    # which avoids issues such as numpy not supporting bfloat16
-    return torch.frombuffer(b, dtype=torch.uint8)
-
-
-def create_shm_and_dump(name, data: torch.Tensor):
-    try:
-        data_size = data.numel() * data.element_size()
-        shared_memory = shm.SharedMemory(name=name, create=True, size=data_size)
-        tensor = torch.frombuffer(shared_memory.buf, dtype=torch.uint8)
-        out = torch.empty(data_size, dtype=torch.uint8, device="cpu", pin_memory=True).copy_(
-            data.view(torch.uint8).view(-1)
-        )
-        tensor.copy_(out)
-        return tensor
-    except FileExistsError:
-        print("Warning create shm {} failed because of FileExistsError!".format(name))
 
 
 def create_shm(name, data):
@@ -57,7 +25,3 @@ def free_shm(name):
 
 def get_shm_name_data(uid):
     return str(uid) + "-data"
-
-
-def get_shm_name_embed(uid):
-    return str(uid) + "-embed"
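For reference, the round trip that the deleted tensor2bytes / bytes2tensor helpers implemented looks roughly like the sketch below: dump a tensor's storage as raw uint8 bytes, then hand the bytes back as a uint8 tensor and let the caller restore dtype and shape, which is what sidestepped numpy's lack of bfloat16 support. The example shape/dtype and the bytearray copy are illustrative assumptions, not part of this commit.

import torch


def tensor2bytes(t: torch.Tensor) -> bytes:
    # reinterpret the tensor's storage as flat uint8 and serialize it to raw bytes
    return t.contiguous().view(torch.uint8).view(-1).cpu().numpy().tobytes()


def bytes2tensor(b: bytes) -> torch.Tensor:
    # return the raw bytes as a uint8 tensor; the caller applies .view(dtype).view(shape)
    # (bytearray gives torch.frombuffer a writable buffer)
    return torch.frombuffer(bytearray(b), dtype=torch.uint8)


# illustrative round trip with a bfloat16 tensor (assumed shape and dtype)
x = torch.randn(4, 8, dtype=torch.bfloat16)
restored = bytes2tensor(tensor2bytes(x)).view(torch.bfloat16).view(4, 8)
assert torch.equal(restored, x)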
