
Commit e000ae8

fix set_items_embed (#1151)

Authored by hiworldwzj
Co-authored-by: wangzaijun <[email protected]>
Co-authored-by: shihaobai <[email protected]>

1 parent fa0cb52

4 files changed: +3 -5 lines


lightllm/models/qwen3_vl/layer_infer/pre_layer_infer.py

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ def context_forward(self, input_ids, infer_state: Qwen3VLInferStateInfo, layer_w
         infer_state.img_start_locs_in_cache = torch.tensor(
             img_start_locs_in_cache, dtype=torch.long, device="cpu", pin_memory=True
         ).cuda(non_blocking=True)
+        infer_state.input_ids = input_ids
 
         multimodal_emb(
             out=out,
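
A side note on the transfer pattern visible in the context lines above: pairing pin_memory=True with .cuda(non_blocking=True) is what makes the host-to-device copy asynchronous; a copy from ordinary pageable memory would fall back to a blocking transfer. A minimal standalone sketch of the same pattern, with illustrative values rather than repository code (requires a CUDA device):

import torch

# Hypothetical image start offsets; any small host-side list works.
img_start_locs = [0, 128, 256]

# Staging the tensor in pinned (page-locked) host memory lets the copy below
# be enqueued asynchronously on the current CUDA stream.
cpu_t = torch.tensor(img_start_locs, dtype=torch.long, device="cpu", pin_memory=True)
gpu_t = cpu_t.cuda(non_blocking=True)

# Kernels queued later on the same stream are ordered after the copy, so they
# see the transferred values without an explicit host-side synchronize.
print(gpu_t + 1)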

lightllm/models/qwen3_vl/triton_kernel/deepstack_multimodal_emb.py

Lines changed: 0 additions & 3 deletions
@@ -102,9 +102,6 @@ def apply_deepstack_features(
     apply deepstack features for all images in qwen3-vl/qwen3-vl-moe
     """
 
-    if not infer_state.deepstack_features:
-        return
-
     deepstack_num_layers = infer_state.cpu_embed_cache_tensor.shape[1] - 1
 
     if layer_num >= deepstack_num_layers:

lightllm/models/whisper/whisper_audio.py

Lines changed: 1 addition & 1 deletion
@@ -239,5 +239,5 @@ def encode(self, audio_items: List[AudioItem], cpu_embed_cache_client: CpuEmbedC
             ids_to_set.append(uid)
 
         if ids_to_set:
-            torch.cuda.current_stream().synchronize()
             self.cache_client.root.set_items_embed(ids=ids_to_set)
+            torch.cuda.current_stream().synchronize()

lightllm/server/visualserver/model_infer/model_rpc.py

Lines changed: 1 addition & 1 deletion
@@ -120,8 +120,8 @@ def exposed_encode(self, images: List[ImageItem]):
             )
             ids_to_set.append(uid)
         if ids_to_set:
-            torch.cuda.current_stream().synchronize()
             self.cache_client.root.set_items_embed(ids_to_set)
+            torch.cuda.current_stream().synchronize()
         return
 
 
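
The last two hunks apply the same one-line reorder to the audio and image encoders: set_items_embed now runs before the stream synchronize instead of after it, making the synchronize the final step before the method returns. For reference, torch.cuda.current_stream().synchronize() blocks the host until everything already queued on the current CUDA stream has finished. A small self-contained sketch of that guarantee, with illustrative tensor names rather than repository code (requires a CUDA device):

import torch

# A stand-in for an embed produced on the GPU.
embed = torch.randn(4, 8, device="cuda")

# Copy it back to pinned host memory asynchronously; control returns to the
# host immediately, while the copy may still be in flight on the stream.
host_buf = torch.empty(embed.shape, dtype=embed.dtype, device="cpu", pin_memory=True)
host_buf.copy_(embed, non_blocking=True)

# Block the host until every op queued on the current stream has completed;
# only after this point is host_buf guaranteed to hold the finished copy.
torch.cuda.current_stream().synchronize()
assert torch.equal(host_buf, embed.cpu())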
