@@ -146,49 +146,4 @@ def mark_multimodal_obj(obj_start_token_ids: torch.Tensor, obj_token_lens: torch
146146 num_warps = 1 ,
147147 num_stages = 1 ,
148148 )
149- return out_mark
150-
151-
def test():
    """Micro-benchmark for ``multimodal_emb`` on synthetic data.

    Builds a fake text-vocabulary embedding table and an image-token
    embedding table on the GPU, constructs a prompt whose prefix is image
    tokens (ids offset far above the text vocab) and whose remainder is
    plain text ids, then times ``iters`` kernel launches.

    Requires a CUDA device; prints the mean per-launch time in seconds.
    Returns None.
    """
    import time  # local import kept function-scoped, but hoisted to the top of the scope

    S, D = 1024 * 1000, 128 * 64
    vob_size = 320000
    image_size = 10
    image_token_size = 512

    text_weight = torch.randn((vob_size, D), device="cuda", dtype=torch.float16)
    img_weight = torch.randn((image_size * image_token_size, D), device="cuda", dtype=torch.float16)
    img_token_lens = torch.full((image_size,), image_token_size, device="cuda", dtype=torch.long)
    # Image token ids are offset by vob_size * 10 so they cannot collide with text ids.
    img_start_token_ids = (
        (torch.arange(0, image_size * image_token_size, image_token_size) + vob_size * 10).cuda().long()
    )
    img_start_locs = torch.arange(0, image_size * image_token_size, image_token_size).cuda().long()

    prompt_ids = torch.arange(0, S, 1).cuda().long()
    # The prompt prefix holds every image token id in order; the rest stay text ids.
    prompt_ids[0 : image_size * image_token_size] = (
        (vob_size * 10 + torch.arange(0, image_size * image_token_size, 1)).cuda().long()
    )

    out = torch.zeros((S, D), dtype=torch.float16, device="cuda")
    print(out.shape)

    # Warm-up launch so Triton JIT/compile cost is excluded from the timed loop.
    multimodal_emb(
        out, prompt_ids, text_weight, img_weight, img_token_lens, img_start_token_ids, img_start_locs, 0, vob_size
    )

    torch.cuda.synchronize()
    iters = 20
    # perf_counter is the monotonic, high-resolution clock intended for timing;
    # time.time() can jump with wall-clock adjustments.
    t1 = time.perf_counter()
    for _ in range(iters):
        multimodal_emb(
            out, prompt_ids, text_weight, img_weight, img_token_lens, img_start_token_ids, img_start_locs, 0, vob_size
        )
    # Kernel launches are asynchronous; synchronize before reading the clock.
    torch.cuda.synchronize()
    t2 = time.perf_counter()
    print("Triton time cost", (t2 - t1) / iters)
    return


if __name__ == "__main__":
    test()
149+ return out_mark
0 commit comments