Commit b8a6074

small fix

1 parent 2863442 commit b8a6074

3 files changed: 58 additions, 63 deletions

cmake/BuildFlags.cmake

Lines changed: 0 additions & 2 deletions
@@ -129,8 +129,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 
   set(SYCL_FLAGS ${SYCL_FLAGS} ${SYCL_KERNEL_OPTIONS})
 
-  # set(SYCL_OFFLINE_COMPILER_CG_OPTIONS ${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -fno-sycl-instrument-device-code)
-  # set(SYCL_OFFLINE_COMPILER_CG_OPTIONS ${SYCL_OFFLINE_COMPILER_CG_OPTIONS} ${SYCL_LINK_FLAGS})
   set(SYCL_OFFLINE_COMPILER_FLAGS "${SYCL_OFFLINE_COMPILER_AOT_OPTIONS}${SYCL_OFFLINE_COMPILER_CG_OPTIONS}")
 else()
   message("Not compiling with XPU. Currently only support GCC compiler on Linux as CXX compiler.")

python/sgl_kernel/flash_attn.py

Lines changed: 0 additions & 2 deletions
@@ -198,8 +198,6 @@ def flash_attn_with_kvcache(
             )
         ).to(torch.int32)
 
-    import pdb; pdb.set_trace()
-
     out, softmax_lse, *rest = torch.ops.sgl_kernel.fwd.default(
         q,
         k_cache,
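
The deleted line was an unconditional pdb.set_trace() on the library call path, which would stop every caller at this point. As an aside (not part of this commit), Python's built-in breakpoint() from PEP 553 is a more forgiving hook for temporary debugging because it can be silenced with PYTHONBREAKPOINT=0; the environment flag in the sketch below is hypothetical, not an sgl-kernel setting.

import os

def _maybe_break() -> None:
    # SGL_DEBUG_FLASH_ATTN is an illustrative, made-up opt-in flag.
    if os.environ.get("SGL_DEBUG_FLASH_ATTN") == "1":
        # breakpoint() honors PYTHONBREAKPOINT (PEP 553): setting it to 0
        # turns the call into a no-op, so a stray hook cannot stall runs.
        breakpoint()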

tests/test_flash_attention.py

Lines changed: 58 additions & 59 deletions
@@ -921,65 +921,64 @@ def test_flash_attn_kvcache(
 
     # # Check that FlashAttention's numerical error is at most twice the numerical error
    # # of a Pytorch implementation.
-    # if new_kv:
-        # if page_size is None:
-            # k_cache_select = (
-                # k_cache.to(dtype_ref)
-                # if not has_batch_idx
-                # else k_cache.to(dtype_ref)[cache_batch_idx]
-            # )
-            # v_cache_select = (
-                # v_cache.to(dtype_ref)
-                # if not has_batch_idx
-                # else v_cache.to(dtype_ref)[cache_batch_idx]
-            # )
-        # else:
-            # k_cache_select = rearrange(
-                # k_cache_paged.to(dtype_ref)[
-                    # (
-                        # page_table
-                        # if not has_batch_idx
-                        # else page_table[cache_batch_idx]
-                    # ).flatten()
-                # ],
-                # "(b nblocks) block_size ... -> b (nblocks block_size) ...",
-                # b=batch_size,
-            # )[:, :seqlen_k].to(dtype_ref)
-            # v_cache_select = rearrange(
-                # v_cache_paged.to(dtype_ref)[
-                    # (
-                        # page_table
-                        # if not has_batch_idx
-                        # else page_table[cache_batch_idx]
-                    # ).flatten()
-                # ],
-                # "(b nblocks) block_size ... -> b (nblocks block_size) ...",
-                # b=batch_size,
-            # )[:, :seqlen_k].to(dtype_ref)
-        # k_cache_ref = k_cache_ref.to(dtype).to(dtype_ref)
-        # v_cache_ref = v_cache_ref.to(dtype).to(dtype_ref)
-        # # if dtype is not torch.float8_e4m3fn:
-            # # import pdb; pdb.set_trace()
-            # # assert torch.equal(v_cache_select, v_cache_ref)
-        # # else:
-            # # assert torch.allclose(
-                # # v_cache_select, v_cache_ref, rtol=1e-3, atol=1e-3
-            # # )
-        # # breakpoint()
-        # # if rotary_dim == 0 and dtype is not torch.float8_e4m3fn:
-        # # if rotary_dim == 0:
-            # # assert torch.equal(k_cache_select, k_cache_ref)
-        # # else:
-            # # # if not torch.allclose(k_cache_select, k_cache_ref, rtol=1e-3, atol=1e-3):
-            # # # breakpoint()
-            # # if dtype is not torch.float8_e4m3fn:
-                # # assert torch.allclose(
-                    # # k_cache_select, k_cache_ref, rtol=1e-3, atol=1e-3
-                # # )
-            # # else:
-                # # assert torch.allclose(
-                    # # k_cache_select, k_cache_ref, rtol=1e-1, atol=1e-1
-                # # )
+    if new_kv:
+        if page_size is None:
+            k_cache_select = (
+                k_cache.to(dtype_ref)
+                if not has_batch_idx
+                else k_cache.to(dtype_ref)[cache_batch_idx]
+            )
+            v_cache_select = (
+                v_cache.to(dtype_ref)
+                if not has_batch_idx
+                else v_cache.to(dtype_ref)[cache_batch_idx]
+            )
+        else:
+            k_cache_select = rearrange(
+                k_cache_paged.to(dtype_ref)[
+                    (
+                        page_table
+                        if not has_batch_idx
+                        else page_table[cache_batch_idx]
+                    ).flatten()
+                ],
+                "(b nblocks) block_size ... -> b (nblocks block_size) ...",
+                b=batch_size,
+            )[:, :seqlen_k].to(dtype_ref)
+            v_cache_select = rearrange(
+                v_cache_paged.to(dtype_ref)[
+                    (
+                        page_table
+                        if not has_batch_idx
+                        else page_table[cache_batch_idx]
+                    ).flatten()
+                ],
+                "(b nblocks) block_size ... -> b (nblocks block_size) ...",
+                b=batch_size,
+            )[:, :seqlen_k].to(dtype_ref)
+        k_cache_ref = k_cache_ref.to(dtype).to(dtype_ref)
+        v_cache_ref = v_cache_ref.to(dtype).to(dtype_ref)
+        if dtype is not torch.float8_e4m3fn:
+            import pdb; pdb.set_trace()
+            assert torch.equal(v_cache_select, v_cache_ref)
+        else:
+            assert torch.allclose(
+                v_cache_select, v_cache_ref, rtol=1e-3, atol=1e-3
+            )
+        breakpoint()
+        if rotary_dim == 0 and dtype is not torch.float8_e4m3fn:
+            assert torch.equal(k_cache_select, k_cache_ref)
+        else:
+            # if not torch.allclose(k_cache_select, k_cache_ref, rtol=1e-3, atol=1e-3):
+            # breakpoint()
+            if dtype is not torch.float8_e4m3fn:
+                assert torch.allclose(
+                    k_cache_select, k_cache_ref, rtol=1e-3, atol=1e-3
+                )
+            else:
+                assert torch.allclose(
+                    k_cache_select, k_cache_ref, rtol=1e-1, atol=1e-1
+                )
     mult = 4 if dtype == torch.float8_e4m3fn else 2
     assert (out - out_ref).abs().max().item() <= mult * (
         out_pt - out_ref
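
For the paged path (page_size set), the re-enabled checks rebuild a contiguous per-sequence cache from k_cache_paged / v_cache_paged by gathering blocks through page_table and folding the block axes back together with einops.rearrange, then compare the result against the reference cache. Below is a minimal standalone sketch of that gather-and-rearrange step, with toy shapes and an identity page table; the variable names mirror the test, but the values and layout here are illustrative, not taken from the commit.

import torch
from einops import rearrange

# Toy dimensions (illustrative only).
batch_size, nblocks, block_size, nheads, headdim = 2, 3, 4, 1, 8
seqlen_k = 10  # the test only compares the first seqlen_k positions

# Paged cache: every block of every sequence stacked along dim 0.
k_cache_paged = torch.randn(batch_size * nblocks, block_size, nheads, headdim)

# Page table: for each sequence, the physical block indices it owns
# (identity layout here; the real test can shuffle blocks).
page_table = torch.arange(batch_size * nblocks).reshape(batch_size, nblocks)

# Gather the blocks row by row, then merge (nblocks, block_size) into one
# contiguous sequence axis, using the same rearrange pattern as the test.
k_cache_select = rearrange(
    k_cache_paged[page_table.flatten()],
    "(b nblocks) block_size ... -> b (nblocks block_size) ...",
    b=batch_size,
)[:, :seqlen_k]

assert k_cache_select.shape == (batch_size, seqlen_k, nheads, headdim)

The surviving context lines then bound the kernel's output error by mult times the PyTorch reference error, with mult = 4 for float8_e4m3fn and 2 otherwise.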
