Infini-AI-Lab
diff --git a/csrc/register.cc b/csrc/register.cc
Lines changed: 3 additions & 0 deletions
diff --git a/csrc/register.h b/csrc/register.h
Lines changed: 44 additions & 0 deletions
@@ -8,4 +8,7 @@ PYBIND11_MODULE(vortex_torch_C, m){
         m.def("Chunkwise_NH2HN_Transpose",      &Chunkwise_NH2HN_Transpose);
         m.def("Chunkwise_HN2NH_Transpose",      &Chunkwise_HN2NH_Transpose);
         m.def("topk_output",                    &topk_output);
+        m.def("sglang_plan_decode_fa3",         &sglang_plan_decode_fa3);
+        m.def("sglang_plan_prefill_fa3",        &sglang_plan_prefill_fa3);
+        m.def("Chunkwise_HN2NH_Transpose_FA3",  &Chunkwise_HN2NH_Transpose_FA3);
 }
@@ -86,4 +86,48 @@ const int64_t       max_seq_lengths
 );
 
 
+void sglang_plan_decode_fa3(  // Declaration only; exposed to Python under the same name via m.def in PYBIND11_MODULE(vortex_torch_C). Presumably the decode-phase planner for the FA3 path (inferred from the name; confirm against the definition).
+const at::Tensor&   cached_seq_lens,  // const ref; presumably a read-only input (TODO confirm against the definition)
+at::Tensor&         dense_kv_indptr,  // non-const ref, so the callee may write to it; presumably an output (confirm). Same for the other dense_* / sparse_* tensors below.
+at::Tensor&         dense_kv_indices,
+at::Tensor&         sparse_kv_indptr,
+at::Tensor&         sparse_kv_indices,
+at::Tensor&         dense_page_table,
+at::Tensor&         dense_cache_seqlens,
+at::Tensor&         sparse_page_table,
+at::Tensor&         sparse_cache_seqlens,
+const at::Tensor&   req_to_token,  // const ref; presumably a read-only lookup input (confirm)
+const at::Tensor&   req_indices,  // const ref; presumably a read-only input (confirm)
+at::Tensor&         winfo_q_indices,  // non-const ref; the winfo_* tensors are presumably workload-info outputs (inferred from the prefix; confirm)
+at::Tensor&         winfo_kv_offsets,
+at::Tensor&         winfo_kv_lens,
+at::Tensor&         winfo_num_workload,
+at::Tensor&         winfo_chunk_size,
+const int64_t       page_size,  // scalar settings are passed by value as int64_t
+const int64_t       num_kv_heads,
+const int64_t       topk_val,
+const int64_t       page_reserved_bos,  // NOTE(review): meaning of the reserved bos/eos values is not visible here; document at the definition
+const int64_t       page_reserved_eos,
+const int64_t       max_chunk_size,  // NOTE(review): whether max/min chunk bounds are inclusive is not visible here; confirm
+const int64_t       min_chunk_size
+);
 
+void sglang_plan_prefill_fa3(  // Declaration only; exposed to Python under the same name via m.def in PYBIND11_MODULE(vortex_torch_C). Presumably the prefill-phase counterpart of sglang_plan_decode_fa3 (inferred from the name; confirm).
+const at::Tensor&  cached_seq_lens,  // const ref; presumably a read-only input (TODO confirm against the definition)
+const at::Tensor&  cu_seqlens_q,  // const ref; presumably cumulative query sequence lengths (inferred from the name; confirm)
+const at::Tensor&  req_to_token,  // const ref; presumably a read-only lookup input (confirm)
+const at::Tensor&  req_indices,  // const ref; presumably a read-only input (confirm)
+at::Tensor&        page_table,  // non-const ref, so the callee may write to it; presumably an output (confirm)
+at::Tensor&        batch_table,  // non-const ref; presumably an output (confirm)
+const int64_t      page_size,  // scalar settings are passed by value as int64_t
+const int64_t      num_kv_heads
+);
+
+at::Tensor Chunkwise_HN2NH_Transpose_FA3(  // Declaration only; returns an at::Tensor by value and is exposed to Python under the same name via m.def in PYBIND11_MODULE(vortex_torch_C). Presumably an FA3 variant of Chunkwise_HN2NH_Transpose (inferred from the name; confirm).
+const at::Tensor&   x,  // const ref; presumably the tensor to transpose (confirm layout against the definition)
+const at::Tensor&   indptr,  // const ref; presumably read-only chunk boundaries (inferred from the name; confirm)
+const at::Tensor&   batch_table,  // const ref; NOTE(review): possibly the batch_table filled by sglang_plan_prefill_fa3; confirm
+const int64_t       num_qo_heads,  // scalar settings are passed by value as int64_t
+const int64_t       num_kv_heads,
+const int64_t       head_dim
+);
Original file line number	Diff line number	Diff line change
`@@ -8,4 +8,7 @@ PYBIND11_MODULE(vortex_torch_C, m){`
`8`	`8`	`m.def("Chunkwise_NH2HN_Transpose", &Chunkwise_NH2HN_Transpose);`
`9`	`9`	`m.def("Chunkwise_HN2NH_Transpose", &Chunkwise_HN2NH_Transpose);`
`10`	`10`	`m.def("topk_output", &topk_output);`
	`11`	`+ m.def("sglang_plan_decode_fa3", &sglang_plan_decode_fa3);`
	`12`	`+ m.def("sglang_plan_prefill_fa3", &sglang_plan_prefill_fa3);`
	`13`	`+ m.def("Chunkwise_HN2NH_Transpose_FA3", &Chunkwise_HN2NH_Transpose_FA3);`
`11`	`14`	`}`