upd

cyx-6 · cyx-6 · commit e0eca6ac1550 · 2025-07-27T05:22:51.000Z
diff --git a/flashinfer/fused_moe.py b/flashinfer/fused_moe.py
@@ -774,10 +774,15 @@ def cutlass_fused_moe(
 
 
 def trtllm_gen_fused_moe_sm100_module() -> JitSpec:
-    hash = "f5deee96023f1d74b1ff71ac69f782a96741a053"
+    hash = "6b93c394210c89dccef13833c89797f1b8f8aefb"
+    tllm_gen_commit = "ce8ce46"
+    tllm_gen_config_hash = "2dc78d9"
+    include_path = (
+        f"{hash}/batched_gemm-{tllm_gen_commit}-{tllm_gen_config_hash}/include"
+    )
     metainfo = get_cubin(
-        f"{hash}/batched_gemm-c603ed2-3fa89e1/include/KernelMetaInfo",
-        "d789c63aaeee1aa0a68ebf22fa693b6b82a7c2319bd933a00a10306ca08d9e0e",
+        f"{include_path}/flashinferMetaInfo",
+        "b24fd5e7ae6b20e903c866ecb1d4a68f238301ba9b76df6a536056f2059a0d56",
         ".h",
     )
     assert metainfo, "KernelMetaInfo.h not found"
@@ -796,15 +801,12 @@ def trtllm_gen_fused_moe_sm100_module() -> JitSpec:
             "-DENABLE_BF16",
             "-DENABLE_FP8",
             "-DENABLE_FP4",
+            f'-DPIPELINE_HASH=\\"{hash}\\"',
+            f'-DTLLM_GEN_COMMIT=\\"{tllm_gen_commit}\\"',
+            f'-DTLLM_GEN_BATCHED_GEMM_CONFIG_HASH=\\"{tllm_gen_config_hash}\\"',
         ]
         + sm100a_nvcc_flags,
-        extra_include_paths=[
-            jit_env.FLASHINFER_CACHE_DIR
-            / "cubins"
-            / hash
-            / "batched_gemm-c603ed2-3fa89e1"
-            / "include"
-        ],
+        extra_include_paths=[jit_env.FLASHINFER_CACHE_DIR / "cubins" / include_path],
         extra_ldflags=["-lcuda"],
     )
 
diff --git a/flashinfer/jit/attention/pytorch.py b/flashinfer/jit/attention/pytorch.py
@@ -1483,10 +1483,11 @@ def gen_fmha_cutlass_sm100a_module(
 
 
 def trtllm_gen_fmha_module():
-    hash = "5f2779e6df822bc0b26940b6d3b0059c86f0a6a1"
+    hash = "6b93c394210c89dccef13833c89797f1b8f8aefb"
+    include_path = f"{hash}/fmha/trtllm-gen/include"
     metainfo = get_cubin(
-        f"{hash}/fmha/trtllm-gen/include/flashInferMetaInfo",
-        "11f31dc81f996e39c3f1d85d773864c9113c5837619e21418a846befa4f8dddd",
+        f"{include_path}/flashInferMetaInfo",
+        "ba35dc13249cd09bf39eed43e785b088d329acaf81a3f940a615904b81bfa02f",
         ".h",
     )
     assert metainfo, "flashInferMetaInfo.h not found"
@@ -1496,15 +1497,9 @@ def trtllm_gen_fmha_module():
             jit_env.FLASHINFER_CSRC_DIR / "trtllm_fmha_runner.cu",
             jit_env.FLASHINFER_CSRC_DIR / "trtllm_fmha_kernel_launcher.cu",
         ],
-        extra_include_paths=[
-            jit_env.FLASHINFER_CACHE_DIR
-            / "cubins"
-            / hash
-            / "fmha"
-            / "trtllm-gen"
-            / "include"
-        ],
+        extra_include_paths=[jit_env.FLASHINFER_CACHE_DIR / "cubins" / include_path],
         extra_ldflags=["-lcuda"],
+        extra_cuda_cflags=[f'-DPIPELINE_HASH=\\"{hash}\\"'],
     )
 
 
diff --git a/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/BatchedGemmInterface.h b/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/BatchedGemmInterface.h
@@ -24,7 +24,7 @@
 #include "trtllm/gen/CudaKernelLauncher.h"
 
 #ifdef TLLM_GEN_EXPORT_INTERFACE
-#include "KernelMetaInfo.h"
+#include "flashinferMetaInfo.h"
 #endif  // TLLM_GEN_EXPORT_INTERFACE
 
 namespace flashinfer::trtllm_cubin_loader {
@@ -466,7 +466,8 @@ BatchedGemmConfig const* BatchedGemmInterface::getBatchedGemmConfigs() const {
 
 size_t BatchedGemmInterface::getNumBatchedGemmConfigs() const {
 #ifdef TLLM_GEN_EXPORT_INTERFACE
-  return tensorrt_llm::kernels::tllmGenBatchedGemmListLen;
+  return sizeof(tensorrt_llm::kernels::tllmGenBatchedGemmList) /
+         sizeof(tensorrt_llm::kernels::tllmGenBatchedGemmList[0]);
 #else
   return 0;
 #endif
@@ -645,8 +646,7 @@ int32_t BatchedGemmInterface::run(BatchedGemmConfig const& config, void* workspa
 
   auto fiModuleLoadData = [&](CUmodule* module) {
     const std::string sha256 = config.mHash ? config.mHash : "";
-    const std::string pipeline_hash = "f5deee96023f1d74b1ff71ac69f782a96741a053";
-    const std::string cubin_path = pipeline_hash + "/" + std::string("batched_gemm-") +
+    const std::string cubin_path = std::string(PIPELINE_HASH) + "/" + std::string("batched_gemm-") +
                                    TLLM_GEN_COMMIT + "-" + TLLM_GEN_BATCHED_GEMM_CONFIG_HASH + "/";
     std::string fname_cubin = config.mFunctionName;
     if (!fname_cubin.empty()) {
diff --git a/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/BatchedGemmOptions.h b/include/flashinfer/trtllm/batched_gemm/trtllmGen_bmm_export/BatchedGemmOptions.h
@@ -302,8 +302,6 @@ struct BatchedGemmConfig {
   // defined. In this case, the cubins will be loaded from the provided data and function name.
   // Otherwise, the kernel will be loaded from the CudaRunner.
 #ifdef TLLM_GEN_EXPORT_INTERFACE
-  uint8_t const* mData{nullptr};
-  uint32_t const mSize{0};
   uint32_t const mSharedMemSize{0};
   char const* mFunctionName{nullptr};
   uint32_t const mNumThreadsPerCTA{0};
diff --git a/include/flashinfer/trtllm/fmha/fmhaKernels.cuh b/include/flashinfer/trtllm/fmha/fmhaKernels.cuh
@@ -231,10 +231,7 @@ class TllmGenFmhaKernel {
   }
 
   static std::string getCubinPath() {
-    const char* env_hash = std::getenv("FLASHINFER_CUBIN_ARTIFACTORY_HASH");
-    std::string hash =
-        env_hash ? std::string(env_hash) : "4c7bdebb4eba13311fc652a069e64782d5c0723d";
-    std::string cubin_path = hash + "/fmha/trtllm-gen/";
+    std::string cubin_path = std::string(PIPELINE_HASH) + "/fmha/trtllm-gen/";
     return cubin_path;
   }
 

Original file line number	Diff line number	Diff line change
`@@ -231,10 +231,7 @@ class TllmGenFmhaKernel {`
`231`	`231`	`}`
`232`	`232`
`233`	`233`	`static std::string getCubinPath() {`
`234`		`- const char* env_hash = std::getenv("FLASHINFER_CUBIN_ARTIFACTORY_HASH");`
`235`		`- std::string hash =`
`236`		`- env_hash ? std::string(env_hash) : "4c7bdebb4eba13311fc652a069e64782d5c0723d";`
`237`		`- std::string cubin_path = hash + "/fmha/trtllm-gen/";`
	`234`	`+ std::string cubin_path = std::string(PIPELINE_HASH) + "/fmha/trtllm-gen/";`
`238`	`235`	`return cubin_path;`
`239`	`236`	`}`
`240`	`237`