Skip to content

Commit 75df649

Browse files
authored
patch mm segfault & patch cubin avail. (#1628)
<!-- .github/pull_request_template.md --> ## 📌 Description <!-- What does this PR do? Briefly describe the changes and why they’re needed. --> ## 🔍 Related Issues <!-- Link any related issues here --> ## 🚀 Pull Request Checklist Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete. ### ✅ Pre-commit Checks - [x] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method). - [x] I have installed the hooks with `pre-commit install`. - [x] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues. > If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/). ## 🧪 Tests - [x] Tests have been added or updated as needed. - [x] All tests are passing (`unittest`, etc.). ## Reviewer Notes <!-- Optional: anything you'd like reviewers to focus on, concerns, etc. -->
1 parent 0296d06 commit 75df649

File tree

3 files changed

+7
-11
lines changed

3 files changed

+7
-11
lines changed

csrc/trtllm_gemm_runner.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ class TrtllmGenGemmRunner {
207207
return optionsA.mUseUnrollLoop2xForMma;
208208
}
209209

210-
return true;
210+
return false;
211211
});
212212

213213
bool findLoop2xMma = false;

flashinfer/artifacts.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,27 +48,27 @@ def get_available_cubin_files(source, retries=3, delay=5, timeout=10):
4848

4949

5050
class ArtifactPath:
51-
TRTLLM_GEN_FMHA: str = "6c74964c96684c3e674340f7e35fc20ad909a9a0/fmha/trtllm-gen/"
51+
TRTLLM_GEN_FMHA: str = "037e528e719ec3456a7d7d654f26b805e44c63b1/fmha/trtllm-gen/"
5252
TRTLLM_GEN_BMM: str = (
53-
"6c74964c96684c3e674340f7e35fc20ad909a9a0/batched_gemm-8704aa4-ba3b00d/"
53+
"037e528e719ec3456a7d7d654f26b805e44c63b1/batched_gemm-8704aa4-ba3b00d/"
5454
)
5555
TRTLLM_GEN_GEMM: str = (
56-
"6c74964c96684c3e674340f7e35fc20ad909a9a0/gemm-8704aa4-f91dc9e/"
56+
"037e528e719ec3456a7d7d654f26b805e44c63b1/gemm-8704aa4-f91dc9e/"
5757
)
5858
CUDNN_SDPA: str = "4c623163877c8fef5751c9c7a59940cd2baae02e/fmha/cudnn/"
5959
DEEPGEMM: str = "d25901733420c7cddc1adf799b0d4639ed1e162f/deep-gemm/"
6060

6161

6262
class MetaInfoHash:
6363
TRTLLM_GEN_FMHA: str = (
64-
"5a41a165d4d5e956d4cccd0a7d1627dbdcaccf4d07a9cfcc8055ef0cb52e0c87"
64+
"0ff77215b86997665cf75973e13cd2932f551d46b4e008f851d32d47e1d9560f"
6565
)
6666
TRTLLM_GEN_BMM: str = (
67-
"3edf4847059d465182779436397ece3d5fb45c3360a1d1abda3b71e35f957caa"
67+
"34bdfe7acfd49f5fb8b48e06d56e6a5ad88b951c730552f228fc5f614f7632a8"
6868
)
6969
DEEPGEMM: str = "69aa277b7f3663ed929e73f9c57301792b8c594dac15a465b44a5d151b6a1d50"
7070
TRTLLM_GEN_GEMM: str = (
71-
"c6265cf047fc5d2208a37c54b5d720f4755b1215de5f7434ad24ffbc81c31c27"
71+
"0345358c916d990709f9670e113e93f35c76aa22715e2d5128ec2ca8740be5ba"
7272
)
7373

7474

tests/test_mm_fp4.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@ def test_mm_fp4(m, n, k, res_dtype, backend, use_128x4_sf_layout, auto_tuning):
2525
if auto_tuning and backend == "cudnn":
2626
pytest.skip("Skipping test for cudnn fp4 with auto_tuning=True")
2727

28-
if not use_128x4_sf_layout and backend == "trtllm":
29-
# FIXME (bringup) quantization failure from main
30-
pytest.xfail("Skipping test for non-trtllm fp4 with use_128x4_sf_layout=False")
31-
3228
input = torch.randn([m, k], device="cuda", dtype=torch.bfloat16)
3329
mat2 = torch.randn([n, k], device="cuda", dtype=torch.bfloat16)
3430
a_sf_layout = SfLayout.layout_128x4 if use_128x4_sf_layout else SfLayout.layout_8x4

0 commit comments

Comments (0)