Skip to content

Commit 9298f1b

Browse files
authored
[None] [test] Add B300 cases to CI (#8056)
Signed-off-by: Xiwen Yu <[email protected]>
1 parent 2b8722b commit 9298f1b

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

cpp/tensorrt_llm/kernels/trtllmGenKernels/batchedGemm/KernelRunner.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,25 @@ using namespace batchedGemm::trtllm::gen;
3636

3737
static BatchedGemmInterface::ModuleCache globalTrtllmGenBatchedGemmModuleCache;
3838

39+
constexpr bool isSMCompatible(int gpuSM, SmVersion kernelSM)
40+
{
41+
if (gpuSM == 103)
42+
{
43+
return kernelSM == SmVersion::Sm100f || kernelSM == SmVersion::Sm103a;
44+
}
45+
else if (gpuSM == 100)
46+
{
47+
return kernelSM == SmVersion::Sm100f || kernelSM == SmVersion::Sm100a;
48+
}
49+
else if (gpuSM == 90)
50+
{
51+
return kernelSM == SmVersion::Sm90a;
52+
}
53+
54+
TLLM_THROW("Unexpected gpuSM %d", gpuSM);
55+
return false;
56+
}
57+
3958
std::vector<int64_t> prioritizePredefinedConfigs(int m, int n, int k, std::vector<int64_t> const& sortedIndices,
4059
batchedGemm::batchedGemm::BatchedGemmConfig const* configs)
4160
{
@@ -98,6 +117,7 @@ TrtllmGenBatchedGemmRunner::TrtllmGenBatchedGemmRunner(TrtllmGenBatchedGemmRunne
98117

99118
mPassingConfigIndices.clear();
100119

120+
int gpuSM = tensorrt_llm::common::getSMVersion();
101121
for (size_t i = 0; i < bmm.getNumBatchedGemmConfigs(); ++i)
102122
{
103123
auto const options = configs[i].mOptions;
@@ -108,7 +128,7 @@ TrtllmGenBatchedGemmRunner::TrtllmGenBatchedGemmRunner(TrtllmGenBatchedGemmRunne
108128
&& options.mTransposeMmaOutput == mOptions.transposeMmaOutput
109129
&& (!doesRouteImplUseNoRoute(options.mRouteImpl)) == mOptions.routeAct
110130
&& options.mFusedAct == mOptions.fusedAct && options.mIsStaticBatch == mOptions.staticBatch
111-
&& tileSize == mOptions.tileSize)
131+
&& tileSize == mOptions.tileSize && isSMCompatible(gpuSM, configs[i].mSm))
112132
{
113133
auto sm = configs[i].mSm;
114134
if (sm != SmVersion::Sm100f)

tests/integration/test_lists/test-db/l0_b300.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,10 @@ l0_b300:
1515
backend: pytorch
1616
tests:
1717
# ------------- PyTorch tests ---------------
18+
- unittest/_torch/attention # 200s
19+
# - unittest/_torch/thop
20+
- unittest/_torch/executor # 250s
21+
- unittest/_torch/modules # 300s
1822
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
23+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
24+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]

tests/integration/test_lists/test-db/l0_gb300.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,6 @@ l0_gb300:
1616
tests:
1717
# ------------- PyTorch tests ---------------
1818
- accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]
19+
- accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-cutlass-auto]
20+
- accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency] # Cover nvbugs 5461712 and 5505402
21+
- unittest/_torch/thop/parallel TIMEOUT (90)

0 commit comments

Comments
 (0)