
Commit 57079ce

[None][chore] Rename TensorRT-LLM to TensorRT LLM for source code. (#7851)
Signed-off-by: nv-guomingz <[email protected]>
1 parent 68b7900 commit 57079ce

File tree

148 files changed: +311 −311 lines


README.md

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ TensorRT LLM
 * [08/01] Scaling Expert Parallelism in TensorRT LLM (Part 2: Performance Status and Optimization)
 [➡️ link](./docs/source/blogs/tech_blog/blog8_Scaling_Expert_Parallelism_in_TensorRT-LLM_part2.md)
 
-* [07/26] N-Gram Speculative Decoding in TensorRT-LLM
+* [07/26] N-Gram Speculative Decoding in TensorRT LLM
 [➡️ link](./docs/source/blogs/tech_blog/blog7_NGram_performance_Analysis_And_Auto_Enablement.md)
 
 * [06/19] Disaggregated Serving in TensorRT LLM

benchmarks/cpp/bertBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ void benchmarkBert(std::string const& modelName, std::filesystem::path const& da
 
 int main(int argc, char* argv[])
 {
-    cxxopts::Options options("TensorRT-LLM C++ Runtime Benchmark", "TensorRT-LLM C++ Runtime Benchmark for BERT.");
+    cxxopts::Options options("TensorRT LLM C++ Runtime Benchmark", "TensorRT LLM C++ Runtime Benchmark for BERT.");
     options.add_options()("h,help", "Print usage");
     options.add_options()(
         "m,model", "Model name specified for engines.", cxxopts::value<std::string>()->default_value("bert_base"));

benchmarks/cpp/disaggServerBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -1145,7 +1145,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
 int main(int argc, char* argv[])
 
 {
-    cxxopts::Options options("TensorRT-LLm DisaggServer Benchmark");
+    cxxopts::Options options("TensorRT LLM DisaggServer Benchmark");
     options.add_options()("h,help", "Print usage");
     options.add_options()("context_engine_dirs", "Directories that store context engines,separator is a ,",
         cxxopts::value<std::vector<std::string>>());
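Note this change also fixes the old "TensorRT-LLm" casing typo. A standalone sketch (not from this commit) of how the comma-separated --context_engine_dirs option is consumed; cxxopts splits vector-typed values on ',' by default:

#include <cxxopts.hpp>
#include <iostream>
#include <string>
#include <vector>

int main(int argc, char* argv[])
{
    cxxopts::Options options("TensorRT LLM DisaggServer Benchmark");
    options.add_options()("context_engine_dirs", "Directories that store context engines,separator is a ,",
        cxxopts::value<std::vector<std::string>>());

    // Example invocation: --context_engine_dirs=/engines/ctx0,/engines/ctx1
    auto result = options.parse(argc, argv);
    for (auto const& dir : result["context_engine_dirs"].as<std::vector<std::string>>())
    {
        std::cout << "context engine dir: " << dir << std::endl;
    }
    return 0;
}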

benchmarks/cpp/gptManagerBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -1055,7 +1055,7 @@ void benchmarkExecutor(std::optional<std::filesystem::path> const& decoderEngine
 int main(int argc, char* argv[])
 {
     cxxopts::Options options(
-        "TensorRT-LLM BatchManager Benchmark", "TensorRT-LLM BatchManager Benchmark for GPT and GPT-like models.");
+        "TensorRT LLM BatchManager Benchmark", "TensorRT LLM BatchManager Benchmark for GPT and GPT-like models.");
     options.add_options()("h,help", "Print usage");
     options.add_options()("engine_dir, decoder_engine_dir", "Directory that store the engines of decoder models.",
         cxxopts::value<std::string>());

cpp/include/tensorrt_llm/deep_gemm/compiler.cuh

Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ std::vector<std::filesystem::path> getJitIncludeDirs()
         }
         else
         {
-            TLLM_LOG_WARNING("Failed to find TensorRT-LLM installation, DeepGEMM will be disabled.");
+            TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled.");
         }
     }
     return includeDirs;
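The renamed warning sits in a find-or-disable fallback: the JIT include dirs are only populated when the installation is located, otherwise DeepGEMM is disabled with a warning. A minimal sketch of that pattern, with a stand-in logger and a hypothetical probe path (the real code resolves the installed package location):

#include <filesystem>
#include <iostream>
#include <vector>

// Stand-in for TLLM_LOG_WARNING, which lives in TensorRT LLM's logging utilities.
#define LOG_WARNING(msg) (std::cerr << "[WARNING] " << (msg) << std::endl)

std::vector<std::filesystem::path> getJitIncludeDirs()
{
    std::vector<std::filesystem::path> includeDirs;
    std::filesystem::path root = "/usr/local/tensorrt_llm/include"; // hypothetical location
    if (std::filesystem::exists(root))
    {
        includeDirs.push_back(root);
    }
    else
    {
        LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled.");
    }
    return includeDirs;
}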

cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp

Lines changed: 1 addition & 1 deletion
@@ -165,7 +165,7 @@ CacheTransceiver::CacheTransceiver(kv_cache_manager::BaseKVCacheManager* cacheMa
     {
         void* ret = dllGetSym(handle, name);
         TLLM_CHECK_WITH_INFO(ret != nullptr,
-            "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not "
+            "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not "
             "built with UCX support, please rebuild in UCX-enabled environment.");
         return ret;
     };
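A minimal sketch, assuming a POSIX platform, of the checked symbol-lookup pattern behind dllGetSym (the function name and message below are illustrative, not the project's API):

#include <dlfcn.h>
#include <stdexcept>
#include <string>

void* getSymChecked(void* handle, char const* name)
{
    // dlsym returns nullptr when the symbol is absent; mirror TLLM_CHECK_WITH_INFO
    // by failing loudly with an actionable message.
    void* ret = dlsym(handle, name);
    if (ret == nullptr)
    {
        throw std::runtime_error(std::string("Unable to load symbol '") + name
            + "'; the library may not be built with UCX support.");
    }
    return ret;
}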

cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h

Lines changed: 15 additions & 15 deletions
@@ -105,7 +105,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm100(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }
@@ -146,15 +146,15 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM100::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }
@@ -177,7 +177,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm120(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }
@@ -205,16 +205,16 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm120(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM120::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM120::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }
@@ -257,7 +257,7 @@ size_t dispatchMXFP8xMXFP4GemmClusterShapeSm100(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }
@@ -293,15 +293,15 @@ size_t dispatchMXFP8xMXFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM100::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }
@@ -338,7 +338,7 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
     else
     {
         throw std::runtime_error(
-            "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
+            "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
     }
 }
 else if constexpr (fp4GemmType == FP4GemmType::W4A4_NVFP4_NVFP4)
@@ -356,13 +356,13 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
     else
     {
         throw std::runtime_error(
-            "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
+            "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
     }
 }
 else
 {
     throw std::runtime_error(
-        "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
+        "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
 }
 
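All of the renamed strings in this file live in one dispatch pattern: a switch over the tile-config enum in which Undefined, ChooseWithHeuristic, and the default arm each throw. A minimal standalone sketch of that shape (the enum, values, and function name below are illustrative, not the project's types):

#include <cstddef>
#include <stdexcept>

enum class TileConfig
{
    Undefined,
    ChooseWithHeuristic,
    Shape128x128,
};

// Illustrative stand-in for the dispatch*GemmCTAShape* family: a valid shape
// dispatches to a shape-specialized launcher, everything else throws.
size_t dispatchByTileConfig(TileConfig config)
{
    switch (config)
    {
    case TileConfig::Shape128x128:
        return 0; // would call the shape-specialized GEMM launcher here
    case TileConfig::Undefined:
        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
    case TileConfig::ChooseWithHeuristic:
        throw std::runtime_error(
            "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
            "heuristic.");
    default:
        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
    }
}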

cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/mxfp8_mxfp4_gemm_template_sm100.h

Lines changed: 6 additions & 6 deletions
@@ -93,7 +93,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
     int* occupancy)
 {
     throw std::runtime_error(
-        "[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture.");
+        "[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture.");
 }
 
 #else
@@ -250,7 +250,7 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
     {
         std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got "
             + std::to_string(mMaxSmemSize);
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg);
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
     }
     /* // Return workspace size */
     if (!A && !B && !D)
@@ -261,28 +261,28 @@ size_t genericMXFP8xMXFP4GemmKernelLauncher(void* D, void const* A, void const*
     {
         std::string errMsg("Requested workspace size insufficient. Required "
             + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes));
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg);
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
     }
     auto can_implement = gemm.can_implement(args);
     if (can_implement != cutlass::Status::kSuccess)
     {
         std::string errMsg = "MXFP8xMXFP4 Gemm cutlass kernel will fail for params. Error: "
             + std::string(cutlassGetStatusString(can_implement));
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg);
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg);
     }
     auto initStatus = gemm.initialize(args, workspace, stream);
     if (initStatus != cutlass::Status::kSuccess)
     {
         std::string errMsg = "Failed to initialize cutlass MXFP8xMXFP4 gemm. Error: "
             + std::string(cutlassGetStatusString(initStatus));
-        throw std::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
+        throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
     }
     auto runStatus = gemm.run(args, workspace, stream, nullptr, tensorrt_llm::common::getEnvEnablePDL());
     if (runStatus != cutlass::Status::kSuccess)
     {
         std::string errMsg
             = "Failed to run cutlass MXFP8xMXFP4 gemm. Error: " + std::string(cutlassGetStatusString(runStatus));
-        throw std::runtime_error("[TensorRT-LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
+        throw std::runtime_error("[TensorRT LLM Error][MXFP8xMXFP4 gemm Runner] " + errMsg);
     }
     return gemm.get_workspace_size(args);
 }
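The renamed messages above all follow the launcher's status-check chain: each CUTLASS stage (can_implement, initialize, run) reports a Status, and any failure becomes a tagged runtime_error. A minimal sketch of that chain, with a mock Status and a template Gemm standing in for the CUTLASS kernel (the real code uses cutlass::Status and cutlassGetStatusString):

#include <cstddef>
#include <stdexcept>
#include <string>

enum class Status { kSuccess, kErrorInternal };

char const* statusString(Status s) { return s == Status::kSuccess ? "success" : "internal error"; }

template <typename Gemm, typename Args>
std::size_t runGemmChecked(Gemm& gemm, Args const& args)
{
    Status s = gemm.can_implement(args);
    if (s != Status::kSuccess)
    {
        throw std::runtime_error(
            std::string("[TensorRT LLM Error][gemm Runner] kernel will fail for params. Error: ") + statusString(s));
    }
    s = gemm.initialize(args);
    if (s != Status::kSuccess)
    {
        throw std::runtime_error(
            std::string("[TensorRT LLM Error][gemm Runner] failed to initialize. Error: ") + statusString(s));
    }
    s = gemm.run(args);
    if (s != Status::kSuccess)
    {
        throw std::runtime_error(
            std::string("[TensorRT LLM Error][gemm Runner] failed to run. Error: ") + statusString(s));
    }
    return gemm.get_workspace_size(args);
}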

cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/nvfp4_nvfp4_gemm_template_sm100.h

Lines changed: 6 additions & 6 deletions
@@ -107,7 +107,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
         int* occupancy) \
     { \
         throw std::runtime_error( \
-            "[TensorRT-LLM Error][FP4 gemm Runner] TensorRT-LLM is not compiled with support for this Architecture."); \
+            "[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with support for this Architecture."); \
     }
 
 #else
@@ -268,7 +268,7 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
     { \
         std::string errMsg = "SMEM size exceeds maximum allowed. Required " + std::to_string(smem_size) + ", got " \
             + std::to_string(mMaxSmemSize); \
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
     } \
     /* // Return workspace size */ \
     if (!A && !B && !D) \
@@ -279,28 +279,28 @@ size_t genericFp4GemmKernelLauncher(void* D, void const* A, void const* B, void
     { \
         std::string errMsg("Requested workspace size insufficient. Required " \
             + std::to_string(gemm.get_workspace_size(args)) + ", got " + std::to_string(workspaceBytes)); \
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
     } \
     auto can_implement = gemm.can_implement(args); \
     if (can_implement != cutlass::Status::kSuccess) \
     { \
         std::string errMsg = "FP4 Gemm cutlass kernel will fail for params. Error: " \
             + std::string(cutlassGetStatusString(can_implement)); \
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
     } \
     auto initStatus = gemm.initialize(args, workspace, stream); \
     if (initStatus != cutlass::Status::kSuccess) \
     { \
         std::string errMsg \
             = "Failed to initialize cutlass FP4 gemm. Error: " + std::string(cutlassGetStatusString(initStatus)); \
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
     } \
     auto runStatus = gemm.run(args, workspace, stream, nullptr, tensorrt_llm::common::getEnvEnablePDL()); \
     if (runStatus != cutlass::Status::kSuccess) \
     { \
         std::string errMsg \
             = "Failed to run cutlass FP4 gemm. Error: " + std::string(cutlassGetStatusString(runStatus)); \
-        throw std::runtime_error("[TensorRT-LLM Error][FP4 gemm Runner] " + errMsg); \
+        throw std::runtime_error("[TensorRT LLM Error][FP4 gemm Runner] " + errMsg); \
     } \
     return gemm.get_workspace_size(args); \
 }
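Unlike the MXFP8xMXFP4 variant, every line in this file's launcher body ends in a backslash because the whole body is a preprocessor macro, stamped out once per instantiation. A toy illustration of that structure (the macro and function names below are made up, not the project's actual macro):

#include <cstddef>
#include <stdexcept>

// Expands to a function definition that always throws, mirroring the
// "not compiled with support for this Architecture" fallback path.
#define DEFINE_UNSUPPORTED_LAUNCHER(NAME) \
    size_t NAME() \
    { \
        throw std::runtime_error( \
            "[TensorRT LLM Error][FP4 gemm Runner] TensorRT LLM is not compiled with " \
            "support for this Architecture."); \
    }

DEFINE_UNSUPPORTED_LAUNCHER(genericFp4GemmKernelLauncherStub)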
