Skip to content

Commit 05d8f5b

Browse files
authored
Merge branch 'main' into unwaive-ds-fp4-b200
2 parents 544fd2e + 77e37d9 commit 05d8f5b

File tree

62 files changed

+467
-373
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+467
-373
lines changed

cpp/tensorrt_llm/nanobind/runtime/hostfunc.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,9 +78,13 @@ std::optional<uintptr_t> launchHostFunc(
7878
{
7979
auto const stream = reinterpret_cast<cudaStream_t>(streamPtr);
8080

81+
nb::gil_scoped_acquire gil;
82+
8183
auto hostFuncUserData
8284
= std::make_unique<HostFuncUserData>(freeUserData, pyHostFunc, nb::tuple(pyArgs), nb::dict(pyKwargs));
8385

86+
nb::gil_scoped_release release;
87+
8488
cudaError_t err = cudaLaunchHostFunc(stream, cudaHostFuncTrampoline, hostFuncUserData.get());
8589
if (err != cudaSuccess)
8690
{
@@ -110,6 +114,7 @@ void initHostFuncBindings(nb::module_& m)
110114
{
111115
m.def("launch_hostfunc", &launchHostFunc, "Launch a Python host function to a CUDA stream",
112116
nb::call_guard<nb::gil_scoped_release>());
113-
m.def("free_hostfunc_user_data", &freeHostFuncUserData, "Free the user data for the Python host function");
117+
m.def("free_hostfunc_user_data", &freeHostFuncUserData, "Free the user data for the Python host function",
118+
nb::call_guard<nb::gil_scoped_release>());
114119
}
115120
} // namespace tensorrt_llm::nanobind::runtime

cpp/tensorrt_llm/pybind/runtime/hostfunc.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,9 +78,13 @@ std::optional<uintptr_t> launchHostFunc(
7878
{
7979
auto const stream = reinterpret_cast<cudaStream_t>(streamPtr);
8080

81+
py::gil_scoped_acquire gil;
82+
8183
auto hostFuncUserData
8284
= std::make_unique<HostFuncUserData>(freeUserData, pyHostFunc, py::tuple(pyArgs), py::dict(pyKwargs));
8385

86+
py::gil_scoped_release release;
87+
8488
cudaError_t err = cudaLaunchHostFunc(stream, cudaHostFuncTrampoline, hostFuncUserData.get());
8589
if (err != cudaSuccess)
8690
{
@@ -110,6 +114,7 @@ void initHostFuncBindings(pybind11::module_& m)
110114
{
111115
m.def("launch_hostfunc", &launchHostFunc, "Launch a Python host function to a CUDA stream",
112116
py::call_guard<py::gil_scoped_release>());
113-
m.def("free_hostfunc_user_data", &freeHostFuncUserData, "Free the user data for the Python host function");
117+
m.def("free_hostfunc_user_data", &freeHostFuncUserData, "Free the user data for the Python host function",
118+
py::call_guard<py::gil_scoped_release>());
114119
}
115120
} // namespace tensorrt_llm::pybind::runtime

examples/utils.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -135,10 +135,12 @@ def _load_tokenizer(tokenizer_dir: Optional[str] = None,
135135
tokenizer_type: Optional[str] = None):
136136
if vocab_file is None:
137137
if 'whisper' in model_name.lower():
138-
tokenizer = AutoTokenizer.from_pretrained('openai/whisper-large-v3',
139-
language='english',
140-
task='transcribe',
141-
predict_timestamps=False)
138+
tokenizer = AutoTokenizer.from_pretrained(
139+
tokenizer_dir or 'openai/whisper-large-v3',
140+
language='english',
141+
task='transcribe',
142+
predict_timestamps=False,
143+
)
142144
elif tokenizer_type == 'language_adapter':
143145
tokenizer = None
144146
else:

security_scanning/examples/apps/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/auto_deploy/poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/draft_target_model/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/eagle/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/lookahead/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

security_scanning/examples/medusa/poetry.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)