Skip to content

Commit 5f60ecc

Browse files
authored
Fix CUDA synchronization issue between ORT-GenAI and TRT-RTX inference (microsoft#1733)
Fix CUDA synchronization issue between ORT-GenAI and TRT-RTX inference

Problem:
- Race condition between ORT-GenAI CUDA operations and TRT-RTX inference execution.
- CUDA operations were not completing before session.Run() was called.
- This caused incorrect inference outputs when GPU sampling was enabled.

Solution:
- Configure user_compute_stream for the NvTensorRtRtx provider.
- Set the disable_synchronize_execution_providers run option to "1" before session.Run() when the device type is NvTensorRtRtx.
1 parent 0ec562c commit 5f60ecc

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

src/models/model.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,10 @@ void State::Run(OrtSession& session, bool graph_capture_this_run) {
143143
ep_dynamic_options_next_run_.clear();
144144
}
145145

146+
if (model_.p_device_ && model_.p_device_->GetType() == DeviceType::NvTensorRtRtx) {
147+
run_options_->AddConfigEntry("disable_synchronize_execution_providers", "1");
148+
}
149+
146150
session.Run(run_options_.get(), input_names_.data(), inputs_.data(), input_names_.size(),
147151
output_names_.data(), outputs_.data(), output_names_.size());
148152

@@ -590,6 +594,17 @@ DeviceInterface* SetProviderSessionOptions(OrtSessionOptions& session_options,
590594
}
591595

592596
std::vector<const char*> keys, values;
597+
std::string stream_value_str;
598+
if (provider_options.name == "NvTensorRtRtx" && is_primary_session_options && p_device) {
599+
void* stream_ptr = p_device->GetCudaStream();
600+
std::stringstream stream_value;
601+
stream_value << reinterpret_cast<uintptr_t>(stream_ptr);
602+
stream_value_str = stream_value.str();
603+
604+
keys.emplace_back("user_compute_stream");
605+
values.emplace_back(stream_value_str.c_str());
606+
}
607+
593608
for (auto& option : provider_options.options) {
594609
keys.emplace_back(option.first.c_str());
595610
values.emplace_back(option.second.c_str());

0 commit comments

Comments
 (0)