Skip to content

Commit 08dca80

Browse files
encoder only trt ep for transducer (k2-fsa#1130)
1 parent 15e2bbe commit 08dca80

File tree

4 files changed

+31
-7
lines changed

4 files changed

+31
-7
lines changed

sherpa-onnx/csrc/online-zipformer2-transducer-model.cc

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ namespace sherpa_onnx {
3333
OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
3434
const OnlineModelConfig &config)
3535
: env_(ORT_LOGGING_LEVEL_WARNING),
36-
sess_opts_(GetSessionOptions(config)),
36+
encoder_sess_opts_(GetSessionOptions(config)),
37+
decoder_sess_opts_(GetSessionOptions(config, "decoder")),
38+
joiner_sess_opts_(GetSessionOptions(config, "joiner")),
3739
config_(config),
3840
allocator_{} {
3941
{
@@ -57,7 +59,9 @@ OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
5759
AAssetManager *mgr, const OnlineModelConfig &config)
5860
: env_(ORT_LOGGING_LEVEL_WARNING),
5961
config_(config),
60-
sess_opts_(GetSessionOptions(config)),
62+
encoder_sess_opts_(GetSessionOptions(config)),
63+
decoder_sess_opts_(GetSessionOptions(config)),
64+
joiner_sess_opts_(GetSessionOptions(config)),
6165
allocator_{} {
6266
{
6367
auto buf = ReadFile(mgr, config.transducer.encoder);
@@ -79,7 +83,7 @@ OnlineZipformer2TransducerModel::OnlineZipformer2TransducerModel(
7983
void OnlineZipformer2TransducerModel::InitEncoder(void *model_data,
8084
size_t model_data_length) {
8185
encoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
82-
model_data_length, sess_opts_);
86+
model_data_length, encoder_sess_opts_);
8387

8488
GetInputNames(encoder_sess_.get(), &encoder_input_names_,
8589
&encoder_input_names_ptr_);
@@ -132,7 +136,7 @@ void OnlineZipformer2TransducerModel::InitEncoder(void *model_data,
132136
void OnlineZipformer2TransducerModel::InitDecoder(void *model_data,
133137
size_t model_data_length) {
134138
decoder_sess_ = std::make_unique<Ort::Session>(env_, model_data,
135-
model_data_length, sess_opts_);
139+
model_data_length, decoder_sess_opts_);
136140

137141
GetInputNames(decoder_sess_.get(), &decoder_input_names_,
138142
&decoder_input_names_ptr_);
@@ -157,7 +161,7 @@ void OnlineZipformer2TransducerModel::InitDecoder(void *model_data,
157161
void OnlineZipformer2TransducerModel::InitJoiner(void *model_data,
158162
size_t model_data_length) {
159163
joiner_sess_ = std::make_unique<Ort::Session>(env_, model_data,
160-
model_data_length, sess_opts_);
164+
model_data_length, joiner_sess_opts_);
161165

162166
GetInputNames(joiner_sess_.get(), &joiner_input_names_,
163167
&joiner_input_names_ptr_);

sherpa-onnx/csrc/online-zipformer2-transducer-model.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,10 @@ class OnlineZipformer2TransducerModel : public OnlineTransducerModel {
6565

6666
private:
6767
Ort::Env env_;
68-
Ort::SessionOptions sess_opts_;
68+
Ort::SessionOptions encoder_sess_opts_;
69+
Ort::SessionOptions decoder_sess_opts_;
70+
Ort::SessionOptions joiner_sess_opts_;
71+
6972
Ort::AllocatorWithDefaultOptions allocator_;
7073

7174
std::unique_ptr<Ort::Session> encoder_sess_;

sherpa-onnx/csrc/session.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,6 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
9494
std::to_string(trt_config.trt_timing_cache_enable);
9595
auto trt_dump_subgraphs =
9696
std::to_string(trt_config.trt_dump_subgraphs);
97-
9897
std::vector<TrtPairs> trt_options = {
9998
{"device_id", device_id.c_str()},
10099
{"trt_max_workspace_size", trt_max_workspace_size.c_str()},
@@ -223,6 +222,21 @@ Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config) {
223222
config.provider_config.provider, &config.provider_config);
224223
}
225224

225+
Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
226+
const std::string &model_type) {
227+
/*
228+
Transducer models: with the "trt" provider, only the encoder uses TensorRT;
229+
a model_type of "decoder" or "joiner" gets "cuda" session options instead.
230+
*/
231+
if(config.provider_config.provider == "trt" &&
232+
(model_type == "decoder" || model_type == "joiner")) {
233+
return GetSessionOptionsImpl(config.num_threads,
234+
"cuda", &config.provider_config);
235+
}
236+
return GetSessionOptionsImpl(config.num_threads,
237+
config.provider_config.provider, &config.provider_config);
238+
}
239+
226240
Ort::SessionOptions GetSessionOptions(const OfflineModelConfig &config) {  // Offline overload: forwards only config.num_threads and config.provider.
227241
return GetSessionOptionsImpl(config.num_threads, config.provider);
228242
}

sherpa-onnx/csrc/session.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ namespace sherpa_onnx {
2424

2525
Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config);
2626

27+
Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config,
28+
const std::string &model_type);
29+
2730
Ort::SessionOptions GetSessionOptions(const OfflineModelConfig &config);
2831

2932
Ort::SessionOptions GetSessionOptions(const OfflineLMConfig &config);

0 commit comments

Comments
 (0)