@@ -67,6 +67,12 @@ Status LoadModel(const string& model_dir, const string& signature_key,
6767 std::vector<tensorflow::TensorInfo>* output_info) {
6868 tensorflow::RunOptions run_options;
6969 tensorflow::SessionOptions sess_options;
70+
71+ tensorflow::OptimizerOptions* optimizer_options =
72+ sess_options.config .mutable_graph_options ()->mutable_optimizer_options ();
73+ optimizer_options->set_opt_level (tensorflow::OptimizerOptions::L0);
74+ optimizer_options->set_global_jit_level (tensorflow::OptimizerOptions::OFF);
75+
7076 sess_options.config .mutable_gpu_options ()->force_gpu_compatible ();
7177 TF_RETURN_IF_ERROR (tensorflow::LoadSavedModel (sess_options, run_options,
7278 model_dir, {" serve" }, bundle));
@@ -141,8 +147,8 @@ int main(int argc, char* argv[]) {
141147 string model_path = " /path/to/model/" ;
142148 string signature_key = " serving_default" ;
143149 int32_t batch_size = 64 ;
144- int32_t warmup_iters = 50 ;
145- int32_t eval_iters = 1000 ;
150+ int32_t warmup_iters = 200 ;
151+ int32_t eval_iters = 800 ;
146152 bool input_from_device = true ;
147153 bool output_to_host = true ;
148154 std::vector<Flag> flag_list = {
@@ -212,6 +218,10 @@ int main(int argc, char* argv[]) {
212218 TFTRT_ENSURE_OK (device->Sync ());
213219 end_time = std::chrono::steady_clock::now ();
214220
221+ if ((i % 10 ) == 0 ) {
222+ LOG (INFO) << " step: " << i;
223+ }
224+
215225 double duration = (end_time - start_time).count () / 1e6 ;
216226 infer_time.push_back (duration);
217227 }
@@ -233,4 +243,4 @@ int main(int argc, char* argv[]) {
233243 LOG (INFO) << " Engine build time + first inference latency (ms): " << infer_time.front ();
234244
235245 return 0 ;
236- }
246+ }
(end of patch — the line above is the final hunk; "0 commit comments" was web-page residue, not part of the diff)