@@ -319,28 +319,27 @@ int SessionElement::calculate_inference_caused_latency(float max_possible_infere
319319 unsigned int max_possible_inferences_parallel = static_cast <unsigned int >(std::ceil ((max_possible_inferences) / static_cast <float >(m_inference_config.m_num_parallel_processors )));
320320 int already_inferred = 0 ;
321321 float wait_time_left = wait_time;
322+
322323 for (unsigned int i = 0 ; i < max_possible_inferences_parallel; ++i) {
323324 inference_time_left += m_inference_config.m_max_inference_time ;
325+
326+ if (wait_time_left >= m_inference_config.m_max_inference_time ) {
327+ already_inferred += m_inference_config.m_num_parallel_processors ;
328+ wait_time_left -= m_inference_config.m_max_inference_time ;
329+ }
330+
324331 while (inference_time_left >= host_buffer_time_int && host_buffer_size_int > 0 ) {
325332 inference_caused_latency += host_buffer_size_int;
326333 inference_time_left -= host_buffer_time_int;
327- wait_time_left += host_buffer_time_int;
328334 }
329335 }
330336
331- while (inference_time_left > 0 ) {
332- if (wait_time_left >= m_inference_config.m_max_inference_time ) {
333- inference_time_left -= m_inference_config.m_max_inference_time ;
334- already_inferred += m_inference_config.m_num_parallel_processors ;
335- wait_time_left -= m_inference_config.m_max_inference_time ;
336- } else {
337+ if (inference_time_left > wait_time) {
338+ if (host_buffer_time_int > 0 ) {
339+ inference_time_left -= host_buffer_time_int;
337340 inference_caused_latency += host_buffer_size_int;
338- if (host_buffer_time_int > 0 ) {
339- inference_time_left -= host_buffer_time_int;
340- } else {
341- inference_caused_latency += 1 ;
342- break ;
343- }
341+ } else {
342+ inference_caused_latency += 1 ;
344343 }
345344 }
346345
0 commit comments