Skip to content

Commit 963ad46

Browse files
achartier authored and
dominicshanshan committed
[https://nvbugs/5404000][fix] Ensure consistency between firstTokenTime and lastTokenTime (NVIDIA#8294)
Signed-off-by: Aurelien Chartier <[email protected]>
1 parent ef9a3df commit 963ad46

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

cpp/include/tensorrt_llm/batch_manager/llmRequest.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1828,9 +1828,10 @@ class GenericLlmRequest
18281828

18291829
void updatePerfMetrics(executor::IterationType iter)
18301830
{
1831+
auto const currentTokenTime = getSteadyClockNow();
1832+
18311833
if (!mPerfMetrics.firstIter)
18321834
{
1833-
auto const currentTokenTime = getSteadyClockNow();
18341835
mPerfMetrics.firstIter = iter;
18351836
mPerfMetrics.timingMetrics.firstTokenTime = currentTokenTime;
18361837
}
@@ -1839,7 +1840,6 @@ class GenericLlmRequest
18391840

18401841
if (isFinished())
18411842
{
1842-
auto const currentTokenTime = getSteadyClockNow();
18431843
mPerfMetrics.lastIter = iter;
18441844
mPerfMetrics.timingMetrics.lastTokenTime = currentTokenTime;
18451845
}

0 commit comments

Comments
 (0)