Add CUDA timers for prefill and decode phases

kwen2501 · kwen2501 · commit 5951e393b97e · 2024-09-22T23:25:25.000-07:00
diff --git a/dist_run.py b/dist_run.py
@@ -394,7 +394,6 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
     s = set(prompt_lengths)
     assert len(s) == 1, f"prompt_lengths should be the same, got {s}"
 
-    # with CUDATrackTime() as timer:
     # Need these global ids due to the API definition of dist.send and recv
     first_pp_rank_global_id = dist.get_global_rank(pp_group, first_pp_rank)
     last_pp_rank_global_id = dist.get_global_rank(pp_group, last_pp_rank)
@@ -411,14 +410,18 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
     # TODO: we need to pass `input_pos` and `cache_lane` to each stage.
     lane = 0
     kwargs = {"input_pos": input_pos, "cache_lane": lane}
-    with torch.no_grad():
+    with torch.no_grad(), CUDATrackTime() as timer:
         if pp_rank == first_pp_rank:
             output = prefiller.step(padded_sequence, **kwargs)
         elif pp_rank == last_pp_rank:
             output = prefiller.step(**kwargs)
         else:  # middle pp ranks
             prefiller.step(**kwargs)
 
+    logger.info(
+        f"{color.green}Prefilling time: {timer.get_time()} {timer.unit} for rank {rank}{color.reset}"
+    )
+
     # Decode the output -- first generated token
     if pp_rank == last_pp_rank:
         decode_results = _batch_decode_next_tokens(
@@ -456,7 +459,7 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
     decorder = ScheduleGPipe(decode_stage, mbs)
 
     # Decoding
-    with torch.no_grad():
+    with torch.no_grad(), CUDATrackTime() as timer:
         for step in range(num_tokens - 1):
             kwargs = {"input_pos": input_pos, "cache_lane": lane}
             # sendrecv between last and first ranks, only if:
@@ -501,6 +504,10 @@ def get_example_ins_outs(seqlen: int) -> Tuple[torch.Tensor, torch.Tensor]:
 
             input_pos += 1
 
+    logger.info(
+        f"{color.green}Decoding time: {timer.get_time()} {timer.unit} for rank {rank}{color.reset}"
+    )
+
     # Display the decoding results
 
     # output formatted response via last pp group and tp rank 0