Skip to content

Commit b9e5315

Browse files
authored
[https://nvbugs/5623960][fix] Fix the logger once key issue and further compress log in AutoTuner. (#8873)
Signed-off-by: Yukun He <[email protected]>
1 parent 3111682 commit b9e5315

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

tensorrt_llm/_torch/autotuner.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ def choose_one(
711711
if not is_cache_hit:
712712
logger.warning_once(
713713
f"[Autotuner] Using the fallback tactic, due to cache miss on input shapes={input_shapes}",
714-
key=custom_op)
714+
key=(custom_op, "warning_autotuning_cache_miss_fallback"))
715715

716716
return (best_runner, best_tactic)
717717

@@ -755,7 +755,7 @@ def choose_one(
755755
f"At least one valid (runner, tactic) pair is required. "
756756
f"If get_valid_tactics is intended to return empty list, please ensure that this profile is not valid for the custom_op "
757757
f"and should not occur during the inference stage, or fallback tactic is implemented. Otherwise, the tuning process will crash.",
758-
key=custom_op,
758+
key=(custom_op, "warning_autotuning_no_valid_tactic"),
759759
)
760760
new_tuning_failure_occured = new_tuning_failure_occured or has_tuning_failure_occured
761761

@@ -767,7 +767,7 @@ def choose_one(
767767
f"This will not block the tuning process. "
768768
f"Please set TLLM_LOG_LEVEL=WARNING to find out when the tactic profiling fails. "
769769
f"Set TLLM_LOG_LEVEL=DEBUG to get more details of the failures.",
770-
key=custom_op,
770+
key=(custom_op, "warning_autotuning_tuning_error_summary"),
771771
)
772772

773773
# Get the best runner and tactic from cache
@@ -815,10 +815,14 @@ def _profile_runners(
815815
except Exception as e:
816816
# Handle None tensors for optional inputs
817817
shapes = self._get_input_sizes(input_tensors)
818-
logger.warning(
819-
f"[Autotuner] Failed when profiling runner={runner}, tactic={tac}, shapes={shapes}. Set TLLM_LOG_LEVEL=DEBUG for more details."
818+
logger.warning_once(
819+
f"[Autotuner] Failed when profiling runner={runner}, tactic={tac}, shapes={shapes}. Set TLLM_LOG_LEVEL=DEBUG for more details.",
820+
key=(custom_op, "warning_autotuning_profile_failure"),
821+
)
822+
logger.debug_once(
823+
f"[Autotuner] Exception captured: {e}",
824+
key=(custom_op, "debug_autotuning_exception"),
820825
)
821-
logger.debug(f"[Autotuner] Exception captured: {e}")
822826

823827
# Record the failed profiling combinations
824828
if custom_op not in self.stats.failed_profiling_count:

0 commit comments

Comments
 (0)