Skip to content

Commit b9e5315

Browse files
authored
[https://nvbugs/5623960][fix] Fix the logger once key issue and further compress log in AutoTuner. (#8873)
Signed-off-by: Yukun He <[email protected]>
1 parent 3111682 commit b9e5315

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

tensorrt_llm/_torch/autotuner.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ def choose_one(
711711
if not is_cache_hit:
712712
logger.warning_once(
713713
f"[Autotuner] Using the fallback tactic, due to cache miss on input shapes={input_shapes}",
714-
key=custom_op)
714+
key=(custom_op, "warning_autotuning_cache_miss_fallback"))
715715

716716
return (best_runner, best_tactic)
717717

@@ -755,7 +755,7 @@ def choose_one(
755755
f"At least one valid (runner, tactic) pair is required. "
756756
f"If get_valid_tactics is intended to return empty list, please ensure that this profile is not valid for the custom_op "
757757
f"and should not occur during the inference stage, or fallback tactic is implemented. Otherwise, the tuning process will crash.",
758-
key=custom_op,
758+
key=(custom_op, "warning_autotuning_no_valid_tactic"),
759759
)
760760
new_tuning_failure_occured = new_tuning_failure_occured or has_tuning_failure_occured
761761

@@ -767,7 +767,7 @@ def choose_one(
767767
f"This will not block the tuning process. "
768768
f"Please set TLLM_LOG_LEVEL=WARNING to find out when the tactic profiling fails. "
769769
f"Set TLLM_LOG_LEVEL=DEBUG to get more details of the failures.",
770-
key=custom_op,
770+
key=(custom_op, "warning_autotuning_tuning_error_summary"),
771771
)
772772

773773
# Get the best runner and tactic from cache
@@ -815,10 +815,14 @@ def _profile_runners(
815815
except Exception as e:
816816
# Handle None tensors for optional inputs
817817
shapes = self._get_input_sizes(input_tensors)
818-
logger.warning(
819-
f"[Autotuner] Failed when profiling runner={runner}, tactic={tac}, shapes={shapes}. Set TLLM_LOG_LEVEL=DEBUG for more details."
818+
logger.warning_once(
819+
f"[Autotuner] Failed when profiling runner={runner}, tactic={tac}, shapes={shapes}. Set TLLM_LOG_LEVEL=DEBUG for more details.",
820+
key=(custom_op, "warning_autotuning_profile_failure"),
821+
)
822+
logger.debug_once(
823+
f"[Autotuner] Exception captured: {e}",
824+
key=(custom_op, "debug_autotuning_exception"),
820825
)
821-
logger.debug(f"[Autotuner] Exception captured: {e}")
822826

823827
# Record the failed profiling combinations
824828
if custom_op not in self.stats.failed_profiling_count:

0 commit comments

Comments
 (0)