
Commit f12bb5f

SimengLiu-nv authored and farazkh80 committed

[None][fix] Reduce load_weight_shard warning message on integrated systems to only log once.

Signed-off-by: Simeng Liu <109828133+SimengLiu-nv@users.noreply.github.com>
Signed-off-by: Faraz Khoubsirat <58580514+farazkh80@users.noreply.github.com>

1 parent: c2c5331

File tree: 1 file changed (+3, -3)

tensorrt_llm/_torch/modules/linear.py (3 additions, 3 deletions)

```diff
@@ -75,9 +75,9 @@ def load_weight_shard(
         # For integrated GPU systems (e.g., DGX Spark), CPU and GPU share limited physical memory.
         # Avoiding device transfers reduces memory consumption and unnecessary data copies,
         # enabling support for larger models on memory-constrained systems.
-        logger.debug(
-            f"[load_weight_shard] Skipping device transfer from {weight.device} to {device} on integrated GPU to conserve shared memory."
-        )
+        logger.warning_once(
+            f"[load_weight_shard] Skipping device transfer from {weight.device} to {device} on integrated GPU to conserve shared memory.",
+            key="load_weight_shard_skip_device_transfer_with_integrated_gpu")
         device = weight.device
     if isinstance(weight, torch.Tensor):
         tensor_shape = weight.shape
```
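The effect of the change is that a warning sharing the same deduplication key is emitted only on the first call, rather than once per weight shard. A minimal sketch of a `warning_once`-style helper with that behavior (the method name and `key` parameter mirror the diff; the class below is an illustrative stand-in, not TensorRT-LLM's actual logger implementation):

```python
import logging

class DedupLogger:
    """Illustrative wrapper: warnings sharing a key are logged only once."""

    def __init__(self, name: str = "demo") -> None:
        self._logger = logging.getLogger(name)
        self._seen_keys: set[str] = set()

    def warning_once(self, msg: str, key: str) -> bool:
        # First call for a given key emits the warning; later calls
        # (e.g., one per weight shard during loading) are suppressed.
        if key in self._seen_keys:
            return False
        self._seen_keys.add(key)
        self._logger.warning(msg)
        return True

logger = DedupLogger()
first = logger.warning_once(
    "[load_weight_shard] Skipping device transfer on integrated GPU.",
    key="load_weight_shard_skip_device_transfer_with_integrated_gpu")
repeat = logger.warning_once(
    "[load_weight_shard] Skipping device transfer on integrated GPU.",
    key="load_weight_shard_skip_device_transfer_with_integrated_gpu")
print(first, repeat)  # True False
```

This is why the fix also raises the level from `debug` to `warning`: the message is now visible by default, but the key-based deduplication keeps it from flooding the log on integrated systems that skip the transfer for every shard.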

0 commit comments