NVIDIA · farazkh80 · Nov 19, 2025 · Nov 14, 2025 · Nov 18, 2025 · Nov 18, 2025
@@ -75,9 +75,9 @@ def load_weight_shard(
         # For integrated GPU systems (e.g., DGX Spark), CPU and GPU share limited physical memory.
         # Avoiding device transfers reduces memory consumption and unnecessary data copies,
         # enabling support for larger models on memory-constrained systems.
-        logger.warning(
-            f"[load_weight_shard] Skipping device transfer from {weight.device} to {device} on integrated GPU to conserve shared memory."
-        )
+        logger.warning_once(
+            f"[load_weight_shard] Skipping device transfer from {weight.device} to {device} on integrated GPU to conserve shared memory.",
+            key="load_weight_shard_skip_device_transfer_with_integrated_gpu")
         device = weight.device
     if isinstance(weight, torch.Tensor):
         tensor_shape = weight.shape