
Commit 0f532af

Code2Life and claude committed
fix: reduce preemption log verbosity to V(4) to avoid log flooding

During preemption, N candidate nodes × M victims trigger massive logging from the validatePreemption, FilterWithPreempt, and queueingHint paths. Changed all preemption DBG/validation Info-level logs to V(4) to keep production logs clean while preserving debuggability with -v=4.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f4d2279 commit 0f532af
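
The commit relies on logr/klog verbosity gating: a record logged through V(4) is dropped unless the process runs with -v=4 or higher, so the debug detail survives for troubleshooting without flooding default production logs. A minimal standalone sketch of that gating, assuming k8s.io/klog/v2 as the backend (illustrative only, not code from this repository):

```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	// klog registers the -v flag that controls verbosity.
	klog.InitFlags(nil)
	flag.Parse()

	logger := klog.Background()

	// Always emitted, regardless of -v.
	logger.Info("preemption started", "candidateNodes", 3, "victims", 5)

	// Emitted only when the process runs with -v=4 or higher,
	// mirroring the V(4) gating this commit applies.
	logger.V(4).Info("[PREEMPT-DBG] simulated release", "gpu", "gpu-0")
}
```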

File tree

2 files changed: +8 −8 lines changed

internal/gpuallocator/gpuallocator.go

Lines changed: 4 additions & 4 deletions

@@ -240,7 +240,7 @@ func (s *GpuAllocator) FilterWithPreempt(
 	// Use a temporary map to accumulate releases for the same GPU (multiple victims on same GPU)
 	gpuReleasedMap := make(map[string]*tfv1.GPU)
 
-	log.FromContext(s.ctx).Info("[PREEMPT-DBG] FilterWithPreempt: starting to simulate release",
+	log.FromContext(s.ctx).V(4).Info("[PREEMPT-DBG] FilterWithPreempt: starting to simulate release",
 		"preemptAllocRequestsCount", len(preemptAllocRequests))
 
 	for i, preemptAllocRequest := range preemptAllocRequests {
@@ -285,7 +285,7 @@ func (s *GpuAllocator) FilterWithPreempt(
 		gpuCopy.Status.Available.Vram.Add(preemptAllocRequest.Request.Vram)
 
 		// Log GPU state after simulated release
-		log.FromContext(s.ctx).Info("[PREEMPT-DBG] Simulated release on GPU",
+		log.FromContext(s.ctx).V(4).Info("[PREEMPT-DBG] Simulated release on GPU",
 			"gpu", gpuName,
 			"node", gpuCopy.Status.NodeSelector[constants.KubernetesHostNameLabel],
 			"victimWorkload", preemptAllocRequest.WorkloadNameNamespace.Name,
@@ -369,7 +369,7 @@ func (s *GpuAllocator) FilterWithPreempt(
 		return nil, nil, fmt.Errorf("apply filters: %w", err)
 	}
 
-	log.FromContext(s.ctx).Info("[PREEMPT-DBG] FilterWithPreempt: filter results",
+	log.FromContext(s.ctx).V(4).Info("[PREEMPT-DBG] FilterWithPreempt: filter results",
 		"filteredGPUsCount", len(filteredGPUs),
 		"toFilterGPUsCount", len(toFilterGPUs),
 		"filterDetailsCount", len(filterDetails))
@@ -1557,7 +1557,7 @@ func (s *GpuAllocator) CheckQuotaAndFilterSingleNodePreempt(
 		return err
 	}
 	if len(filteredGPUs) < int(allocReq.Count) {
-		log.FromContext(s.ctx).Info("[PREEMPT] not enough GPUs after filter during preempt",
+		log.FromContext(s.ctx).V(4).Info("[PREEMPT] not enough GPUs after filter during preempt",
 			"node", nodeName,
 			"requiredGPUs", allocReq.Count,
 			"filteredGPUsCount", len(filteredGPUs),

internal/scheduler/gpuresources/gpuresources.go

Lines changed: 4 additions & 4 deletions

@@ -465,7 +465,7 @@ func (s *GPUFit) checkNominatedPodsGPUReservation(pod *v1.Pod, nodeName string,
 	// Check if remaining resources are sufficient
 	if remainingTflops.Cmp(currentTflopsTotal) < 0 ||
 		remainingVram.Cmp(currentVramTotal) < 0 {
-		s.logger.Info("Insufficient GPU resources after reserving for nominated pods",
+		s.logger.V(4).Info("Insufficient GPU resources after reserving for nominated pods",
 			"currentPod", pod.Name,
 			"node", nodeName,
 			"currentPriority", currentPodPriority,
@@ -783,7 +783,7 @@ func (s *GPUFit) queueingHint(logger klog.Logger, pod *v1.Pod, oldObj, newObj in
 	if pod.Status.NominatedNodeName != "" && newGPU != nil {
 		gpuNodeName := newGPU.Status.NodeSelector[constants.KubernetesHostNameLabel]
 		if gpuNodeName == pod.Status.NominatedNodeName {
-			logger.Info("GPU CR updated on nominated node, immediately requeue preempting pod",
+			logger.V(4).Info("GPU CR updated on nominated node, immediately requeue preempting pod",
 				"pod", klog.KObj(pod),
 				"nominatedNode", pod.Status.NominatedNodeName,
 				"gpu", newGPU.Name)
@@ -903,7 +903,7 @@ func (s *GPUFit) validatePreemption(state fwk.CycleState, pod *v1.Pod, nodeInfo
 	// - Checks quota constraints
 	err = s.allocator.CheckQuotaAndFilterSingleNodePreempt(nodeName, allocReq, victims)
 	if err != nil {
-		s.logger.Info("GPU preemption validation failed",
+		s.logger.V(4).Info("GPU preemption validation failed",
 			"pod", pod.Name,
 			"namespace", pod.Namespace,
 			"node", nodeName,
@@ -916,7 +916,7 @@ func (s *GPUFit) validatePreemption(state fwk.CycleState, pod *v1.Pod, nodeInfo
 
 	// Preemption validated successfully
 	victimList := victims.UnsortedList()
-	s.logger.Info("GPU preemption validated successfully",
+	s.logger.V(4).Info("GPU preemption validated successfully",
 		"pod", pod.Name,
 		"namespace", pod.Namespace,
 		"node", nodeName,
