@@ -367,26 +367,39 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
367
367
continue
368
368
}
369
369
370
- // Check whether container is present in state, there may be 3 reasons why it's not present:
371
- // - policy does not want to track the container
372
- // - kubelet has just been restarted - and there is no previous state file
373
- // - container has been removed from state by RemoveContainer call (DeletionTimestamp is set)
374
- if _ , ok := m .state .GetCPUSet (string (pod .UID ), container .Name ); ! ok {
375
- if pstatus .Phase == v1 .PodRunning && pod .DeletionTimestamp == nil {
376
- klog .V (4 ).Infof ("[cpumanager] reconcileState: container is not present in state - trying to add (pod: %s, container: %s, container id: %s)" , pod .Name , container .Name , containerID )
377
- err := m .AddContainer (pod , & container , containerID )
370
+ cstatus , err := findContainerStatusByName (& pstatus , container .Name )
371
+ if err != nil {
372
+ klog .Warningf ("[cpumanager] reconcileState: skipping container; container status not found in pod status (pod: %s, container: %s, error: %v)" , pod .Name , container .Name , err )
373
+ failure = append (failure , reconciledContainer {pod .Name , container .Name , "" })
374
+ continue
375
+ }
376
+
377
+ if cstatus .State .Waiting != nil ||
378
+ (cstatus .State .Waiting == nil && cstatus .State .Running == nil && cstatus .State .Terminated == nil ) {
379
+ klog .Warningf ("[cpumanager] reconcileState: skipping container; container still in the waiting state (pod: %s, container: %s)" , pod .Name , container .Name )
380
+ failure = append (failure , reconciledContainer {pod .Name , container .Name , "" })
381
+ continue
382
+ }
383
+
384
+ if cstatus .State .Terminated != nil {
385
+ // Since the container is terminated, we know it is safe to
386
+ // remove it without any reconciliation. Removing the container
387
+ // will also remove it from the `containerMap` so that this
388
+ // container will be skipped next time around the loop.
389
+ _ , _ , err := m .containerMap .GetContainerRef (containerID )
390
+ if err == nil {
391
+ klog .Warningf ("[cpumanager] reconcileState: skipping container; already terminated (pod: %s, container id: %s)" , pod .Name , containerID )
392
+ err := m .RemoveContainer (containerID )
378
393
if err != nil {
379
- klog .Errorf ("[cpumanager] reconcileState: failed to add container (pod: %s, container: %s, container id: %s, error: %v)" , pod . Name , container .Name , containerID , err )
394
+ klog .Errorf ("[cpumanager] reconcileState: failed to remove container (pod: %s, container id: %s, error: %v)" , pod .Name , containerID , err )
380
395
failure = append (failure , reconciledContainer {pod .Name , container .Name , containerID })
381
- continue
382
396
}
383
- } else {
384
- // if DeletionTimestamp is set, pod has already been removed from state
385
- // skip the pod/container since it's not running and will be deleted soon
386
- continue
387
397
}
398
+ continue
388
399
}
389
400
401
+ m .containerMap .Add (string (pod .UID ), container .Name , containerID )
402
+
390
403
cset := m .state .GetCPUSetOrDefault (string (pod .UID ), container .Name )
391
404
if cset .IsEmpty () {
392
405
// NOTE: This should not happen outside of tests.
@@ -424,6 +437,15 @@ func findContainerIDByName(status *v1.PodStatus, name string) (string, error) {
424
437
return "" , fmt .Errorf ("unable to find ID for container with name %v in pod status (it may not be running)" , name )
425
438
}
426
439
440
+ func findContainerStatusByName (status * v1.PodStatus , name string ) (* v1.ContainerStatus , error ) {
441
+ for _ , status := range append (status .InitContainerStatuses , status .ContainerStatuses ... ) {
442
+ if status .Name == name {
443
+ return & status , nil
444
+ }
445
+ }
446
+ return nil , fmt .Errorf ("unable to find status for container with name %v in pod status (it may not be running)" , name )
447
+ }
448
+
427
449
func (m * manager ) updateContainerCPUSet (containerID string , cpus cpuset.CPUSet ) error {
428
450
// TODO: Consider adding a `ResourceConfigForContainer` helper in
429
451
// helpers_linux.go similar to what exists for pods.
0 commit comments