Skip to content

Commit cc68c34

Browse files
authored
feat: [NPM] Support graceful shutdown in pod (#1083)
* Support graceful shutdown in pod * Update detailed comments and cleaning up codes * Add Unit tests * Address lint errors * Address comments * Addressed comment and add UTs
1 parent 08df7f7 commit cc68c34

File tree

4 files changed

+301
-19
lines changed

4 files changed

+301
-19
lines changed

npm/pkg/controlplane/controllers/v1/podController.go

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,18 @@ func (nPod *NpmPod) updateNpmPodAttributes(podObj *corev1.Pod) {
8282
}
8383
}
8484

85+
// noUpdate evaluates whether NpmPod is required to be update given podObj.
86+
func (nPod *NpmPod) noUpdate(podObj *corev1.Pod) bool {
87+
return nPod.Namespace == podObj.ObjectMeta.Namespace &&
88+
nPod.Name == podObj.ObjectMeta.Name &&
89+
nPod.Phase == podObj.Status.Phase &&
90+
nPod.PodIP == podObj.Status.PodIP &&
91+
util.IsSameLabels(nPod.Labels, podObj.ObjectMeta.Labels) &&
92+
// TODO(jungukcho) to avoid using DeepEqual for ContainerPorts,
93+
// it needs a precise sorting. Will optimize it later if needed.
94+
reflect.DeepEqual(nPod.ContainerPorts, getContainerPortList(podObj))
95+
}
96+
8597
type PodController struct {
8698
podLister corelisters.PodLister
8799
workqueue workqueue.RateLimitingInterface
@@ -168,7 +180,8 @@ func (c *PodController) addPod(obj interface{}) {
168180
}
169181
podObj, _ := obj.(*corev1.Pod)
170182

171-
// If newPodObj status is either corev1.PodSucceeded or corev1.PodFailed or DeletionTimestamp is set, do not need to add it into workqueue.
183+
// To check whether this pod is needed to queue or not.
184+
// If the pod are in completely terminated states, the pod is not enqueued to avoid unnecessary computation.
172185
if isCompletePod(podObj) {
173186
return
174187
}
@@ -324,7 +337,9 @@ func (c *PodController) syncPod(key string) error {
324337
return err
325338
}
326339

327-
// If newPodObj status is either corev1.PodSucceeded or corev1.PodFailed or DeletionTimestamp is set, start clean-up the lastly applied states.
340+
// if this pod is completely in terminated states (which means pod is gracefully shutdown),
341+
// NPM starts clean-up the lastly applied states even in update events.
342+
// This proactive clean-up helps to miss stale pod object in case delete event is missed.
328343
if isCompletePod(pod) {
329344
if err = c.cleanUpDeletedPod(key); err != nil {
330345
return fmt.Errorf("Error: %v when when pod is in completed state.\n", err)
@@ -337,7 +352,7 @@ func (c *PodController) syncPod(key string) error {
337352
// if pod does not have different states against lastly applied states stored in cachedNpmPod,
338353
// podController does not need to reconcile this update.
339354
// in this updatePod event, newPod was updated with states which PodController does not need to reconcile.
340-
if isInvalidPodUpdate(cachedNpmPod, pod) {
355+
if cachedNpmPod.noUpdate(pod) {
341356
return nil
342357
}
343358
}
@@ -590,13 +605,20 @@ func (c *PodController) manageNamedPortIpsets(portList []corev1.ContainerPort, p
590605
return nil
591606
}
592607

608+
// isCompletePod evaluates whether this pod is completely in terminated states,
609+
// which means pod is gracefully shutdown.
593610
func isCompletePod(podObj *corev1.Pod) bool {
594-
if podObj.DeletionTimestamp != nil {
611+
// DeletionTimestamp and DeletionGracePeriodSeconds in pod are not nil,
612+
// which means pod is expected to be deleted and
613+
// DeletionGracePeriodSeconds value is zero, which means the pod is gracefully terminated.
614+
if podObj.DeletionTimestamp != nil && podObj.DeletionGracePeriodSeconds != nil && *podObj.DeletionGracePeriodSeconds == 0 {
595615
return true
596616
}
597617

598-
// K8s categorizes Succeeded and Failed pods as a terminated pod and will not restart them
618+
// K8s categorizes Succeeded and Failed pods as a terminated pod and will not restart them.
599619
// So NPM will ignorer adding these pods
620+
// TODO(jungukcho): what are the values of DeletionTimestamp and podObj.DeletionGracePeriodSeconds
621+
// in either below status?
600622
if podObj.Status.Phase == corev1.PodSucceeded || podObj.Status.Phase == corev1.PodFailed {
601623
return true
602624
}
@@ -618,15 +640,3 @@ func getContainerPortList(podObj *corev1.Pod) []corev1.ContainerPort {
618640
}
619641
return portList
620642
}
621-
622-
// (TODO): better naming?
623-
func isInvalidPodUpdate(npmPod *NpmPod, newPodObj *corev1.Pod) bool {
624-
return npmPod.Namespace == newPodObj.ObjectMeta.Namespace &&
625-
npmPod.Name == newPodObj.ObjectMeta.Name &&
626-
npmPod.Phase == newPodObj.Status.Phase &&
627-
npmPod.PodIP == newPodObj.Status.PodIP &&
628-
newPodObj.ObjectMeta.DeletionTimestamp == nil &&
629-
newPodObj.ObjectMeta.DeletionGracePeriodSeconds == nil &&
630-
reflect.DeepEqual(npmPod.Labels, newPodObj.ObjectMeta.Labels) &&
631-
reflect.DeepEqual(npmPod.ContainerPorts, getContainerPortList(newPodObj))
632-
}

npm/pkg/controlplane/controllers/v1/podController_test.go

Lines changed: 153 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/Azure/azure-container-networking/npm/util"
1313
testutils "github.com/Azure/azure-container-networking/test/utils"
1414
"github.com/stretchr/testify/assert"
15+
"github.com/stretchr/testify/require"
1516
corev1 "k8s.io/api/core/v1"
1617
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1718
"k8s.io/apimachinery/pkg/runtime"
@@ -86,7 +87,7 @@ func (f *podFixture) newPodController(stopCh chan struct{}) {
8687
// f.kubeInformer.Start(stopCh)
8788
}
8889

89-
func createPod(name, ns, rv, podIP string, labels map[string]string, isHostNewtwork bool, podPhase corev1.PodPhase) *corev1.Pod {
90+
func createPod(name, ns, rv, podIP string, labels map[string]string, isHostNetwork bool, podPhase corev1.PodPhase) *corev1.Pod {
9091
return &corev1.Pod{
9192
ObjectMeta: metav1.ObjectMeta{
9293
Name: name,
@@ -95,7 +96,7 @@ func createPod(name, ns, rv, podIP string, labels map[string]string, isHostNewtw
9596
ResourceVersion: rv,
9697
},
9798
Spec: corev1.PodSpec{
98-
HostNetwork: isHostNewtwork,
99+
HostNetwork: isHostNetwork,
99100
Containers: []corev1.Container{
100101
{
101102
Ports: []corev1.ContainerPort{
@@ -690,6 +691,88 @@ func TestHasValidPodIP(t *testing.T) {
690691
}
691692
}
692693

694+
func TestIsCompletePod(t *testing.T) {
695+
var zeroGracePeriod int64
696+
var defaultGracePeriod int64 = 30
697+
698+
type podState struct {
699+
phase corev1.PodPhase
700+
deletionTimestamp *metav1.Time
701+
deletionGracePeriodSeconds *int64
702+
}
703+
704+
tests := []struct {
705+
name string
706+
podState podState
707+
expectedCompletedPod bool
708+
}{
709+
710+
{
711+
name: "pod is in running status",
712+
podState: podState{
713+
phase: corev1.PodRunning,
714+
deletionTimestamp: nil,
715+
deletionGracePeriodSeconds: nil,
716+
},
717+
expectedCompletedPod: false,
718+
},
719+
{
720+
name: "pod is in completely terminating states after graceful shutdown period",
721+
podState: podState{
722+
phase: corev1.PodRunning,
723+
deletionTimestamp: &metav1.Time{},
724+
deletionGracePeriodSeconds: &zeroGracePeriod,
725+
},
726+
expectedCompletedPod: true,
727+
},
728+
{
729+
name: "pod is in terminating states, but in graceful shutdown period",
730+
podState: podState{
731+
phase: corev1.PodRunning,
732+
deletionTimestamp: &metav1.Time{},
733+
deletionGracePeriodSeconds: &defaultGracePeriod,
734+
},
735+
expectedCompletedPod: false,
736+
},
737+
{
738+
name: "pod is in PodSucceeded status",
739+
podState: podState{
740+
phase: corev1.PodSucceeded,
741+
deletionTimestamp: nil,
742+
deletionGracePeriodSeconds: nil,
743+
},
744+
expectedCompletedPod: true,
745+
},
746+
{
747+
name: "pod is in PodFailed status",
748+
podState: podState{
749+
phase: corev1.PodSucceeded,
750+
deletionTimestamp: nil,
751+
deletionGracePeriodSeconds: nil,
752+
},
753+
expectedCompletedPod: true,
754+
},
755+
}
756+
757+
for _, tt := range tests {
758+
tt := tt
759+
t.Run(tt.name, func(t *testing.T) {
760+
t.Parallel()
761+
corev1Pod := &corev1.Pod{
762+
ObjectMeta: metav1.ObjectMeta{
763+
DeletionTimestamp: tt.podState.deletionTimestamp,
764+
DeletionGracePeriodSeconds: tt.podState.deletionGracePeriodSeconds,
765+
},
766+
Status: corev1.PodStatus{
767+
Phase: tt.podState.phase,
768+
},
769+
}
770+
isPodCompleted := isCompletePod(corev1Pod)
771+
require.Equal(t, tt.expectedCompletedPod, isPodCompleted)
772+
})
773+
}
774+
}
775+
693776
// Extra unit test which is not quite related to PodController,
694777
// but help to understand how workqueue works to make event handler logic lock-free.
695778
// If the same key are queued into workqueue in multiple times,
@@ -721,3 +804,71 @@ func TestWorkQueue(t *testing.T) {
721804
}
722805
}
723806
}
807+
808+
func TestNPMPodNoUpdate(t *testing.T) {
809+
type podInfo struct {
810+
podName string
811+
ns string
812+
rv string
813+
podIP string
814+
labels map[string]string
815+
isHostNetwork bool
816+
podPhase corev1.PodPhase
817+
}
818+
819+
labels := map[string]string{
820+
"app": "test-pod",
821+
}
822+
823+
tests := []struct {
824+
name string
825+
podInfo
826+
updatingNPMPod bool
827+
expectedNoUpdate bool
828+
}{
829+
{
830+
"Required update of NPMPod given Pod",
831+
podInfo{
832+
podName: "test-pod-1",
833+
ns: "test-namespace",
834+
rv: "0",
835+
podIP: "1.2.3.4",
836+
labels: labels,
837+
isHostNetwork: NonHostNetwork,
838+
podPhase: corev1.PodRunning,
839+
},
840+
false,
841+
false,
842+
},
843+
{
844+
"No required update of NPMPod given Pod",
845+
podInfo{
846+
podName: "test-pod-2",
847+
ns: "test-namespace",
848+
rv: "0",
849+
podIP: "1.2.3.4",
850+
labels: labels,
851+
isHostNetwork: NonHostNetwork,
852+
podPhase: corev1.PodRunning,
853+
},
854+
true,
855+
true,
856+
},
857+
}
858+
859+
for _, tt := range tests {
860+
tt := tt
861+
t.Run(tt.name, func(t *testing.T) {
862+
t.Parallel()
863+
corev1Pod := createPod(tt.podName, tt.ns, tt.rv, tt.podIP, tt.labels, tt.isHostNetwork, tt.podPhase)
864+
npmPod := newNpmPod(corev1Pod)
865+
if tt.updatingNPMPod {
866+
npmPod.appendLabels(corev1Pod.Labels, AppendToExistingLabels)
867+
npmPod.updateNpmPodAttributes(corev1Pod)
868+
npmPod.appendContainerPorts(corev1Pod)
869+
}
870+
noUpdate := npmPod.noUpdate(corev1Pod)
871+
require.Equal(t, tt.expectedNoUpdate, noUpdate)
872+
})
873+
}
874+
}

npm/util/util.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,3 +338,18 @@ func CompareSlices(list1, list2 []string) bool {
338338
func SliceToString(list []string) string {
339339
return strings.Join(list, SetPolicyDelimiter)
340340
}
341+
342+
// IsSameLabels return if all pairs of key and value in two maps are same.
343+
// Otherwise, it returns false.
344+
func IsSameLabels(labelA, labelB map[string]string) bool {
345+
if len(labelA) != len(labelB) {
346+
return false
347+
}
348+
349+
for labelKey, labelVal := range labelA {
350+
if val, exist := labelB[labelKey]; !exist || labelVal != val {
351+
return false
352+
}
353+
}
354+
return true
355+
}

0 commit comments

Comments
 (0)