@@ -21,13 +21,20 @@ import (
21
21
"fmt"
22
22
"net"
23
23
"strconv"
24
+ "strings"
24
25
"time"
25
26
26
27
"github.com/onsi/ginkgo/v2"
28
+ v1 "k8s.io/api/core/v1"
27
29
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30
+ types "k8s.io/apimachinery/pkg/types"
31
+ "k8s.io/apimachinery/pkg/util/intstr"
32
+ "k8s.io/apimachinery/pkg/util/wait"
28
33
clientset "k8s.io/client-go/kubernetes"
29
34
"k8s.io/kubernetes/test/e2e/framework"
30
35
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
36
+ e2eoutput "k8s.io/kubernetes/test/e2e/framework/pod/output"
37
+ e2eservice "k8s.io/kubernetes/test/e2e/framework/service"
31
38
"k8s.io/kubernetes/test/e2e/network/common"
32
39
admissionapi "k8s.io/pod-security-admission/api"
33
40
)
@@ -98,4 +105,234 @@ var _ = common.SIGDescribe("Connectivity Pod Lifecycle", func() {
98
105
execHostnameTest (* pausePod , podIPAddress , webserverPod .Name )
99
106
})
100
107
108
+ ginkgo .It ("should be able to have zero downtime on a Blue Green deployment using Services and Readiness Gates" , func (ctx context.Context ) {
109
+ readinessGate := "k8s.io/blue-green"
110
+ patchStatusFmt := `{"status":{"conditions":[{"type":%q, "status":%q}]}}`
111
+
112
+ serviceName := "blue-green-svc"
113
+ blueGreenJig := e2eservice .NewTestJig (cs , ns , serviceName )
114
+ ginkgo .By ("creating a service " + serviceName + " with type=ClusterIP in " + ns )
115
+ blueGreenService , err := blueGreenJig .CreateTCPService (ctx , func (svc * v1.Service ) {
116
+ svc .Spec .Type = v1 .ServiceTypeClusterIP
117
+ svc .Spec .Ports = []v1.ServicePort {
118
+ {Port : 80 , Name : "http" , Protocol : v1 .ProtocolTCP , TargetPort : intstr .FromInt32 (80 )},
119
+ }
120
+ })
121
+ framework .ExpectNoError (err )
122
+
123
+ ginkgo .By ("Creating 2 webserver pod (green and blue) able to serve traffic during the grace period of 30 seconds" )
124
+ gracePeriod := int64 (30 )
125
+ bluePod := e2epod .NewAgnhostPod (ns , "blue-pod" , nil , nil , nil , "netexec" , "--http-port=80" , fmt .Sprintf ("--delay-shutdown=%d" , gracePeriod ))
126
+ bluePod .Labels = blueGreenService .Labels
127
+ bluePod .Spec .ReadinessGates = []v1.PodReadinessGate {
128
+ {ConditionType : v1 .PodConditionType (readinessGate )},
129
+ }
130
+ podClient .Create (ctx , bluePod )
131
+ err = e2epod .WaitForPodNameRunningInNamespace (ctx , cs , bluePod .Name , ns )
132
+ if err != nil {
133
+ framework .Failf ("waiting for pod %s : %v" , bluePod .Name , err )
134
+ }
135
+ if podClient .PodIsReady (ctx , bluePod .Name ) {
136
+ framework .Failf ("Expect pod(%s/%s)'s Ready condition to be false initially." , ns , bluePod .Name )
137
+ }
138
+
139
+ greenPod := e2epod .NewAgnhostPod (ns , "green-pod" , nil , nil , nil , "netexec" , "--http-port=80" , fmt .Sprintf ("--delay-shutdown=%d" , gracePeriod ))
140
+ greenPod .Labels = blueGreenService .Labels
141
+ greenPod .Spec .ReadinessGates = []v1.PodReadinessGate {
142
+ {ConditionType : v1 .PodConditionType (readinessGate )},
143
+ }
144
+ podClient .Create (ctx , greenPod )
145
+ err = e2epod .WaitForPodNameRunningInNamespace (ctx , cs , greenPod .Name , ns )
146
+ if err != nil {
147
+ framework .Failf ("waiting for pod %s : %v" , greenPod .Name , err )
148
+ }
149
+ if podClient .PodIsReady (ctx , greenPod .Name ) {
150
+ framework .Failf ("Expect pod(%s/%s)'s Ready condition to be false initially." , ns , greenPod .Name )
151
+ }
152
+
153
+ ginkgo .By ("Creating 1 client pod that will try to connect to the blue-green-svc" )
154
+ clientPod := e2epod .NewAgnhostPod (ns , "client-pod-1" , nil , nil , nil )
155
+ clientPod .Spec .TerminationGracePeriodSeconds = & gracePeriod
156
+ clientPod = podClient .CreateSync (ctx , clientPod )
157
+
158
+ ginkgo .By (fmt .Sprintf ("patching blue pod status with condition %q to true" , readinessGate ))
159
+ _ , err = podClient .Patch (ctx , bluePod .Name , types .StrategicMergePatchType , []byte (fmt .Sprintf (patchStatusFmt , readinessGate , "True" )), metav1.PatchOptions {}, "status" )
160
+ if err != nil {
161
+ framework .Failf ("failed to patch %s pod condition: %v" , bluePod .Name , err )
162
+ }
163
+
164
+ // Expect EndpointSlice resource to have the blue pod ready to serve traffic
165
+ if err := wait .PollUntilContextTimeout (ctx , 2 * time .Second , wait .ForeverTestTimeout , true , func (context.Context ) (bool , error ) {
166
+ endpointSliceList , err := cs .DiscoveryV1 ().EndpointSlices (blueGreenJig .Namespace ).List (ctx , metav1.ListOptions {
167
+ LabelSelector : "kubernetes.io/service-name=" + blueGreenJig .Name ,
168
+ })
169
+ if err != nil {
170
+ return false , err
171
+ }
172
+ for _ , slice := range endpointSliceList .Items {
173
+ for _ , ep := range slice .Endpoints {
174
+ if ep .TargetRef != nil &&
175
+ ep .TargetRef .Name == bluePod .Name &&
176
+ ep .TargetRef .Namespace == bluePod .Namespace &&
177
+ ep .Conditions .Ready != nil && * ep .Conditions .Ready {
178
+ return true , nil
179
+ }
180
+ }
181
+ }
182
+ return false , nil
183
+ }); err != nil {
184
+ framework .Failf ("No EndpointSlice found for Service %s/%s: %s" , blueGreenJig .Namespace , blueGreenJig .Name , err )
185
+ }
186
+
187
+ ginkgo .By ("Try to connect to the blue pod through the service" )
188
+ scvAddress := net .JoinHostPort (blueGreenService .Spec .ClusterIP , strconv .Itoa (80 ))
189
+ // assert 5 times that we can connect only to the blue pod
190
+ for i := 0 ; i < 5 ; i ++ {
191
+ err := wait .PollUntilContextTimeout (ctx , 3 * time .Second , 30 * time .Second , true , func (ctx context.Context ) (done bool , err error ) {
192
+ cmd := fmt .Sprintf (`curl -q -s --connect-timeout 5 %s/hostname` , scvAddress )
193
+ stdout , err := e2eoutput .RunHostCmd (clientPod .Namespace , clientPod .Name , cmd )
194
+ if err != nil {
195
+ framework .Logf ("expected error when trying to connect to cluster IP : %v" , err )
196
+ return false , nil
197
+ }
198
+ if strings .TrimSpace (stdout ) == "" {
199
+ framework .Logf ("got empty stdout, retry until timeout" )
200
+ return false , nil
201
+ }
202
+ // Ensure we're comparing hostnames and not FQDNs
203
+ targetHostname := strings .Split (bluePod .Name , "." )[0 ]
204
+ hostname := strings .TrimSpace (strings .Split (stdout , "." )[0 ])
205
+ if hostname != targetHostname {
206
+ return false , fmt .Errorf ("expecting hostname %s got %s" , targetHostname , hostname )
207
+ }
208
+ return true , nil
209
+ })
210
+ if err != nil {
211
+ framework .Failf ("can not connect to pod %s on address %s : %v" , bluePod .Name , scvAddress , err )
212
+ }
213
+ }
214
+
215
+ // Switch from blue to green
216
+ ginkgo .By (fmt .Sprintf ("patching green pod status with condition %q to true" , readinessGate ))
217
+ _ , err = podClient .Patch (ctx , greenPod .Name , types .StrategicMergePatchType , []byte (fmt .Sprintf (patchStatusFmt , readinessGate , "True" )), metav1.PatchOptions {}, "status" )
218
+ if err != nil {
219
+ framework .Failf ("failed to patch %s pod condition: %v" , greenPod .Name , err )
220
+ }
221
+
222
+ // Expect EndpointSlice resource to have the green pod ready to serve traffic
223
+ if err := wait .PollUntilContextTimeout (ctx , 2 * time .Second , wait .ForeverTestTimeout , true , func (context.Context ) (bool , error ) {
224
+ endpointSliceList , err := cs .DiscoveryV1 ().EndpointSlices (blueGreenJig .Namespace ).List (ctx , metav1.ListOptions {
225
+ LabelSelector : "kubernetes.io/service-name=" + blueGreenJig .Name ,
226
+ })
227
+ if err != nil {
228
+ return false , err
229
+ }
230
+ for _ , slice := range endpointSliceList .Items {
231
+ for _ , ep := range slice .Endpoints {
232
+ if ep .TargetRef != nil &&
233
+ ep .TargetRef .Name == greenPod .Name &&
234
+ ep .TargetRef .Namespace == greenPod .Namespace &&
235
+ ep .Conditions .Ready != nil && * ep .Conditions .Ready {
236
+ return true , nil
237
+ }
238
+ }
239
+ }
240
+ return false , nil
241
+ }); err != nil {
242
+ framework .Failf ("No EndpointSlice found for Service %s/%s: %s" , blueGreenJig .Namespace , blueGreenJig .Name , err )
243
+ }
244
+
245
+ ginkgo .By (fmt .Sprintf ("patching blue pod status with condition %q to false" , readinessGate ))
246
+ _ , err = podClient .Patch (ctx , bluePod .Name , types .StrategicMergePatchType , []byte (fmt .Sprintf (patchStatusFmt , readinessGate , "False" )), metav1.PatchOptions {}, "status" )
247
+ if err != nil {
248
+ framework .Failf ("failed to patch %s pod condition: %v" , bluePod .Name , err )
249
+ }
250
+
251
+ // Expect EndpointSlice resource to have the blue pod NOT ready to serve traffic
252
+ if err := wait .PollUntilContextTimeout (ctx , 2 * time .Second , wait .ForeverTestTimeout , true , func (context.Context ) (bool , error ) {
253
+ endpointSliceList , err := cs .DiscoveryV1 ().EndpointSlices (blueGreenJig .Namespace ).List (ctx , metav1.ListOptions {
254
+ LabelSelector : "kubernetes.io/service-name=" + blueGreenJig .Name ,
255
+ })
256
+ if err != nil {
257
+ return false , err
258
+ }
259
+ for _ , slice := range endpointSliceList .Items {
260
+ for _ , ep := range slice .Endpoints {
261
+ if ep .TargetRef != nil &&
262
+ ep .TargetRef .Name == bluePod .Name &&
263
+ ep .TargetRef .Namespace == bluePod .Namespace &&
264
+ ep .Conditions .Ready != nil && ! * ep .Conditions .Ready {
265
+ return true , nil
266
+ }
267
+ }
268
+ }
269
+ return false , nil
270
+ }); err != nil {
271
+ framework .Failf ("No EndpointSlice found for Service %s/%s: %s" , blueGreenJig .Namespace , blueGreenJig .Name , err )
272
+ }
273
+
274
+ // We have checked the endpoint slices reflect the desired state:
275
+ // bluePod not ready and greenPod ready, but we need to remember kubernetes
276
+ // is a distributed system eventually consistent, so there is a propagation
277
+ // delay until this information is present on the nodes and a programming delay
278
+ // until the corresponding node components program the information on the dataplane.
279
+ err = wait .PollUntilContextTimeout (ctx , 3 * time .Second , 30 * time .Second , true , func (ctx context.Context ) (done bool , err error ) {
280
+ cmd := fmt .Sprintf (`curl -q -s --connect-timeout 5 %s/hostname` , scvAddress )
281
+ stdout , err := e2eoutput .RunHostCmd (clientPod .Namespace , clientPod .Name , cmd )
282
+ if err != nil {
283
+ framework .Logf ("expected error when trying to connect to cluster IP : %v" , err )
284
+ return false , nil
285
+ }
286
+ if strings .TrimSpace (stdout ) == "" {
287
+ framework .Logf ("got empty stdout, retry until timeout" )
288
+ return false , nil
289
+ }
290
+ // Ensure we're comparing hostnames and not FQDNs
291
+ targetHostname := strings .Split (greenPod .Name , "." )[0 ]
292
+ hostname := strings .TrimSpace (strings .Split (stdout , "." )[0 ])
293
+ if hostname != targetHostname {
294
+ framework .Logf ("expecting hostname %s got %s" , targetHostname , hostname )
295
+ return false , nil
296
+ }
297
+ return true , nil
298
+ })
299
+ if err != nil {
300
+ framework .Failf ("can not connect to pod %s on address %s : %v" , greenPod .Name , scvAddress , err )
301
+ }
302
+
303
+ ginkgo .By ("Try to connect to the green pod through the service" )
304
+ // assert 5 times that we can connect only to the green pod
305
+ for i := 0 ; i < 5 ; i ++ {
306
+ err := wait .PollUntilContextTimeout (ctx , 3 * time .Second , 30 * time .Second , true , func (ctx context.Context ) (done bool , err error ) {
307
+ cmd := fmt .Sprintf (`curl -q -s --connect-timeout 5 %s/hostname` , scvAddress )
308
+ stdout , err := e2eoutput .RunHostCmd (clientPod .Namespace , clientPod .Name , cmd )
309
+ if err != nil {
310
+ framework .Logf ("expected error when trying to connect to cluster IP : %v" , err )
311
+ return false , nil
312
+ }
313
+ if strings .TrimSpace (stdout ) == "" {
314
+ framework .Logf ("got empty stdout, retry until timeout" )
315
+ return false , nil
316
+ }
317
+ // Ensure we're comparing hostnames and not FQDNs
318
+ targetHostname := strings .Split (greenPod .Name , "." )[0 ]
319
+ hostname := strings .TrimSpace (strings .Split (stdout , "." )[0 ])
320
+ // At this point we should only receive traffic from the green Pod.
321
+ if hostname != targetHostname {
322
+ return false , fmt .Errorf ("expecting hostname %s got %s" , targetHostname , hostname )
323
+ }
324
+ return true , nil
325
+ })
326
+ if err != nil {
327
+ framework .Failf ("can not connect to pod %s on address %s : %v" , greenPod .Name , scvAddress , err )
328
+ }
329
+ }
330
+
331
+ // TODO there can be multiple combinations like:
332
+ // test zero downtime deleting the blue pod instead setting the readiness to false
333
+ // test roll back setting back the readiness to true on the blue pod
334
+ // ...
335
+
336
+ })
337
+
101
338
})
0 commit comments