Skip to content

Commit 6449707

Browse files
authored
Merge pull request #1460 from himanshug/leader_shutdown
Leader to give up leadership lock on graceful shutdown
2 parents 631f52c + a70bb73 commit 6449707

File tree

7 files changed

+268
-20
lines changed

7 files changed

+268
-20
lines changed

extended/src/main/java/io/kubernetes/client/extended/leaderelection/LeaderElector.java

Lines changed: 89 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -262,21 +262,46 @@ private boolean tryAcquireOrRenew() {
262262
return false;
263263
}
264264

265+
if (log.isDebugEnabled()) {
266+
log.debug("Lock not found, try to create it");
267+
}
268+
269+
// No Lock resource exists, try to get leadership by creating it
265270
return createLock(lock, leaderElectionRecord);
266271
}
267272

273+
// alright, we have an existing lock resource
274+
// 1. Is Lock Empty? --> try to get leadership by updating it
275+
// 2. Am I the Leader? --> update info and renew lease by updating it
276+
// 3. I am not the Leader?
277+
// 3.1 is Lock expired? --> try to get leadership by updating it
278+
// 3.2 Lock not expired? --> update info, try later
279+
268280
if (oldLeaderElectionRecord == null
269281
|| oldLeaderElectionRecord.getAcquireTime() == null
270282
|| oldLeaderElectionRecord.getRenewTime() == null
271283
|| oldLeaderElectionRecord.getHolderIdentity() == null) {
272-
return createLock(lock, leaderElectionRecord);
284+
// We found the lock resource with an empty LeaderElectionRecord, try to get leadership by
285+
// updating it
286+
if (log.isDebugEnabled()) {
287+
log.debug("Update lock to get lease");
288+
}
289+
290+
if (oldLeaderElectionRecord != null) {
291+
// maintain the leaderTransitions
292+
leaderElectionRecord.setLeaderTransitions(
293+
oldLeaderElectionRecord.getLeaderTransitions() + 1);
294+
}
295+
296+
return updateLock(lock, leaderElectionRecord);
273297
}
274298

275-
// 2. Record obtained, check the Identity & Time
299+
// 2. Record obtained with LeaderElectionRecord, check the Identity & Time
276300
if (!oldLeaderElectionRecord.equals(this.observedRecord)) {
277301
this.observedRecord = oldLeaderElectionRecord;
278302
this.observedTimeMilliSeconds = System.currentTimeMillis();
279303
}
304+
280305
if (observedTimeMilliSeconds + config.getLeaseDuration().toMillis() > now.getTime()
281306
&& !isLeader()) {
282307
if (log.isDebugEnabled()) {
@@ -296,26 +321,20 @@ private boolean tryAcquireOrRenew() {
296321
leaderElectionRecord.setLeaderTransitions(oldLeaderElectionRecord.getLeaderTransitions() + 1);
297322
}
298323

299-
// update the lock itself
300324
if (log.isDebugEnabled()) {
301-
log.debug("Update lock acquire time to keep lease");
325+
log.debug("Update lock to renew lease");
302326
}
303-
boolean updateSuccess = config.getLock().update(leaderElectionRecord);
304-
if (!updateSuccess) {
305-
return false;
306-
}
307-
this.observedRecord = leaderElectionRecord;
308-
this.observedTimeMilliSeconds = System.currentTimeMillis();
309-
if (log.isDebugEnabled()) {
327+
328+
boolean renewalStatus = updateLock(lock, leaderElectionRecord);
329+
330+
if (renewalStatus && log.isDebugEnabled()) {
310331
log.debug("TryAcquireOrRenew return success");
311332
}
312-
return true;
333+
334+
return renewalStatus;
313335
}
314336

315337
private boolean createLock(Lock lock, LeaderElectionRecord leaderElectionRecord) {
316-
if (log.isDebugEnabled()) {
317-
log.debug("Lock not found, try to create it");
318-
}
319338
boolean createSuccess = lock.create(leaderElectionRecord);
320339
if (!createSuccess) {
321340
return false;
@@ -325,6 +344,16 @@ private boolean createLock(Lock lock, LeaderElectionRecord leaderElectionRecord)
325344
return true;
326345
}
327346

347+
private boolean updateLock(Lock lock, LeaderElectionRecord leaderElectionRecord) {
348+
boolean updateSuccess = lock.update(leaderElectionRecord);
349+
if (!updateSuccess) {
350+
return false;
351+
}
352+
this.observedRecord = leaderElectionRecord;
353+
this.observedTimeMilliSeconds = System.currentTimeMillis();
354+
return true;
355+
}
356+
328357
private boolean isLeader() {
329358
return this.config.getLock().identity().equals(this.observedRecord.getHolderIdentity());
330359
}
@@ -345,8 +374,53 @@ private void maybeReportTransition() {
345374

346375
@Override
347376
public void close() {
377+
log.info("Closing...");
348378
scheduledWorkers.shutdownNow();
349379
leaseWorkers.shutdownNow();
350380
hookWorkers.shutdownNow();
381+
382+
// If I am the leader, free the lock so that other candidates can take it immediately
383+
if (observedRecord != null && isLeader()) {
384+
385+
// First ensure that all executors have stopped
386+
try {
387+
boolean isTerminated =
388+
scheduledWorkers.awaitTermination(
389+
config.getRetryPeriod().getSeconds(), TimeUnit.SECONDS);
390+
if (!isTerminated) {
391+
log.warn("scheduledWorkers executor termination didn't finish.");
392+
return;
393+
}
394+
395+
isTerminated =
396+
leaseWorkers.awaitTermination(config.getRetryPeriod().getSeconds(), TimeUnit.SECONDS);
397+
if (!isTerminated) {
398+
log.warn("leaseWorkers executor termination didn't finish.");
399+
return;
400+
}
401+
402+
isTerminated =
403+
hookWorkers.awaitTermination(config.getRetryPeriod().getSeconds(), TimeUnit.SECONDS);
404+
if (!isTerminated) {
405+
log.warn("hookWorkers executor termination didn't finish.");
406+
return;
407+
}
408+
} catch (InterruptedException ex) {
409+
log.warn("Failed to ensure executors termination.", ex);
410+
return;
411+
}
412+
413+
log.info("Giving up the lock....");
414+
LeaderElectionRecord emptyRecord = new LeaderElectionRecord();
415+
// maintain leaderTransitions count
416+
emptyRecord.setLeaderTransitions(observedRecord.getLeaderTransitions());
417+
boolean status = this.config.getLock().update(emptyRecord);
418+
419+
if (!status) {
420+
log.warn("Failed to give up the lock.");
421+
}
422+
}
423+
424+
log.info("Closed");
351425
}
352426
}

extended/src/main/java/io/kubernetes/client/extended/leaderelection/resourcelock/ConfigMapLock.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ public ConfigMapLock(String namespace, String name, String identity, ApiClient a
5959
@Override
6060
public LeaderElectionRecord get() throws ApiException {
6161
V1ConfigMap configMap = coreV1Client.readNamespacedConfigMap(name, namespace, null, null, null);
62+
configMapRefer.set(configMap);
63+
6264
Map<String, String> annotations = configMap.getMetadata().getAnnotations();
6365
if (annotations == null || annotations.isEmpty()) {
6466
configMap.getMetadata().setAnnotations(new HashMap<>());
@@ -74,7 +76,7 @@ public LeaderElectionRecord get() throws ApiException {
7476
.getApiClient()
7577
.getJSON()
7678
.deserialize(recordRawStringContent, LeaderElectionRecord.class);
77-
configMapRefer.set(configMap);
79+
7880
return record;
7981
}
8082

extended/src/main/java/io/kubernetes/client/extended/leaderelection/resourcelock/EndpointsLock.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ public EndpointsLock(String namespace, String name, String identity, ApiClient a
5959
@Override
6060
public LeaderElectionRecord get() throws ApiException {
6161
V1Endpoints endpoints = coreV1Client.readNamespacedEndpoints(name, namespace, null, null, null);
62+
endpointsRefer.set(endpoints);
63+
6264
Map<String, String> annotations = endpoints.getMetadata().getAnnotations();
6365
if (annotations == null || annotations.isEmpty()) {
6466
endpoints.getMetadata().setAnnotations(new HashMap<>());
@@ -74,7 +76,6 @@ public LeaderElectionRecord get() throws ApiException {
7476
.getApiClient()
7577
.getJSON()
7678
.deserialize(recordRawStringContent, LeaderElectionRecord.class);
77-
endpointsRefer.set(endpoints);
7879
return record;
7980
}
8081

extended/src/test/java/io/kubernetes/client/extended/controller/LeaderElectingControllerTest.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import io.kubernetes.client.extended.leaderelection.LeaderElector;
2626
import io.kubernetes.client.extended.leaderelection.Lock;
2727
import io.kubernetes.client.openapi.ApiException;
28+
import java.net.HttpURLConnection;
2829
import java.time.Duration;
2930
import java.util.concurrent.atomic.AtomicReference;
3031
import org.junit.Test;
@@ -56,8 +57,10 @@ public void testLeaderElectingController() throws ApiException {
5657
record.set(new LeaderElectionRecord());
5758

5859
when(mockLock.identity()).thenReturn("foo");
59-
60-
doAnswer(invocationOnMock -> record.get()).when(mockLock).get();
60+
when(mockLock.get())
61+
.thenThrow(
62+
new ApiException("Record Not Found", HttpURLConnection.HTTP_NOT_FOUND, null, null))
63+
.thenReturn(record.get());
6164

6265
doAnswer(
6366
invocationOnMock -> {

extended/src/test/java/io/kubernetes/client/extended/leaderelection/LeaderElectionTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import static org.mockito.Mockito.*;
1818

1919
import io.kubernetes.client.openapi.ApiException;
20+
import java.net.HttpURLConnection;
2021
import java.time.Duration;
2122
import java.util.ArrayList;
2223
import java.util.Date;
@@ -412,9 +413,12 @@ public MockResourceLock(String iden) {
412413
}
413414

414415
@Override
415-
public LeaderElectionRecord get() {
416+
public LeaderElectionRecord get() throws ApiException {
416417
lock.lock();
417418
try {
419+
if (leaderRecord == null) {
420+
throw new ApiException("Record Not Found", HttpURLConnection.HTTP_NOT_FOUND, null, null);
421+
}
418422
return leaderRecord;
419423
} finally {
420424
lock.unlock();
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors.
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
Unless required by applicable law or agreed to in writing, software
8+
distributed under the License is distributed on an "AS IS" BASIS,
9+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
See the License for the specific language governing permissions and
11+
limitations under the License.
12+
*/
13+
package io.kubernetes.client.extended.leaderelection;
14+
15+
import java.time.Duration;
16+
import java.util.concurrent.CountDownLatch;
17+
import java.util.concurrent.TimeUnit;
18+
import org.junit.Test;
19+
20+
/** Leader Election tests using "simulated" locks created by {@link LockSmith} */
21+
public class LeaderElectorTest {
22+
/**
23+
* Tests that when a leader candidate is stopped gracefully, second candidate immediately becomes
24+
* leader.
25+
*/
26+
@Test(timeout = 20000L)
27+
public void testLeaderGracefulShutdown() throws Exception {
28+
LockSmith lockSmith = new LockSmith();
29+
30+
CountDownLatch startBeingLeader1 = new CountDownLatch(1);
31+
CountDownLatch stopBeingLeader1 = new CountDownLatch(1);
32+
33+
LeaderElector leaderElector1 =
34+
makeAndRunLeaderElectorAsync(lockSmith, "candidate1", startBeingLeader1, stopBeingLeader1);
35+
36+
// wait for candidate1 to become leader
37+
startBeingLeader1.await();
38+
39+
CountDownLatch startBeingLeader2 = new CountDownLatch(1);
40+
CountDownLatch stopBeingLeader2 = new CountDownLatch(1);
41+
42+
LeaderElector leaderElector2 =
43+
makeAndRunLeaderElectorAsync(lockSmith, "candidate2", startBeingLeader2, stopBeingLeader2);
44+
45+
leaderElector1.close();
46+
47+
// ensure stopBeingLeader hook is called
48+
stopBeingLeader1.await();
49+
50+
// wait for candidate2 to become leader
51+
startBeingLeader2.await();
52+
53+
leaderElector2.close();
54+
}
55+
56+
private LeaderElector makeAndRunLeaderElectorAsync(
57+
LockSmith lockSmith,
58+
String lockIdentity,
59+
CountDownLatch startBeingLeader,
60+
CountDownLatch stopBeingLeader) {
61+
LeaderElectionConfig leaderElectionConfig =
62+
new LeaderElectionConfig(
63+
lockSmith.makeLock(lockIdentity),
64+
Duration.ofMillis(TimeUnit.MINUTES.toMillis(1)),
65+
Duration.ofMillis(TimeUnit.SECONDS.toMillis(51)),
66+
Duration.ofMillis(TimeUnit.SECONDS.toMillis(3)));
67+
LeaderElector leaderElector = new LeaderElector(leaderElectionConfig);
68+
69+
Thread thread =
70+
new Thread(
71+
() ->
72+
leaderElector.run(
73+
() -> startBeingLeader.countDown(), () -> stopBeingLeader.countDown()),
74+
String.format("%s-leader-elector-main", lockIdentity));
75+
thread.setDaemon(true);
76+
thread.start();
77+
78+
return leaderElector;
79+
}
80+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors.
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
Unless required by applicable law or agreed to in writing, software
8+
distributed under the License is distributed on an "AS IS" BASIS,
9+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
See the License for the specific language governing permissions and
11+
limitations under the License.
12+
*/
13+
package io.kubernetes.client.extended.leaderelection;
14+
15+
import io.kubernetes.client.openapi.ApiException;
16+
import java.net.HttpURLConnection;
17+
import java.util.concurrent.atomic.AtomicReference;
18+
19+
/** Makes simulated {@link Lock} objects that behave as if they were backed by real API server. */
20+
public class LockSmith {
21+
private AtomicReference<Resource> lockResourceRef = new AtomicReference<>();
22+
23+
public Lock makeLock(String identity) {
24+
return new SimulatedLock(identity);
25+
}
26+
27+
private class SimulatedLock implements Lock {
28+
private final String identity;
29+
30+
public SimulatedLock(String identity) {
31+
this.identity = identity;
32+
}
33+
34+
@Override
35+
public LeaderElectionRecord get() throws ApiException {
36+
if (lockResourceRef.get() == null) {
37+
throw new ApiException("Record Not Found", HttpURLConnection.HTTP_NOT_FOUND, null, null);
38+
}
39+
40+
return lockResourceRef.get().record;
41+
}
42+
43+
@Override
44+
public boolean create(LeaderElectionRecord record) {
45+
return lockResourceRef.compareAndSet(null, new Resource(record));
46+
}
47+
48+
@Override
49+
public boolean update(LeaderElectionRecord record) {
50+
Resource res = lockResourceRef.get();
51+
if (res == null) {
52+
return false;
53+
} else {
54+
Resource newResource = new Resource(res.version + 1, record);
55+
return lockResourceRef.compareAndSet(res, newResource);
56+
}
57+
}
58+
59+
@Override
60+
public String identity() {
61+
return identity;
62+
}
63+
64+
@Override
65+
public String describe() {
66+
return "simulated/lock";
67+
}
68+
}
69+
70+
private static class Resource {
71+
final int version;
72+
final LeaderElectionRecord record;
73+
74+
public Resource(LeaderElectionRecord record) {
75+
this.version = 0;
76+
this.record = record;
77+
}
78+
79+
public Resource(int version, LeaderElectionRecord record) {
80+
this.version = version;
81+
this.record = record;
82+
}
83+
}
84+
}

0 commit comments

Comments
 (0)