Commit 70b4502

Merge pull request #530 from mhrivnak/leaderelection
Added leader election.
2 parents 0057336 + 67a2606 commit 70b4502

File tree

2 files changed: +233 -0 lines changed

pkg/leader/doc.go

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
// Copyright 2018 The Operator-SDK Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
Package leader implements Leader For Life, a simple alternative to lease-based
leader election.

Both the Leader For Life and lease-based approaches to leader election are
built on the concept that each candidate will attempt to create a resource with
the same GVK, namespace, and name. Whichever candidate succeeds becomes the
leader. The rest receive "already exists" errors and wait for a new
opportunity.

Leases provide a way to indirectly observe whether the leader still exists. The
leader must periodically renew its lease, usually by updating a timestamp in
its lock record. If it fails to do so, it is presumed dead, and a new election
takes place. If the leader is in fact still alive but unreachable, it is
expected to gracefully step down. A variety of factors can cause a leader to
fail at updating its lease, but continue acting as the leader before succeeding
at stepping down.

In the "leader for life" approach, a specific Pod is the leader. Once
established (by creating a lock record), the Pod is the leader until it is
destroyed. There is no possibility for multiple pods to think they are the
leader at the same time. The leader does not need to renew a lease, consider
stepping down, or do anything related to election activity once it becomes the
leader.

The lock record in this case is a ConfigMap whose OwnerReference is set to the
Pod that is the leader. When the leader is destroyed, the ConfigMap gets
garbage-collected, enabling a different candidate Pod to become the leader.

Leader for Life requires that all candidate Pods be in the same Namespace.
*/
package leader
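
For context, a minimal sketch of how an operator would consume this package: call Become early in main, before starting any controllers, so that only the leader pod reconciles. The import path assumes the operator-sdk module path, and the lock name "memcached-operator-lock" is a hypothetical example; neither is part of this diff.

package main

import (
	"context"

	"github.com/operator-framework/operator-sdk/pkg/leader"
	"github.com/sirupsen/logrus"
)

func main() {
	// Block until this pod owns the lock ConfigMap (or exit on error).
	// Outside a cluster, Become returns nil immediately and the operator
	// simply runs without leader election.
	if err := leader.Become(context.TODO(), "memcached-operator-lock"); err != nil {
		logrus.Fatal(err)
	}

	// ... set up and start the controller-runtime manager here ...
}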

pkg/leader/leader.go

Lines changed: 187 additions & 0 deletions
@@ -0,0 +1,187 @@
// Copyright 2018 The Operator-SDK Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package leader

import (
	"context"
	"errors"
	"io/ioutil"
	"os"
	"strings"
	"time"

	"github.com/sirupsen/logrus"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/rest"
	crclient "sigs.k8s.io/controller-runtime/pkg/client"
)

// errNoNS indicates that a namespace could not be found for the current
// environment
var errNoNS = errors.New("namespace not found for current environment")

// maxBackoffInterval defines the maximum amount of time to wait between
// attempts to become the leader.
const maxBackoffInterval = time.Second * 16

// Become ensures that the current pod is the leader within its namespace. If
// run outside a cluster, it will skip leader election and return nil. It
// continuously tries to create a ConfigMap with the provided name and the
// current pod set as the owner reference. Only one can exist at a time with
// the same name, so the pod that successfully creates the ConfigMap is the
// leader. Upon termination of that pod, the garbage collector will delete the
// ConfigMap, enabling a different pod to become the leader.
func Become(ctx context.Context, lockName string) error {
	logrus.Info("trying to become the leader")

	ns, err := myNS()
	if err != nil {
		if err == errNoNS {
			logrus.Info("Skipping leader election; not running in a cluster")
			return nil
		}
		return err
	}

	config, err := rest.InClusterConfig()
	if err != nil {
		return err
	}

	client, err := crclient.New(config, crclient.Options{})
	if err != nil {
		return err
	}

	owner, err := myOwnerRef(ctx, client, ns)
	if err != nil {
		return err
	}

	// check for existing lock from this pod, in case we got restarted
	existing := &corev1.ConfigMap{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "v1",
			Kind:       "ConfigMap",
		},
	}
	key := crclient.ObjectKey{Namespace: ns, Name: lockName}
	err = client.Get(ctx, key, existing)

	switch {
	case err == nil:
		for _, existingOwner := range existing.GetOwnerReferences() {
			if existingOwner.Name == owner.Name {
				logrus.Info("Found existing lock with my name. I was likely restarted.")
				logrus.Info("Continuing as the leader.")
				return nil
			} else {
				logrus.Infof("Found existing lock from %s", existingOwner.Name)
			}
		}
	case apierrors.IsNotFound(err):
		logrus.Info("No pre-existing lock was found.")
	default:
		logrus.Error("unknown error trying to get ConfigMap")
		return err
	}

	cm := &corev1.ConfigMap{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "v1",
			Kind:       "ConfigMap",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:            lockName,
			Namespace:       ns,
			OwnerReferences: []metav1.OwnerReference{*owner},
		},
	}

	// try to create a lock
	backoff := time.Second
	for {
		err := client.Create(ctx, cm)
		switch {
		case err == nil:
			logrus.Info("Became the leader.")
			return nil
		case apierrors.IsAlreadyExists(err):
			logrus.Info("Not the leader. Waiting.")
			select {
			case <-time.After(wait.Jitter(backoff, .2)):
				if backoff < maxBackoffInterval {
					backoff *= 2
				}
				continue
			case <-ctx.Done():
				return ctx.Err()
			}
		default:
			logrus.Errorf("unknown error creating configmap: %v", err)
			return err
		}
	}
}

// myNS returns the name of the namespace in which this code is currently running.
func myNS() (string, error) {
	nsBytes, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
	if err != nil {
		if os.IsNotExist(err) {
			logrus.Debug("current namespace not found")
			return "", errNoNS
		}
		return "", err
	}
	ns := strings.TrimSpace(string(nsBytes))
	logrus.Debugf("found namespace: %s", ns)
	return ns, nil
}

// myOwnerRef returns an OwnerReference that corresponds to the pod in which
// this code is currently running.
func myOwnerRef(ctx context.Context, client crclient.Client, ns string) (*metav1.OwnerReference, error) {
	hostname, err := os.Hostname()
	if err != nil {
		return nil, err
	}
	logrus.Debugf("found hostname: %s", hostname)

	myPod := &corev1.Pod{
		TypeMeta: metav1.TypeMeta{
			APIVersion: "v1",
			Kind:       "Pod",
		},
	}

	key := crclient.ObjectKey{Namespace: ns, Name: hostname}
	err = client.Get(ctx, key, myPod)
	if err != nil {
		logrus.Errorf("failed to get pod: %v", err)
		return nil, err
	}

	owner := &metav1.OwnerReference{
		APIVersion: "v1",
		Kind:       "Pod",
		Name:       myPod.ObjectMeta.Name,
		UID:        myPod.ObjectMeta.UID,
	}
	return owner, nil
}
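
A note on the retry loop in Become: the wait between attempts starts at one second, doubles each time up to maxBackoffInterval (16 seconds), and is stretched by up to 20% of random jitter via wait.Jitter so that candidates that lost the same election do not all retry at the same instant. The standalone sketch below only prints that schedule; it is illustrative and not part of the commit.

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	const maxBackoffInterval = 16 * time.Second

	backoff := time.Second
	for attempt := 1; attempt <= 8; attempt++ {
		// wait.Jitter(d, 0.2) returns d plus a random duration of up to 0.2*d.
		fmt.Printf("attempt %d: base %v, jittered %v\n", attempt, backoff, wait.Jitter(backoff, 0.2))
		if backoff < maxBackoffInterval {
			backoff *= 2
		}
	}
}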
