Skip to content

Commit b117a92

Browse files
committed
kubeadm: prevent bootstrap of nodes with known names
If a Node name in the cluster is already taken and this Node is Ready, prevent TLS bootstrap on "kubeadm join" and exit early. This change requires that a new ClusterRole is granted to the "system:bootstrappers:kubeadm:default-node-token" group to be able to get Nodes in the cluster. The same group already has access to obtain objects such as the KubeletConfiguration and kubeadm's ClusterConfiguration. The motivation of this change is to prevent undefined behavior and the potential control-plane breakdown if such a cluster is racing to have two nodes with the same name for long periods of time. The following values are validated in the following precedence from lower to higher: - actual hostname - NodeRegistration.Name (or "--node-name") from JoinConfiguration - "--hostname-override" passed via kubeletExtraArgs If the user decides to not let kubeadm know about a custom node name and to instead override the hostname from a kubelet systemd unit file, kubeadm will not be able to detect the problem.
1 parent 5441a99 commit b117a92

File tree

6 files changed

+116
-28
lines changed

6 files changed

+116
-28
lines changed

cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ func runBootstrapToken(c workflow.RunData) error {
8686
if err := nodebootstraptokenphase.UpdateOrCreateTokens(client, false, data.Cfg().BootstrapTokens); err != nil {
8787
return errors.Wrap(err, "error updating or creating token")
8888
}
89+
// Create RBAC rules that makes the bootstrap tokens able to get nodes
90+
if err := nodebootstraptokenphase.AllowBoostrapTokensToGetNodes(client); err != nil {
91+
return errors.Wrap(err, "error allowing bootstrap tokens to get Nodes")
92+
}
8993
// Create RBAC rules that makes the bootstrap tokens able to post CSRs
9094
if err := nodebootstraptokenphase.AllowBootstrapTokensToPostCSRs(client); err != nil {
9195
return errors.Wrap(err, "error allowing bootstrap tokens to post CSRs")

cmd/kubeadm/app/cmd/phases/join/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ go_library(
3030
"//cmd/kubeadm/app/preflight:go_default_library",
3131
"//cmd/kubeadm/app/util/apiclient:go_default_library",
3232
"//cmd/kubeadm/app/util/kubeconfig:go_default_library",
33+
"//staging/src/k8s.io/api/core/v1:go_default_library",
34+
"//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library",
35+
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
3336
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
3437
"//staging/src/k8s.io/apimachinery/pkg/util/version:go_default_library",
3538
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",

cmd/kubeadm/app/cmd/phases/join/kubelet.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ import (
2222

2323
"github.com/lithammer/dedent"
2424
"github.com/pkg/errors"
25+
v1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
27+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2528
"k8s.io/apimachinery/pkg/util/version"
2629
"k8s.io/apimachinery/pkg/util/wait"
2730
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
@@ -128,6 +131,28 @@ func runKubeletStartJoinPhase(c workflow.RunData) (returnErr error) {
128131
return errors.Errorf("couldn't create client from kubeconfig file %q", bootstrapKubeConfigFile)
129132
}
130133

134+
// Obtain the name of this Node.
135+
nodeName, _, err := kubeletphase.GetNodeNameAndHostname(&cfg.NodeRegistration)
136+
if err != nil {
137+
klog.Warning(err)
138+
}
139+
140+
// Make sure to exit before TLS bootstrap if a Node with the same name exists in the cluster
141+
// and it has the "Ready" status.
142+
// A new Node with the same name as an existing control-plane Node can cause undefined
143+
// behavior and ultimately control-plane failure.
144+
klog.V(1).Infof("[kubelet-start] Checking for an existing Node in the cluster with name %q and status %q", nodeName, v1.NodeReady)
145+
node, err := bootstrapClient.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
146+
if err != nil && !apierrors.IsNotFound(err) {
147+
return errors.Wrapf(err, "cannot get Node %q", nodeName)
148+
}
149+
for _, cond := range node.Status.Conditions {
150+
if cond.Type == v1.NodeReady {
151+
return errors.Errorf("a Node with name %q and status %q already exists in the cluster. "+
152+
"You must delete the existing Node or change the name of this new joining Node", nodeName, v1.NodeReady)
153+
}
154+
}
155+
131156
// Configure the kubelet. In this short timeframe, kubeadm is trying to stop/restart the kubelet
132157
// Try to stop the kubelet service so no race conditions occur when configuring it
133158
klog.V(1).Infoln("[kubelet-start] Stopping the kubelet")

cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ const (
3232
NodeBootstrapperClusterRoleName = "system:node-bootstrapper"
3333
// NodeKubeletBootstrap defines the name of the ClusterRoleBinding that lets kubelets post CSRs
3434
NodeKubeletBootstrap = "kubeadm:kubelet-bootstrap"
35+
// GetNodesClusterRoleName defines the name of the ClusterRole and ClusterRoleBinding to get nodes
36+
GetNodesClusterRoleName = "kubeadm:get-nodes"
3537

3638
// CSRAutoApprovalClusterRoleName defines the name of the auto-bootstrapped ClusterRole for making the csrapprover controller auto-approve the CSR
3739
// TODO: This value should be defined in an other, generic authz package instead of here
@@ -67,6 +69,45 @@ func AllowBootstrapTokensToPostCSRs(client clientset.Interface) error {
6769
})
6870
}
6971

72+
// AllowBoostrapTokensToGetNodes creates RBAC rules to allow Node Bootstrap Tokens to list nodes
73+
func AllowBoostrapTokensToGetNodes(client clientset.Interface) error {
74+
fmt.Println("[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes")
75+
76+
if err := apiclient.CreateOrUpdateClusterRole(client, &rbac.ClusterRole{
77+
ObjectMeta: metav1.ObjectMeta{
78+
Name: GetNodesClusterRoleName,
79+
Namespace: metav1.NamespaceSystem,
80+
},
81+
Rules: []rbac.PolicyRule{
82+
{
83+
Verbs: []string{"get"},
84+
APIGroups: []string{""},
85+
Resources: []string{"nodes"},
86+
},
87+
},
88+
}); err != nil {
89+
return err
90+
}
91+
92+
return apiclient.CreateOrUpdateClusterRoleBinding(client, &rbac.ClusterRoleBinding{
93+
ObjectMeta: metav1.ObjectMeta{
94+
Name: GetNodesClusterRoleName,
95+
Namespace: metav1.NamespaceSystem,
96+
},
97+
RoleRef: rbac.RoleRef{
98+
APIGroup: rbac.GroupName,
99+
Kind: "ClusterRole",
100+
Name: GetNodesClusterRoleName,
101+
},
102+
Subjects: []rbac.Subject{
103+
{
104+
Kind: rbac.GroupKind,
105+
Name: constants.NodeBootstrapTokenAuthGroup,
106+
},
107+
},
108+
})
109+
}
110+
70111
// AutoApproveNodeBootstrapTokens creates RBAC rules in a way that makes Node Bootstrap Tokens' CSR auto-approved by the csrapprover controller
71112
func AutoApproveNodeBootstrapTokens(client clientset.Interface) error {
72113
fmt.Println("[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token")

cmd/kubeadm/app/phases/kubelet/flags.go

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,29 @@ type kubeletFlagsOpts struct {
4141
registerTaintsUsingFlags bool
4242
execer utilsexec.Interface
4343
isServiceActiveFunc func(string) (bool, error)
44-
defaultHostname string
44+
}
45+
46+
// GetNodeNameAndHostname obtains the name for this Node using the following precedence
47+
// (from lower to higher):
48+
// - actual hostname
49+
// - NodeRegistrationOptions.Name (same as "--node-name" passed to "kubeadm init/join")
50+
// - "hostname-overide" flag in NodeRegistrationOptions.KubeletExtraArgs
51+
// It also returns the hostname or an error if getting the hostname failed.
52+
func GetNodeNameAndHostname(cfg *kubeadmapi.NodeRegistrationOptions) (string, string, error) {
53+
hostname, err := kubeadmutil.GetHostname("")
54+
nodeName := hostname
55+
if cfg.Name != "" {
56+
nodeName = cfg.Name
57+
}
58+
if name, ok := cfg.KubeletExtraArgs["hostname-override"]; ok {
59+
nodeName = name
60+
}
61+
return nodeName, hostname, err
4562
}
4663

4764
// WriteKubeletDynamicEnvFile writes an environment file with dynamic flags to the kubelet.
4865
// Used at "kubeadm init" and "kubeadm join" time.
4966
func WriteKubeletDynamicEnvFile(cfg *kubeadmapi.ClusterConfiguration, nodeReg *kubeadmapi.NodeRegistrationOptions, registerTaintsUsingFlags bool, kubeletDir string) error {
50-
hostName, err := kubeadmutil.GetHostname("")
51-
if err != nil {
52-
return err
53-
}
54-
5567
flagOpts := kubeletFlagsOpts{
5668
nodeRegOpts: nodeReg,
5769
featureGates: cfg.FeatureGates,
@@ -65,7 +77,6 @@ func WriteKubeletDynamicEnvFile(cfg *kubeadmapi.ClusterConfiguration, nodeReg *k
6577
}
6678
return initSystem.ServiceIsActive(name), nil
6779
},
68-
defaultHostname: hostName,
6980
}
7081
stringMap := buildKubeletArgMap(flagOpts)
7182
argList := kubeadmutil.BuildArgumentListFromMap(stringMap, nodeReg.KubeletExtraArgs)
@@ -113,15 +124,19 @@ func buildKubeletArgMap(opts kubeletFlagsOpts) map[string]string {
113124
kubeletFlags["resolv-conf"] = "/run/systemd/resolve/resolv.conf"
114125
}
115126

116-
// Make sure the node name we're passed will work with Kubelet
117-
if opts.nodeRegOpts.Name != "" && opts.nodeRegOpts.Name != opts.defaultHostname {
118-
klog.V(1).Infof("setting kubelet hostname-override to %q", opts.nodeRegOpts.Name)
119-
kubeletFlags["hostname-override"] = opts.nodeRegOpts.Name
127+
// Pass the "--hostname-override" flag to the kubelet only if it's different from the hostname
128+
nodeName, hostname, err := GetNodeNameAndHostname(opts.nodeRegOpts)
129+
if err != nil {
130+
klog.Warning(err)
131+
}
132+
if nodeName != hostname {
133+
klog.V(1).Infof("setting kubelet hostname-override to %q", nodeName)
134+
kubeletFlags["hostname-override"] = nodeName
120135
}
121136

122137
// TODO: Conditionally set `--cgroup-driver` to either `systemd` or `cgroupfs` for CRI other than Docker
123138

124-
// TODO: The following code should be remvoved after dual-stack is GA.
139+
// TODO: The following code should be removed after dual-stack is GA.
125140
// Note: The user still retains the ability to explicitly set feature-gates and that value will overwrite this base value.
126141
if enabled, present := opts.featureGates[features.IPv6DualStack]; present {
127142
kubeletFlags["feature-gates"] = fmt.Sprintf("%s=%t", features.IPv6DualStack, enabled)

cmd/kubeadm/app/phases/kubelet/flags_test.go

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ func TestBuildKubeletArgMap(t *testing.T) {
109109
opts: kubeletFlagsOpts{
110110
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
111111
CRISocket: "/var/run/dockershim.sock",
112-
Name: "foo",
113112
Taints: []v1.Taint{ // This should be ignored as registerTaintsUsingFlags is false
114113
{
115114
Key: "foo",
@@ -120,22 +119,35 @@ func TestBuildKubeletArgMap(t *testing.T) {
120119
},
121120
execer: errCgroupExecer,
122121
isServiceActiveFunc: serviceIsNotActiveFunc,
123-
defaultHostname: "foo",
124122
},
125123
expected: map[string]string{
126124
"network-plugin": "cni",
127125
},
128126
},
129127
{
130-
name: "nodeRegOpts.Name != default hostname",
128+
name: "hostname override from NodeRegistrationOptions.Name",
131129
opts: kubeletFlagsOpts{
132130
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
133131
CRISocket: "/var/run/dockershim.sock",
134132
Name: "override-name",
135133
},
136134
execer: errCgroupExecer,
137135
isServiceActiveFunc: serviceIsNotActiveFunc,
138-
defaultHostname: "default",
136+
},
137+
expected: map[string]string{
138+
"network-plugin": "cni",
139+
"hostname-override": "override-name",
140+
},
141+
},
142+
{
143+
name: "hostname override from NodeRegistrationOptions.KubeletExtraArgs",
144+
opts: kubeletFlagsOpts{
145+
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
146+
CRISocket: "/var/run/dockershim.sock",
147+
KubeletExtraArgs: map[string]string{"hostname-override": "override-name"},
148+
},
149+
execer: errCgroupExecer,
150+
isServiceActiveFunc: serviceIsNotActiveFunc,
139151
},
140152
expected: map[string]string{
141153
"network-plugin": "cni",
@@ -147,11 +159,9 @@ func TestBuildKubeletArgMap(t *testing.T) {
147159
opts: kubeletFlagsOpts{
148160
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
149161
CRISocket: "/var/run/dockershim.sock",
150-
Name: "foo",
151162
},
152163
execer: systemdCgroupExecer,
153164
isServiceActiveFunc: serviceIsNotActiveFunc,
154-
defaultHostname: "foo",
155165
},
156166
expected: map[string]string{
157167
"network-plugin": "cni",
@@ -163,11 +173,9 @@ func TestBuildKubeletArgMap(t *testing.T) {
163173
opts: kubeletFlagsOpts{
164174
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
165175
CRISocket: "/var/run/dockershim.sock",
166-
Name: "foo",
167176
},
168177
execer: cgroupfsCgroupExecer,
169178
isServiceActiveFunc: serviceIsNotActiveFunc,
170-
defaultHostname: "foo",
171179
},
172180
expected: map[string]string{
173181
"network-plugin": "cni",
@@ -179,11 +187,9 @@ func TestBuildKubeletArgMap(t *testing.T) {
179187
opts: kubeletFlagsOpts{
180188
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
181189
CRISocket: "/var/run/containerd.sock",
182-
Name: "foo",
183190
},
184191
execer: cgroupfsCgroupExecer,
185192
isServiceActiveFunc: serviceIsNotActiveFunc,
186-
defaultHostname: "foo",
187193
},
188194
expected: map[string]string{
189195
"container-runtime": "remote",
@@ -195,7 +201,6 @@ func TestBuildKubeletArgMap(t *testing.T) {
195201
opts: kubeletFlagsOpts{
196202
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
197203
CRISocket: "/var/run/containerd.sock",
198-
Name: "foo",
199204
Taints: []v1.Taint{
200205
{
201206
Key: "foo",
@@ -212,7 +217,6 @@ func TestBuildKubeletArgMap(t *testing.T) {
212217
registerTaintsUsingFlags: true,
213218
execer: cgroupfsCgroupExecer,
214219
isServiceActiveFunc: serviceIsNotActiveFunc,
215-
defaultHostname: "foo",
216220
},
217221
expected: map[string]string{
218222
"container-runtime": "remote",
@@ -225,11 +229,9 @@ func TestBuildKubeletArgMap(t *testing.T) {
225229
opts: kubeletFlagsOpts{
226230
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
227231
CRISocket: "/var/run/containerd.sock",
228-
Name: "foo",
229232
},
230233
execer: cgroupfsCgroupExecer,
231234
isServiceActiveFunc: serviceIsActiveFunc,
232-
defaultHostname: "foo",
233235
},
234236
expected: map[string]string{
235237
"container-runtime": "remote",
@@ -242,12 +244,10 @@ func TestBuildKubeletArgMap(t *testing.T) {
242244
opts: kubeletFlagsOpts{
243245
nodeRegOpts: &kubeadmapi.NodeRegistrationOptions{
244246
CRISocket: "/var/run/dockershim.sock",
245-
Name: "foo",
246247
},
247248
pauseImage: "gcr.io/pause:3.1",
248249
execer: cgroupfsCgroupExecer,
249250
isServiceActiveFunc: serviceIsNotActiveFunc,
250-
defaultHostname: "foo",
251251
},
252252
expected: map[string]string{
253253
"network-plugin": "cni",

0 commit comments

Comments
 (0)