Skip to content

Commit b361d94

Browse files
authored
Use the URL from join token to validate connectivity (#799)
Signed-off-by: Kimmo Lehto <[email protected]>
1 parent 4d134e5 commit b361d94

File tree

10 files changed

+157
-124
lines changed

10 files changed

+157
-124
lines changed

phase/initialize_k0s.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"strings"
7+
"time"
78

89
"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1"
910
"github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster"
@@ -115,11 +116,22 @@ func (p *InitializeK0s) Run() error {
115116
return err
116117
}
117118

118-
log.Infof("%s: waiting for kubernetes api to respond", h)
119-
if err := retry.Timeout(context.TODO(), retry.DefaultTimeout, node.KubeAPIReadyFunc(h, p.Config)); err != nil {
119+
log.Infof("%s: wait for kubernetes to reach ready state", h)
120+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
121+
defer cancel()
122+
err := retry.Context(ctx, func(_ context.Context) error {
123+
out, err := h.ExecOutput(h.Configurer.KubectlCmdf(h, h.K0sDataDir(), "get --raw='/readyz'"), exec.Sudo(h))
124+
if out != "ok" {
125+
return fmt.Errorf("kubernetes api /readyz responded with %q", out)
126+
}
120127
return err
128+
})
129+
if err != nil {
130+
return fmt.Errorf("kubernetes not ready: %w", err)
121131
}
122132

133+
h.Metadata.Ready = true
134+
123135
return nil
124136
})
125137
if err != nil {

phase/install_controllers.go

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ func (p *InstallControllers) Prepare(config *v1beta1.Cluster) error {
3333
p.hosts = p.Config.Spec.Hosts.Controllers().Filter(func(h *cluster.Host) bool {
3434
return !h.Reset && !h.Metadata.NeedsUpgrade && (h != p.leader && h.Metadata.K0sRunningVersion == nil)
3535
})
36-
3736
return nil
3837
}
3938

@@ -65,13 +64,13 @@ func (p *InstallControllers) CleanUp() {
6564

6665
func (p *InstallControllers) After() error {
6766
for i, h := range p.hosts {
68-
if h.Metadata.K0sJoinTokenID == "" {
67+
if h.Metadata.K0sTokenData.Token == "" {
6968
continue
7069
}
71-
h.Metadata.K0sJoinToken = ""
70+
h.Metadata.K0sTokenData.Token = ""
7271
err := p.Wet(p.leader, fmt.Sprintf("invalidate k0s join token for controller %s", h), func() error {
7372
log.Debugf("%s: invalidating join token for controller %d", p.leader, i+1)
74-
return p.leader.Exec(p.leader.Configurer.K0sCmdf("token invalidate --data-dir=%s %s", p.leader.K0sDataDir(), h.Metadata.K0sJoinTokenID), exec.Sudo(p.leader))
73+
return p.leader.Exec(p.leader.Configurer.K0sCmdf("token invalidate --data-dir=%s %s", p.leader.K0sDataDir(), h.Metadata.K0sTokenData.ID), exec.Sudo(p.leader))
7574
})
7675
if err != nil {
7776
log.Warnf("%s: failed to invalidate worker join token: %v", p.leader, err)
@@ -88,54 +87,51 @@ func (p *InstallControllers) After() error {
8887

8988
// Run the phase
9089
func (p *InstallControllers) Run() error {
91-
url := p.Config.Spec.InternalKubeAPIURL()
92-
healthz := fmt.Sprintf("%s/healthz", url)
93-
94-
err := p.parallelDo(p.hosts, func(h *cluster.Host) error {
95-
if p.IsWet() || !p.leader.Metadata.DryRunFakeLeader {
96-
log.Infof("%s: validating api connection to %s", h, url)
97-
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
98-
defer cancel()
99-
if err := retry.Context(ctx, node.HTTPStatusFunc(h, healthz, 200, 401)); err != nil {
100-
return fmt.Errorf("failed to connect from controller to kubernetes api at %s - check networking", url)
101-
}
102-
} else {
103-
log.Warnf("%s: dry-run: skipping api connection validation to %s because cluster is not running", h, url)
104-
}
105-
return nil
106-
})
107-
if err != nil {
108-
return err
109-
}
110-
11190
for _, h := range p.hosts {
112-
var token string
113-
var tokenID string
114-
11591
if p.IsWet() {
116-
log.Infof("%s: generating token", p.leader)
117-
token, err = p.Config.Spec.K0s.GenerateToken(
92+
log.Infof("%s: generate join token for %s", p.leader, h)
93+
token, err := p.Config.Spec.K0s.GenerateToken(
11894
p.leader,
11995
"controller",
12096
time.Duration(10)*time.Minute,
12197
)
12298
if err != nil {
12399
return err
124100
}
125-
h.Metadata.K0sJoinToken = token
126-
tokenID, err = cluster.TokenID(token)
101+
tokenData, err := cluster.ParseToken(token)
127102
if err != nil {
128103
return err
129104
}
130-
log.Debugf("%s: join token ID: %s", p.leader, tokenID)
131-
h.Metadata.K0sJoinTokenID = tokenID
105+
h.Metadata.K0sTokenData = tokenData
132106
} else {
133107
p.DryMsgf(p.leader, "generate a k0s join token for controller %s", h)
134-
h.Metadata.K0sJoinTokenID = "dry-run"
108+
h.Metadata.K0sTokenData.ID = "dry-run"
109+
h.Metadata.K0sTokenData.URL = p.Config.Spec.KubeAPIURL()
135110
}
136-
137-
log.Infof("%s: writing join token", h)
138-
if err := h.Configurer.WriteFile(h, h.K0sJoinTokenPath(), h.Metadata.K0sJoinToken, "0640"); err != nil {
111+
}
112+
err := p.parallelDo(p.hosts, func(h *cluster.Host) error {
113+
if p.IsWet() || !p.leader.Metadata.DryRunFakeLeader {
114+
log.Infof("%s: validating api connection to %s", h, h.Metadata.K0sTokenData.URL)
115+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
116+
defer cancel()
117+
if err := retry.Context(ctx, node.HTTPStatusFunc(h, h.Metadata.K0sTokenData.URL, 200, 401, 404)); err != nil {
118+
return fmt.Errorf("failed to connect from controller to kubernetes api - check networking: %w", err)
119+
}
120+
} else {
121+
log.Warnf("%s: dry-run: skipping api connection validation because cluster is not actually running", h)
122+
}
123+
return nil
124+
})
125+
if err != nil {
126+
return err
127+
}
128+
return p.parallelDo(p.hosts, func(h *cluster.Host) error {
129+
tokenPath := h.K0sJoinTokenPath()
130+
log.Infof("%s: writing join token to %s", h, tokenPath)
131+
err := p.Wet(h, fmt.Sprintf("write k0s join token to %s", tokenPath), func() error {
132+
return h.Configurer.WriteFile(h, tokenPath, h.Metadata.K0sTokenData.Token, "0600")
133+
})
134+
if err != nil {
139135
return err
140136
}
141137

@@ -180,17 +176,22 @@ func (p *InstallControllers) Run() error {
180176
return err
181177
}
182178

183-
if err := p.waitJoined(h); err != nil {
184-
return err
179+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
180+
defer cancel()
181+
err := retry.Context(ctx, func(_ context.Context) error {
182+
out, err := h.ExecOutput(h.Configurer.KubectlCmdf(h, h.K0sDataDir(), "get --raw='/readyz?verbose=true'"), exec.Sudo(h))
183+
if err != nil {
184+
return fmt.Errorf("readiness endpoint reports %q: %w", out, err)
185+
}
186+
return nil
187+
})
188+
if err != nil {
189+
return fmt.Errorf("controller did not reach ready state: %w", err)
185190
}
186-
}
187-
h.Metadata.Ready = true
188-
}
189191

190-
return nil
191-
}
192+
h.Metadata.Ready = true
193+
}
192194

193-
func (p *InstallControllers) waitJoined(h *cluster.Host) error {
194-
log.Infof("%s: waiting for kubernetes api to respond", h)
195-
return retry.Timeout(context.TODO(), retry.DefaultTimeout, node.KubeAPIReadyFunc(h, p.Config))
195+
return nil
196+
})
196197
}

phase/install_workers.go

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -66,27 +66,26 @@ func (p *InstallWorkers) CleanUp() {
6666
func (p *InstallWorkers) After() error {
6767
if NoWait {
6868
for _, h := range p.hosts {
69-
if h.Metadata.K0sJoinToken != "" {
69+
if h.Metadata.K0sTokenData.Token != "" {
7070
log.Warnf("%s: --no-wait given, created join tokens will remain valid for 10 minutes", p.leader)
7171
break
7272
}
7373
}
7474
return nil
7575
}
7676
for i, h := range p.hosts {
77-
if h.Metadata.K0sJoinTokenID == "" {
77+
h.Metadata.K0sTokenData.Token = ""
78+
if h.Metadata.K0sTokenData.ID == "" {
7879
continue
7980
}
80-
h.Metadata.K0sJoinToken = ""
8181
err := p.Wet(p.leader, fmt.Sprintf("invalidate k0s join token for worker %s", h), func() error {
8282
log.Debugf("%s: invalidating join token for worker %d", p.leader, i+1)
83-
return p.leader.Exec(p.leader.Configurer.K0sCmdf("token invalidate --data-dir=%s %s", p.leader.K0sDataDir(), h.Metadata.K0sJoinTokenID), exec.Sudo(p.leader))
83+
return p.leader.Exec(p.leader.Configurer.K0sCmdf("token invalidate --data-dir=%s %s", p.leader.K0sDataDir(), h.Metadata.K0sTokenData.ID), exec.Sudo(p.leader))
8484
})
8585
if err != nil {
8686
log.Warnf("%s: failed to invalidate worker join token: %v", p.leader, err)
8787
}
8888
_ = p.Wet(h, "overwrite k0s join token file", func() error {
89-
9089
if err := h.Configurer.WriteFile(h, h.K0sJoinTokenPath(), "# overwritten by k0sctl after join\n", "0600"); err != nil {
9190
log.Warnf("%s: failed to overwrite the join token file at %s", h, h.K0sJoinTokenPath())
9291
}
@@ -98,30 +97,9 @@ func (p *InstallWorkers) After() error {
9897

9998
// Run the phase
10099
func (p *InstallWorkers) Run() error {
101-
url := p.Config.Spec.InternalKubeAPIURL()
102-
healthz := fmt.Sprintf("%s/healthz", url)
103-
104-
err := p.parallelDo(p.hosts, func(h *cluster.Host) error {
105-
if p.IsWet() || !p.leader.Metadata.DryRunFakeLeader {
106-
log.Infof("%s: validating api connection to %s", h, url)
107-
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
108-
defer cancel()
109-
if err := retry.Context(ctx, node.HTTPStatusFunc(h, healthz, 200, 401)); err != nil {
110-
return fmt.Errorf("failed to connect from worker to kubernetes api at %s - check networking", url)
111-
}
112-
} else {
113-
log.Warnf("%s: dry-run: skipping api connection validation to %s because cluster is not running", h, url)
114-
}
115-
return nil
116-
})
117-
118-
if err != nil {
119-
return err
120-
}
121-
122100
for i, h := range p.hosts {
123101
log.Infof("%s: generating a join token for worker %d", p.leader, i+1)
124-
err = p.Wet(p.leader, fmt.Sprintf("generate a k0s join token for worker %s", h), func() error {
102+
err := p.Wet(p.leader, fmt.Sprintf("generate a k0s join token for worker %s", h), func() error {
125103
t, err := p.Config.Spec.K0s.GenerateToken(
126104
p.leader,
127105
"worker",
@@ -130,29 +108,54 @@ func (p *InstallWorkers) Run() error {
130108
if err != nil {
131109
return err
132110
}
133-
h.Metadata.K0sJoinToken = t
134111

135-
ti, err := cluster.TokenID(t)
112+
td, err := cluster.ParseToken(t)
136113
if err != nil {
137-
return err
114+
return fmt.Errorf("parse k0s token: %w", err)
138115
}
139-
h.Metadata.K0sJoinTokenID = ti
140116

141-
log.Debugf("%s: join token ID: %s", h, ti)
117+
h.Metadata.K0sTokenData = td
118+
142119
return nil
143120
}, func() error {
144-
h.Metadata.K0sJoinTokenID = "dry-run"
121+
h.Metadata.K0sTokenData.ID = "dry-run"
122+
h.Metadata.K0sTokenData.URL = p.Config.Spec.KubeAPIURL()
145123
return nil
146124
})
147125
if err != nil {
148126
return err
149127
}
150128
}
151129

130+
err := p.parallelDo(p.hosts, func(h *cluster.Host) error {
131+
if p.IsWet() || !p.leader.Metadata.DryRunFakeLeader {
132+
log.Infof("%s: validating api connection to %s using join token", h, h.Metadata.K0sTokenData.URL)
133+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
134+
defer cancel()
135+
err := retry.Context(ctx, func(_ context.Context) error {
136+
err := h.Exec(h.Configurer.KubectlCmdf(h, h.K0sDataDir(), "get --raw='/version' --kubeconfig=/dev/stdin"), exec.Sudo(h), exec.Stdin(string(h.Metadata.K0sTokenData.Kubeconfig)))
137+
if err != nil {
138+
return fmt.Errorf("failed to connect to kubernetes api using the join token - check networking: %w", err)
139+
}
140+
return nil
141+
})
142+
if err != nil {
143+
return fmt.Errorf("connectivity check failed: %w", err)
144+
}
145+
} else {
146+
log.Warnf("%s: dry-run: skipping api connection validation because cluster is not actually running", h)
147+
}
148+
return nil
149+
})
150+
if err != nil {
151+
return err
152+
}
153+
152154
return p.parallelDo(p.hosts, func(h *cluster.Host) error {
153-
err := p.Wet(h, fmt.Sprintf("write k0s join token to %s", h.K0sJoinTokenPath()), func() error {
154-
log.Infof("%s: writing join token", h)
155-
return h.Configurer.WriteFile(h, h.K0sJoinTokenPath(), h.Metadata.K0sJoinToken, "0640")
155+
tokenPath := h.K0sJoinTokenPath()
156+
err := p.Wet(h, fmt.Sprintf("write k0s join token to %s", tokenPath), func() error {
157+
log.Infof("%s: writing join token to %s", h, tokenPath)
158+
return h.Configurer.WriteFile(h, tokenPath, h.Metadata.K0sTokenData.Token, "0600")
156159
})
157160
if err != nil {
158161
return err

phase/reinstall.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,5 @@ func (p *Reinstall) reinstall(h *cluster.Host) error {
107107
return fmt.Errorf("restart after reinstall: %w", err)
108108
}
109109

110-
if h != p.Config.Spec.K0sLeader() {
111-
return nil
112-
}
113-
114110
return nil
115111
}

phase/upgrade_controllers.go

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,17 @@ func (p *UpgradeControllers) Run() error {
133133
}
134134

135135
if p.IsWet() {
136-
if err := retry.Timeout(context.TODO(), retry.DefaultTimeout, node.KubeAPIReadyFunc(h, p.Config)); err != nil {
137-
return fmt.Errorf("kube api did not become ready: %w", err)
136+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
137+
defer cancel()
138+
err := retry.Context(ctx, func(_ context.Context) error {
139+
out, err := h.ExecOutput(h.Configurer.KubectlCmdf(h, h.K0sDataDir(), "get --raw='/readyz?verbose=true'"), exec.Sudo(h))
140+
if err != nil {
141+
return fmt.Errorf("readiness endpoint reports %q: %w", out, err)
142+
}
143+
return nil
144+
})
145+
if err != nil {
146+
return fmt.Errorf("controller did not reach ready state: %w", err)
138147
}
139148
}
140149

@@ -147,13 +156,5 @@ func (p *UpgradeControllers) Run() error {
147156
return nil
148157
}
149158

150-
log.Infof("%s: waiting for the scheduler to become ready", leader)
151-
if err := retry.Timeout(context.TODO(), retry.DefaultTimeout, node.ScheduledEventsAfterFunc(leader, time.Now())); err != nil {
152-
if !Force {
153-
return fmt.Errorf("failed to observe scheduling events after api start-up, you can ignore this check by using --force: %w", err)
154-
}
155-
log.Warnf("%s: failed to observe scheduling events after api start-up: %s", leader, err)
156-
}
157-
158159
return nil
159160
}

pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,7 @@ type HostMetadata struct {
179179
K0sInstalled bool
180180
K0sExistingConfig string
181181
K0sNewConfig string
182-
K0sJoinToken string
183-
K0sJoinTokenID string
182+
K0sTokenData TokenData
184183
K0sStatusArgs Flags
185184
Arch string
186185
IsK0sLeader bool

0 commit comments

Comments
 (0)