Skip to content

Commit 30312f3

Browse files
committed
Merge remote-tracking branch 'origin/main' into k0s-1-29
2 parents 46652e6 + c0663d4 commit 30312f3

File tree

28 files changed

+643
-117
lines changed

28 files changed

+643
-117
lines changed

cmd/buildtools/adminconsole.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ var updateAdminConsoleAddonCommand = &cli.Command{
4444
defer hcli.Close()
4545

4646
logrus.Infof("getting admin console latest tag")
47-
latest, err := GetLatestGitHubTag(c.Context, "replicatedhq", "kots-helm")
47+
latest, err := GetLatestKotsHelmTag(c.Context)
4848
if err != nil {
4949
return fmt.Errorf("failed to get admin console latest tag: %w", err)
5050
}

cmd/buildtools/utils.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,37 @@ func GetLatestGitHubTag(ctx context.Context, owner, repo string) (string, error)
211211
return tags[0].GetName(), nil
212212
}
213213

214+
// GetLatestKotsHelmTag returns the correct tag from the kots-helm repository.
215+
// this is not quite the same as the latest tag from the kots-helm repository, as github
216+
// will list "v1.124.12" as being newer than "v1.124.12-build.0" and it is not in our usage.
217+
func GetLatestKotsHelmTag(ctx context.Context) (string, error) {
218+
client := github.NewClient(nil)
219+
tags, _, err := client.Repositories.ListTags(ctx, "replicatedhq", "kots-helm", &github.ListOptions{PerPage: 100})
220+
if err != nil {
221+
return "", fmt.Errorf("list tags: %w", err)
222+
}
223+
if len(tags) == 0 {
224+
return "", fmt.Errorf("no tags found")
225+
}
226+
latestTag := tags[0].GetName()
227+
logrus.Infof("latest tag: %s", latestTag)
228+
229+
// check to see if there is a 'build.x' tag - if so, return that
230+
for _, tag := range tags {
231+
logrus.Infof("checkingtag: %s", tag.GetName())
232+
if !strings.HasPrefix(tag.GetName(), latestTag) {
233+
// tags are sorted, so once we find a tag that doesn't have the same prefix, we can break
234+
logrus.Infof("tag does not have same prefix: %s", tag.GetName())
235+
break
236+
}
237+
if strings.Contains(tag.GetName(), "-build.") {
238+
logrus.Infof("tag is a build tag, returning: %s", tag.GetName())
239+
return tag.GetName(), nil
240+
}
241+
}
242+
return latestTag, nil
243+
}
244+
214245
// GetGreatestGitHubTag returns the greatest non-prerelease semver tag from a GitHub repository
215246
// that matches the provided constraints.
216247
func GetGreatestGitHubTag(ctx context.Context, owner, repo string, constrants *semver.Constraints) (string, error) {

cmd/installer/cli/install.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ type InstallCmdFlags struct {
8282
func InstallCmd(ctx context.Context, name string) *cobra.Command {
8383
var flags InstallCmdFlags
8484

85+
ctx, cancel := context.WithCancel(ctx)
86+
8587
cmd := &cobra.Command{
8688
Use: "install",
8789
Short: fmt.Sprintf("Install %s", name),
@@ -94,13 +96,20 @@ func InstallCmd(ctx context.Context, name string) *cobra.Command {
9496
},
9597
PostRun: func(cmd *cobra.Command, args []string) {
9698
runtimeconfig.Cleanup()
99+
cancel() // Cancel context when command completes
97100
},
98101
RunE: func(cmd *cobra.Command, args []string) error {
99102
clusterID := metrics.ClusterID()
100103
metricsReporter := NewInstallReporter(
101104
replicatedAppURL(), flags.license.Spec.LicenseID, clusterID, cmd.CalledAs(),
102105
)
103106
metricsReporter.ReportInstallationStarted(ctx)
107+
108+
// Setup signal handler with the metrics reporter cleanup function
109+
signalHandler(ctx, cancel, func(ctx context.Context, err error) {
110+
metricsReporter.ReportInstallationFailed(ctx, err)
111+
})
112+
104113
if err := runInstall(cmd.Context(), name, flags, metricsReporter); err != nil {
105114
metricsReporter.ReportInstallationFailed(ctx, err)
106115
return err

cmd/installer/cli/join.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ type JoinCmdFlags struct {
4646
func JoinCmd(ctx context.Context, name string) *cobra.Command {
4747
var flags JoinCmdFlags
4848

49+
ctx, cancel := context.WithCancel(ctx)
50+
4951
cmd := &cobra.Command{
5052
Use: "join <url> <token>",
5153
Short: fmt.Sprintf("Join %s", name),
@@ -61,6 +63,7 @@ func JoinCmd(ctx context.Context, name string) *cobra.Command {
6163
},
6264
PostRun: func(cmd *cobra.Command, args []string) {
6365
runtimeconfig.Cleanup()
66+
cancel() // Cancel context when command completes
6467
},
6568
RunE: func(cmd *cobra.Command, args []string) error {
6669
logrus.Debugf("fetching join token remotely")
@@ -70,6 +73,12 @@ func JoinCmd(ctx context.Context, name string) *cobra.Command {
7073
}
7174
metricsReporter := NewJoinReporter(jcmd.InstallationSpec.MetricsBaseURL, jcmd.ClusterID, cmd.CalledAs())
7275
metricsReporter.ReportJoinStarted(ctx)
76+
77+
// Setup signal handler with the metrics reporter cleanup function
78+
signalHandler(ctx, cancel, func(ctx context.Context, err error) {
79+
metricsReporter.ReportJoinFailed(ctx, err)
80+
})
81+
7382
if err := runJoin(cmd.Context(), name, flags, jcmd, metricsReporter); err != nil {
7483
metricsReporter.ReportJoinFailed(ctx, err)
7584
return err

cmd/installer/cli/reset.go

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,7 @@ func ResetCmd(ctx context.Context, name string) *cobra.Command {
127127
}
128128

129129
// try and leave etcd cluster
130-
err = currentHost.leaveEtcdcluster()
131-
if !checkErrPrompt(assumeYes, force, err) {
132-
return err
133-
}
134-
130+
currentHost.leaveEtcdcluster()
135131
}
136132
}
137133

@@ -329,27 +325,56 @@ type etcdMembers struct {
329325
}
330326

331327
// leaveEtcdcluster uses k0s to attempt to leave the etcd cluster
332-
func (h *hostInfo) leaveEtcdcluster() error {
333-
334-
// if we're the only etcd member we don't need to leave the cluster
335-
out, err := helpers.RunCommand(k0sBinPath, "etcd", "member-list")
336-
if err != nil {
337-
return err
328+
func (h *hostInfo) leaveEtcdcluster() {
329+
// Try to list members with retries
330+
var memberlist etcdMembers
331+
var out string
332+
var err error
333+
334+
// Retry member list up to 3 times
335+
for i := 0; i < 3; i++ {
336+
out, err = helpers.RunCommand(k0sBinPath, "etcd", "member-list")
337+
if err == nil {
338+
err = json.Unmarshal([]byte(out), &memberlist)
339+
if err == nil {
340+
break
341+
}
342+
}
343+
if i < 2 { // Don't sleep on last attempt
344+
time.Sleep(2 * time.Second)
345+
}
338346
}
339-
memberlist := etcdMembers{}
340-
err = json.Unmarshal([]byte(out), &memberlist)
347+
341348
if err != nil {
342-
return err
349+
logrus.Warnf("Unable to list etcd members, continuing with reset: %v", err)
350+
return
343351
}
352+
353+
// If we're the only member, no need to leave
344354
if len(memberlist.Members) == 1 && memberlist.Members[h.Hostname] != "" {
345-
return nil
355+
return
346356
}
347357

348-
out, err = helpers.RunCommand(k0sBinPath, "etcd", "leave")
349-
if err != nil {
350-
return fmt.Errorf("unable to leave etcd cluster: %w, %s", err, out)
358+
// Attempt to leave the cluster with retries
359+
for i := 0; i < 3; i++ {
360+
out, err = helpers.RunCommand(k0sBinPath, "etcd", "leave")
361+
if err == nil {
362+
return
363+
}
364+
365+
// Check if the error is due to etcd being stopped
366+
if strings.Contains(err.Error(), "etcdserver: server stopped") {
367+
logrus.Warnf("Etcd server is stopped, continuing with reset")
368+
return
369+
}
370+
371+
if i < 2 { // Don't sleep on last attempt
372+
time.Sleep(2 * time.Second)
373+
}
351374
}
352-
return nil
375+
376+
// If we get here, we failed to leave after retries
377+
logrus.Warnf("Unable to leave etcd cluster after retries (this is often normal during reset): %v, %s", err, out)
353378
}
354379

355380
var (

cmd/installer/cli/signal.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package cli
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"os/signal"
8+
"syscall"
9+
10+
"github.com/sirupsen/logrus"
11+
)
12+
13+
// osExit is a variable to make testing easier
14+
var osExit = os.Exit
15+
16+
// signalHandler sets up handling for signals to ensure cleanup functions are called.
17+
func signalHandler(ctx context.Context, cancel context.CancelFunc, cleanupFuncs ...func(context.Context, error)) {
18+
sigChan := make(chan os.Signal, 1)
19+
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
20+
21+
go func() {
22+
select {
23+
case sig := <-sigChan:
24+
logrus.Debugf("Received signal: %v", sig)
25+
err := fmt.Errorf("command interrupted by signal: %v", sig)
26+
27+
for _, cleanup := range cleanupFuncs {
28+
cleanup(ctx, err)
29+
}
30+
31+
// Cancel the context after cleanup functions run
32+
cancel()
33+
34+
// Exit with non-zero status
35+
osExit(1)
36+
case <-ctx.Done():
37+
// Context was canceled elsewhere, do nothing
38+
return
39+
}
40+
}()
41+
}

cmd/installer/cli/signal_test.go

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
package cli
2+
3+
import (
4+
"context"
5+
"os"
6+
"sync"
7+
"syscall"
8+
"testing"
9+
"time"
10+
11+
"github.com/stretchr/testify/assert"
12+
)
13+
14+
func Test_signalHandler_Signal(t *testing.T) {
15+
// Create a context with cancel function
16+
ctx, cancel := context.WithCancel(context.Background())
17+
defer cancel()
18+
19+
// Create a waitgroup to synchronize the test
20+
var wg sync.WaitGroup
21+
wg.Add(1)
22+
23+
// Track if cleanup function was called
24+
cleanupCalled := false
25+
cleanupError := ""
26+
27+
// Mock cleanup function
28+
cleanup := func(ctx context.Context, err error) {
29+
cleanupCalled = true
30+
if err != nil {
31+
cleanupError = err.Error()
32+
}
33+
wg.Done()
34+
}
35+
36+
// Save original os.Exit and restore after test
37+
originalOsExit := osExit
38+
defer func() { osExit = originalOsExit }()
39+
40+
exitCode := 0
41+
osExit = func(code int) {
42+
exitCode = code
43+
// Instead of exiting, just cancel the context
44+
cancel()
45+
}
46+
47+
// Set up the signal handler
48+
signalHandler(ctx, cancel, cleanup)
49+
50+
// Send a signal to trigger the handler
51+
p, err := os.FindProcess(os.Getpid())
52+
if err != nil {
53+
t.Fatalf("Failed to find process: %v", err)
54+
}
55+
56+
// Send SIGINT to trigger the handler
57+
err = p.Signal(syscall.SIGINT)
58+
if err != nil {
59+
t.Fatalf("Failed to send signal: %v", err)
60+
}
61+
62+
// Wait for cleanup to be called with a timeout
63+
waitCh := make(chan struct{})
64+
go func() {
65+
wg.Wait()
66+
close(waitCh)
67+
}()
68+
69+
select {
70+
case <-waitCh:
71+
// Success - cleanup was called
72+
case <-time.After(1 * time.Second):
73+
t.Fatal("Timed out waiting for cleanup function to be called")
74+
}
75+
76+
// Verify cleanup was called with the expected error
77+
assert.True(t, cleanupCalled, "Cleanup function should have been called")
78+
assert.Contains(t, cleanupError, "command interrupted by signal: interrupt")
79+
assert.Equal(t, 1, exitCode, "Exit code should be 1")
80+
}
81+
82+
func Test_signalHandler_ContextDone(t *testing.T) {
83+
// Create a context with cancel function
84+
ctx, cancel := context.WithCancel(context.Background())
85+
86+
// We expect cleanup NOT to be called when context is cancelled
87+
cleanupCalled := false
88+
89+
cleanup := func(ctx context.Context, err error) {
90+
cleanupCalled = true
91+
}
92+
93+
// Set up the signal handler
94+
signalHandler(ctx, cancel, cleanup)
95+
96+
// Cancel the context
97+
cancel()
98+
99+
// Give some time for any handlers to run
100+
time.Sleep(100 * time.Millisecond)
101+
102+
// Verify cleanup was NOT called
103+
assert.False(t, cleanupCalled, "Cleanup function should not have been called when context is done")
104+
}

e2e/kots-release-unsupported-overrides/cluster-config.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,3 @@ spec:
2727
spec:
2828
telemetry:
2929
enabled: true
30-
workerProfiles:
31-
- name: ip-forward
32-
values:
33-
allowedUnsafeSysctls:
34-
- net.ipv4.ip_forward

e2e/proxy_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,16 @@ func TestInstallWithMITMProxy(t *testing.T) {
249249
failOnProxyTCPDenied(t, tc)
250250
})
251251

252+
// TODO: our preflight checks do not yet fail when run with a MITM proxy, the MITM CA cert on the host, but without the CA cert passed as a CLI arg
253+
//// test to ensure that preflight checks fail without the CA cert
254+
//t.Logf("%s: checking preflight checks with MITM proxy", time.Now().Format(time.RFC3339))
255+
//line = []string{"check-preflights-fail.sh", "--http-proxy", lxd.HTTPMITMProxy, "--https-proxy", lxd.HTTPMITMProxy}
256+
//if stdout, stderr, err := tc.RunCommandOnNode(0, line, lxd.WithMITMProxyEnv(tc.IPs)); err != nil {
257+
// t.Fatalf("fail to check preflight checks: %v: %s: %s", err, stdout, stderr)
258+
//} else {
259+
// t.Logf("Preflight checks failed as expected:\n%s\n%s", stdout, stderr)
260+
//}
261+
252262
// bootstrap the first node and makes sure it is healthy. also executes the kots
253263
// ssl certificate configuration (kurl-proxy).
254264
installSingleNodeWithOptions(t, tc, installOptions{
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env bash
2+
set -euox pipefail
3+
4+
DIR=/usr/local/bin
5+
. $DIR/common.sh
6+
7+
# main runs `embedded-cluster install run-preflights` and succeeds only when the
# preflight checks fail. Any arguments given to the script are forwarded to the
# command unchanged. Exits 1 when the preflight command unexpectedly succeeds.
main() {
    echo "Expecting failing preflight checks"

    # Decide on the argument count rather than on $1 being non-empty, so an empty
    # first argument does not cause the remaining arguments to be dropped.
    if [ "$#" -gt 0 ]; then
        echo "Running install with additional args: $*"
    fi
    # "$@" keeps every argument as its own word (no word splitting or globbing) and
    # expands to nothing when there are no arguments, which is safe under `set -u`.
    if embedded-cluster install run-preflights --license /assets/license.yaml --yes "$@" 2>&1 ; then
        echo "preflight_with_failure: Expected installation to fail"
        exit 1
    fi
}
20+
21+
main "$@"

0 commit comments

Comments
 (0)