Skip to content

Commit 1f6db5b

Browse files
authored
Merge branch 'openshift:master' into master
2 parents 8550989 + bab9c54 commit 1f6db5b

File tree

46 files changed

+647
-234
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+647
-234
lines changed

cmd/openshift-install/agent.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package main
22

33
import (
4+
"context"
5+
46
"github.com/spf13/cobra"
57

68
"github.com/openshift/installer/cmd/openshift-install/agent"
@@ -14,7 +16,7 @@ import (
1416
"github.com/openshift/installer/pkg/asset/password"
1517
)
1618

17-
func newAgentCmd() *cobra.Command {
19+
func newAgentCmd(ctx context.Context) *cobra.Command {
1820
agentCmd := &cobra.Command{
1921
Use: "agent",
2022
Short: "Commands for supporting cluster installation using agent installer",
@@ -23,7 +25,7 @@ func newAgentCmd() *cobra.Command {
2325
},
2426
}
2527

26-
agentCmd.AddCommand(newAgentCreateCmd())
28+
agentCmd.AddCommand(newAgentCreateCmd(ctx))
2729
agentCmd.AddCommand(agent.NewWaitForCmd())
2830
agentCmd.AddCommand(newAgentGraphCmd())
2931
return agentCmd
@@ -115,8 +117,7 @@ var (
115117
agentTargets = []target{agentConfigTarget, agentManifestsTarget, agentImageTarget, agentPXEFilesTarget, agentConfigImageTarget, agentUnconfiguredIgnitionTarget}
116118
)
117119

118-
func newAgentCreateCmd() *cobra.Command {
119-
120+
func newAgentCreateCmd(ctx context.Context) *cobra.Command {
120121
cmd := &cobra.Command{
121122
Use: "create",
122123
Short: "Commands for generating agent installation artifacts",
@@ -127,7 +128,7 @@ func newAgentCreateCmd() *cobra.Command {
127128

128129
for _, t := range agentTargets {
129130
t.command.Args = cobra.ExactArgs(0)
130-
t.command.Run = runTargetCmd(t.assets...)
131+
t.command.Run = runTargetCmd(ctx, t.assets...)
131132
cmd.AddCommand(t.command)
132133
}
133134

cmd/openshift-install/create.go

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ const (
6868
exitCodeBootstrapFailed
6969
exitCodeInstallFailed
7070
exitCodeOperatorStabilityFailed
71+
exitCodeInterrupt
7172

7273
// coStabilityThreshold is how long a cluster operator must have Progressing=False
7374
// in order to be considered stable. Measured in seconds.
@@ -127,12 +128,10 @@ var (
127128
Short: "Create an OpenShift cluster",
128129
// FIXME: add longer descriptions for our commands with examples for better UX.
129130
// Long: "",
130-
PostRun: func(_ *cobra.Command, _ []string) {
131-
// Setup a context that is canceled when the user presses Ctrl+C,
132-
// or SIGTERM and SIGINT are received, this allows for a clean shutdown.
133-
ctx, cancel := context.WithCancel(context.TODO())
134-
defer cancel()
135-
logrus.RegisterExitHandler(cancel)
131+
PostRun: func(cmd *cobra.Command, _ []string) {
132+
133+
// Get the context that runTargetCmd's Run function set on this command.
134+
ctx := cmd.Context()
136135

137136
exitCode, err := clusterCreatePostRun(ctx)
138137
if err != nil {
@@ -167,7 +166,7 @@ func clusterCreatePostRun(ctx context.Context) (int, error) {
167166
}
168167

169168
// Handle the case when the API server is not reachable.
170-
if err := handleUnreachableAPIServer(config); err != nil {
169+
if err := handleUnreachableAPIServer(ctx, config); err != nil {
171170
logrus.Fatal(fmt.Errorf("unable to handle api server override: %w", err))
172171
}
173172

@@ -176,7 +175,7 @@ func clusterCreatePostRun(ctx context.Context) (int, error) {
176175
//
177176
timer.StartTimer("Bootstrap Complete")
178177
if err := waitForBootstrapComplete(ctx, config); err != nil {
179-
bundlePath, gatherErr := runGatherBootstrapCmd(command.RootOpts.Dir)
178+
bundlePath, gatherErr := runGatherBootstrapCmd(ctx, command.RootOpts.Dir)
180179
if gatherErr != nil {
181180
logrus.Error("Attempted to gather debug logs after installation failure: ", gatherErr)
182181
}
@@ -277,7 +276,7 @@ func newClientError(errorInfo error) *clusterCreateError {
277276
}
278277
}
279278

280-
func newCreateCmd() *cobra.Command {
279+
func newCreateCmd(ctx context.Context) *cobra.Command {
281280
cmd := &cobra.Command{
282281
Use: "create",
283282
Short: "Create part of an OpenShift cluster",
@@ -288,22 +287,25 @@ func newCreateCmd() *cobra.Command {
288287

289288
for _, t := range targets {
290289
t.command.Args = cobra.ExactArgs(0)
291-
t.command.Run = runTargetCmd(t.assets...)
290+
t.command.Run = runTargetCmd(ctx, t.assets...)
292291
cmd.AddCommand(t.command)
293292
}
294293

295294
return cmd
296295
}
297296

298-
func runTargetCmd(targets ...asset.WritableAsset) func(cmd *cobra.Command, args []string) {
297+
func runTargetCmd(ctx context.Context, targets ...asset.WritableAsset) func(cmd *cobra.Command, args []string) {
299298
runner := func(directory string) error {
300299
fetcher := assetstore.NewAssetsFetcher(directory)
301-
return fetcher.FetchAndPersist(targets)
300+
return fetcher.FetchAndPersist(ctx, targets)
302301
}
303302

304303
return func(cmd *cobra.Command, args []string) {
305304
timer.StartTimer(timer.TotalTimeElapsed)
306305

306+
// Set the context to be used in the PostRun function.
307+
cmd.SetContext(ctx)
308+
307309
cleanup := command.SetupFileHook(command.RootOpts.Dir)
308310
defer cleanup()
309311

@@ -855,15 +857,15 @@ func meetsStabilityThreshold(progressing *configv1.ClusterOperatorStatusConditio
855857
return progressing.Status == configv1.ConditionFalse && time.Since(progressing.LastTransitionTime.Time).Seconds() > coStabilityThreshold
856858
}
857859

858-
func handleUnreachableAPIServer(config *rest.Config) error {
860+
func handleUnreachableAPIServer(ctx context.Context, config *rest.Config) error {
859861
assetStore, err := assetstore.NewStore(command.RootOpts.Dir)
860862
if err != nil {
861863
return fmt.Errorf("failed to create asset store: %w", err)
862864
}
863865

864866
// Ensure that the install is expecting the user to provision their own DNS solution.
865867
installConfig := &installconfig.InstallConfig{}
866-
if err := assetStore.Fetch(installConfig); err != nil {
868+
if err := assetStore.Fetch(ctx, installConfig); err != nil {
867869
return fmt.Errorf("failed to fetch %s: %w", installConfig.Name(), err)
868870
}
869871
switch installConfig.Config.Platform.Name() { //nolint:gocritic
@@ -876,7 +878,7 @@ func handleUnreachableAPIServer(config *rest.Config) error {
876878
}
877879

878880
lbConfig := &lbconfig.Config{}
879-
if err := assetStore.Fetch(lbConfig); err != nil {
881+
if err := assetStore.Fetch(ctx, lbConfig); err != nil {
880882
return fmt.Errorf("failed to fetch %s: %w", lbConfig.Name(), err)
881883
}
882884

cmd/openshift-install/gather.go

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434
_ "github.com/openshift/installer/pkg/gather/gcp"
3535
)
3636

37-
func newGatherCmd() *cobra.Command {
37+
func newGatherCmd(ctx context.Context) *cobra.Command {
3838
cmd := &cobra.Command{
3939
Use: "gather",
4040
Short: "Gather debugging data for a given installation failure",
@@ -47,7 +47,7 @@ to debug the installation failures`,
4747
return cmd.Help()
4848
},
4949
}
50-
cmd.AddCommand(newGatherBootstrapCmd())
50+
cmd.AddCommand(newGatherBootstrapCmd(ctx))
5151
return cmd
5252
}
5353

@@ -58,15 +58,15 @@ var gatherBootstrapOpts struct {
5858
skipAnalysis bool
5959
}
6060

61-
func newGatherBootstrapCmd() *cobra.Command {
61+
func newGatherBootstrapCmd(ctx context.Context) *cobra.Command {
6262
cmd := &cobra.Command{
6363
Use: "bootstrap",
6464
Short: "Gather debugging data for a failing-to-bootstrap control plane",
6565
Args: cobra.ExactArgs(0),
6666
Run: func(_ *cobra.Command, _ []string) {
6767
cleanup := command.SetupFileHook(command.RootOpts.Dir)
6868
defer cleanup()
69-
bundlePath, err := runGatherBootstrapCmd(command.RootOpts.Dir)
69+
bundlePath, err := runGatherBootstrapCmd(ctx, command.RootOpts.Dir)
7070
if err != nil {
7171
logrus.Fatal(err)
7272
}
@@ -87,14 +87,14 @@ func newGatherBootstrapCmd() *cobra.Command {
8787
return cmd
8888
}
8989

90-
func runGatherBootstrapCmd(directory string) (string, error) {
90+
func runGatherBootstrapCmd(ctx context.Context, directory string) (string, error) {
9191
assetStore, err := assetstore.NewStore(directory)
9292
if err != nil {
9393
return "", errors.Wrap(err, "failed to create asset store")
9494
}
9595
// add the default bootstrap key pair to the sshKeys list
9696
bootstrapSSHKeyPair := &tls.BootstrapSSHKeyPair{}
97-
if err := assetStore.Fetch(bootstrapSSHKeyPair); err != nil {
97+
if err := assetStore.Fetch(ctx, bootstrapSSHKeyPair); err != nil {
9898
return "", errors.Wrapf(err, "failed to fetch %s", bootstrapSSHKeyPair.Name())
9999
}
100100
tmpfile, err := os.CreateTemp("", "bootstrap-ssh")
@@ -118,7 +118,7 @@ func runGatherBootstrapCmd(directory string) (string, error) {
118118

119119
if ha.Bootstrap == "" && len(ha.Masters) == 0 {
120120
config := &installconfig.InstallConfig{}
121-
if err := assetStore.Fetch(config); err != nil {
121+
if err := assetStore.Fetch(ctx, config); err != nil {
122122
return "", errors.Wrapf(err, "failed to fetch %s", config.Name())
123123
}
124124

@@ -140,6 +140,7 @@ func runGatherBootstrapCmd(directory string) (string, error) {
140140

141141
func gatherBootstrap(bootstrap string, port int, masters []string, directory string) (string, error) {
142142
gatherID := time.Now().Format("20060102150405")
143+
archives := map[string]string{}
143144

144145
serialLogBundle := filepath.Join(directory, fmt.Sprintf("serial-log-bundle-%s.tar.gz", gatherID))
145146
serialLogBundlePath, err := filepath.Abs(serialLogBundle)
@@ -154,9 +155,32 @@ func gatherBootstrap(bootstrap string, port int, masters []string, directory str
154155
logrus.Info("Pulling VM console logs")
155156
if err := consoleGather.Run(); err != nil {
156157
logrus.Infof("Failed to gather VM console logs: %s", err.Error())
158+
} else {
159+
archives[serialLogBundlePath] = "serial"
157160
}
158161
}
159162

163+
clusterLogBundlePath, err := pullLogsFromBootstrap(gatherID, bootstrap, port, masters, directory)
164+
if err != nil {
165+
logrus.Infof("Failed to gather bootstrap logs: %s", err.Error())
166+
} else {
167+
archives[clusterLogBundlePath] = ""
168+
}
169+
170+
if len(archives) == 0 {
171+
return "", fmt.Errorf("failed to gather VM console and bootstrap logs")
172+
}
173+
174+
logBundlePath := filepath.Join(directory, fmt.Sprintf("log-bundle-%s.tar.gz", gatherID))
175+
err = serialgather.CombineArchives(logBundlePath, archives)
176+
if err != nil {
177+
return "", errors.Wrap(err, "failed to combine archives")
178+
}
179+
180+
return logBundlePath, nil
181+
}
182+
183+
func pullLogsFromBootstrap(gatherID string, bootstrap string, port int, masters []string, directory string) (string, error) {
160184
logrus.Info("Pulling debug logs from the bootstrap machine")
161185
client, err := ssh.NewClient("core", net.JoinHostPort(bootstrap, strconv.Itoa(port)), gatherBootstrapOpts.sshKeys)
162186
if err != nil {
@@ -180,14 +204,7 @@ func gatherBootstrap(bootstrap string, port int, masters []string, directory str
180204
return "", errors.Wrap(err, "failed to stat log file")
181205
}
182206

183-
logBundlePath := filepath.Join(filepath.Dir(clusterLogBundlePath), fmt.Sprintf("log-bundle-%s.tar.gz", gatherID))
184-
archives := map[string]string{serialLogBundlePath: "serial", clusterLogBundlePath: ""}
185-
err = serialgather.CombineArchives(logBundlePath, archives)
186-
if err != nil {
187-
return "", errors.Wrap(err, "failed to combine archives")
188-
}
189-
190-
return logBundlePath, nil
207+
return clusterLogBundlePath, nil
191208
}
192209

193210
func logClusterOperatorConditions(ctx context.Context, config *rest.Config) error {

cmd/openshift-install/main.go

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package main
22

33
import (
4+
"context"
45
"flag"
56
"io"
67
"os"
@@ -12,8 +13,10 @@ import (
1213
terminal "golang.org/x/term"
1314
"k8s.io/klog"
1415
klogv2 "k8s.io/klog/v2"
16+
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
1517

1618
"github.com/openshift/installer/cmd/openshift-install/command"
19+
"github.com/openshift/installer/pkg/clusterapi"
1720
)
1821

1922
func main() {
@@ -36,18 +39,22 @@ func main() {
3639
func installerMain() {
3740
rootCmd := newRootCmd()
3841

42+
// Perform a graceful shutdown upon interrupt or at exit.
43+
ctx := handleInterrupt(signals.SetupSignalHandler())
44+
logrus.RegisterExitHandler(shutdown)
45+
3946
for _, subCmd := range []*cobra.Command{
40-
newCreateCmd(),
47+
newCreateCmd(ctx),
4148
newDestroyCmd(),
4249
newWaitForCmd(),
43-
newGatherCmd(),
50+
newGatherCmd(ctx),
4451
newAnalyzeCmd(),
4552
newVersionCmd(),
4653
newGraphCmd(),
4754
newCoreOSCmd(),
4855
newCompletionCmd(),
4956
newExplainCmd(),
50-
newAgentCmd(),
57+
newAgentCmd(ctx),
5158
} {
5259
rootCmd.AddCommand(subCmd)
5360
}
@@ -96,3 +103,26 @@ func runRootCmd(cmd *cobra.Command, args []string) {
96103
logrus.Fatal(errors.Wrap(err, "invalid log-level"))
97104
}
98105
}
106+
107+
// handleInterrupt executes a graceful shutdown then exits in
108+
// the case of a user interrupt. It returns a new context that
109+
// will be cancelled upon interrupt.
110+
func handleInterrupt(signalCtx context.Context) context.Context {
111+
ctx, cancel := context.WithCancel(context.Background())
112+
113+
// If the context from the signal handler is done,
114+
// an interrupt has been received, so shut down and exit.
115+
go func() {
116+
<-signalCtx.Done()
117+
logrus.Warn("Received interrupt signal")
118+
shutdown()
119+
cancel()
120+
logrus.Exit(exitCodeInterrupt)
121+
}()
122+
123+
return ctx
124+
}
125+
126+
func shutdown() {
127+
clusterapi.System().Teardown()
128+
}

data/data/aws/bootstrap/main.tf

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ resource "aws_instance" "bootstrap" {
161161
subnet_id = var.aws_publish_strategy == "External" ? var.public_subnet_ids[0] : var.private_subnet_ids[0]
162162
user_data = var.aws_bootstrap_stub_ignition
163163
vpc_security_group_ids = [var.master_sg_id, aws_security_group.bootstrap.id]
164-
associate_public_ip_address = local.public_endpoints
164+
associate_public_ip_address = local.public_endpoints && var.aws_public_ipv4_pool == ""
165165

166166
lifecycle {
167167
# Ignore changes in the AMI which force recreation of the resource. This
@@ -251,9 +251,17 @@ resource "aws_security_group_rule" "bootstrap_journald_gateway" {
251251
}
252252

253253
resource "aws_eip" "bootstrap" {
254+
count = var.aws_public_ipv4_pool == "" ? 0 : 1
254255
domain = "vpc"
255256
instance = aws_instance.bootstrap.id
256-
public_ipv4_pool = var.aws_public_ipv4_pool == "" ? null : var.aws_public_ipv4_pool
257+
public_ipv4_pool = var.aws_public_ipv4_pool
258+
259+
tags = merge(
260+
{
261+
"Name" = "${var.cluster_id}-bootstrap-eip"
262+
},
263+
local.tags,
264+
)
257265

258266
depends_on = [aws_instance.bootstrap]
259-
}
267+
}

data/data/aws/bootstrap/outputs.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
output "bootstrap_ip" {
2-
value = local.public_endpoints ? aws_instance.bootstrap.public_ip : aws_instance.bootstrap.private_ip
2+
value = var.aws_public_ipv4_pool != "" ? aws_eip.bootstrap[0].public_ip : local.public_endpoints ? aws_instance.bootstrap.public_ip : aws_instance.bootstrap.private_ip
33
}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
{{if .Proxy -}}
22
[Manager]
33
{{if .Proxy.HTTPProxy -}}
4-
DefaultEnvironment=HTTP_PROXY="{{.Proxy.HTTPProxy}}"
4+
DefaultEnvironment=HTTP_PROXY="{{replace .Proxy.HTTPProxy "%" "%%"}}"
55
{{end -}}
66
{{if .Proxy.HTTPSProxy -}}
7-
DefaultEnvironment=HTTPS_PROXY="{{.Proxy.HTTPSProxy}}"
7+
DefaultEnvironment=HTTPS_PROXY="{{replace .Proxy.HTTPSProxy "%" "%%"}}"
88
{{end -}}
99
{{if .Proxy.NoProxy -}}
1010
DefaultEnvironment=NO_PROXY="{{.Proxy.NoProxy}}"
1111
{{end -}}
12-
{{end -}}
12+
{{end -}}

0 commit comments

Comments
 (0)