Skip to content

Commit 80b1c6e

Browse files
Merge pull request #29278 from neisw/revert-29058-in-cluster-fixes-v6
Revert "Reapply "OCPBUGS-18865: Reapply "Merge pull request #28944 from vrutkovs/in-cluster-fixes-v4"""
2 parents 5dff126 + 9d0bf5d commit 80b1c6e

File tree

34 files changed

+621
-1500
lines changed

34 files changed

+621
-1500
lines changed

pkg/clioptions/iooptions/io_options.go

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,8 @@
11
package iooptions
22

33
import (
4-
"fmt"
54
"io"
65
"os"
7-
"path"
86

97
"github.com/spf13/pflag"
108
"k8s.io/cli-runtime/pkg/genericclioptions"
@@ -38,11 +36,6 @@ func (o *OutputFlags) ConfigureIOStreams(streams genericclioptions.IOStreams, st
3836
return doNothing, nil
3937
}
4038

41-
dir := path.Dir(o.OutFile)
42-
if err := os.MkdirAll(dir, os.ModePerm); err != nil {
43-
return doNothing, fmt.Errorf("failed to create parentdir %q: %w", dir, err)
44-
}
45-
4639
f, err := os.OpenFile(o.OutFile, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
4740
if err != nil {
4841
return doNothing, err

pkg/cmd/openshift-tests/dev/dev.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ a running cluster.
160160
logrus.Infof("loaded %d intervals", len(intervals))
161161

162162
logrus.Info("running tests")
163-
junits := legacynetworkmonitortests.TestMultipleSingleSecondDisruptions(intervals, nil)
163+
junits := legacynetworkmonitortests.TestMultipleSingleSecondDisruptions(intervals)
164164
for _, junit := range junits {
165165
if junit.FailureOutput != nil {
166166
logrus.Errorf("FAIL: %s", junit.Name)

pkg/cmd/openshift-tests/monitor/run/run_monitor_command.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,12 @@ import (
1919

2020
"github.com/spf13/pflag"
2121

22-
"github.com/openshift/origin/pkg/defaultmonitortests"
23-
"github.com/openshift/origin/pkg/monitor"
2422
"github.com/spf13/cobra"
2523
"k8s.io/cli-runtime/pkg/genericclioptions"
2624
"k8s.io/kubectl/pkg/util/templates"
25+
26+
"github.com/openshift/origin/pkg/defaultmonitortests"
27+
"github.com/openshift/origin/pkg/monitor"
2728
)
2829

2930
type RunMonitorFlags struct {

pkg/cmd/openshift-tests/run-disruption/disruption.go

Lines changed: 116 additions & 182 deletions
Original file line number | Diff line number | Diff line change
@@ -6,51 +6,46 @@ import (
66
"io"
77
"os"
88
"os/signal"
9-
"sync"
9+
"path/filepath"
1010
"syscall"
11+
"time"
1112

12-
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
13+
"github.com/openshift/origin/pkg/clioptions/clusterinfo"
1314

14-
"k8s.io/apimachinery/pkg/fields"
15+
monitorserialization "github.com/openshift/origin/pkg/monitor/serialization"
1516

16-
"github.com/openshift/origin/pkg/clioptions/iooptions"
17-
"github.com/openshift/origin/pkg/disruption/backend"
18-
disruptionci "github.com/openshift/origin/pkg/disruption/ci"
19-
"github.com/openshift/origin/pkg/monitor"
20-
"github.com/openshift/origin/test/extended/util/disruption/controlplane"
21-
"github.com/spf13/cobra"
22-
"github.com/spf13/pflag"
23-
corev1 "k8s.io/api/core/v1"
24-
apimachinerywatch "k8s.io/apimachinery/pkg/watch"
2517
"k8s.io/cli-runtime/pkg/genericclioptions"
2618
"k8s.io/client-go/kubernetes"
2719
"k8s.io/client-go/rest"
28-
"k8s.io/client-go/tools/cache"
29-
"k8s.io/client-go/tools/watch"
20+
"k8s.io/klog/v2"
3021
"k8s.io/kubectl/pkg/util/templates"
31-
)
3222

33-
type RunAPIDisruptionMonitorFlags struct {
34-
ConfigFlags *genericclioptions.ConfigFlags
35-
OutputFlags *iooptions.OutputFlags
23+
"github.com/openshift/origin/pkg/disruption/backend"
24+
"github.com/openshift/origin/pkg/monitor"
25+
"github.com/openshift/origin/pkg/monitor/apiserveravailability"
26+
"github.com/openshift/origin/pkg/monitor/monitorapi"
27+
"github.com/openshift/origin/test/extended/util/disruption/controlplane"
28+
"github.com/spf13/cobra"
29+
)
3630

37-
ArtifactDir string
38-
LoadBalancerType string
39-
StopConfigMapName string
31+
// RunAPIDisruptionMonitorOptions sets options for api server disruption monitor
32+
type RunAPIDisruptionMonitorOptions struct {
33+
Out, ErrOut io.Writer
4034

41-
genericclioptions.IOStreams
35+
ArtifactDir string
36+
LoadBalancerType string
37+
ExtraMessage string
4238
}
4339

44-
func NewRunInClusterDisruptionMonitorFlags(ioStreams genericclioptions.IOStreams) *RunAPIDisruptionMonitorFlags {
45-
return &RunAPIDisruptionMonitorFlags{
46-
ConfigFlags: genericclioptions.NewConfigFlags(false),
47-
OutputFlags: iooptions.NewOutputOptions(),
48-
IOStreams: ioStreams,
40+
func NewRunInClusterDisruptionMonitorOptions(ioStreams genericclioptions.IOStreams) *RunAPIDisruptionMonitorOptions {
41+
return &RunAPIDisruptionMonitorOptions{
42+
Out: ioStreams.Out,
43+
ErrOut: ioStreams.ErrOut,
4944
}
5045
}
5146

5247
func NewRunInClusterDisruptionMonitorCommand(ioStreams genericclioptions.IOStreams) *cobra.Command {
53-
f := NewRunInClusterDisruptionMonitorFlags(ioStreams)
48+
disruptionOpt := NewRunInClusterDisruptionMonitorOptions(ioStreams)
5449
cmd := &cobra.Command{
5550
Use: "run-disruption",
5651
Short: "Run API server disruption monitor",
@@ -61,183 +56,122 @@ func NewRunInClusterDisruptionMonitorCommand(ioStreams genericclioptions.IOStrea
6156
SilenceUsage: true,
6257
SilenceErrors: true,
6358
RunE: func(cmd *cobra.Command, args []string) error {
64-
ctx, cancelFn := context.WithCancel(context.Background())
65-
defer cancelFn()
66-
abortCh := make(chan os.Signal, 2)
67-
go func() {
68-
<-abortCh
69-
fmt.Fprintf(f.ErrOut, "Interrupted, terminating\n")
70-
cancelFn()
71-
72-
sig := <-abortCh
73-
fmt.Fprintf(f.ErrOut, "Interrupted twice, exiting (%s)\n", sig)
74-
switch sig {
75-
case syscall.SIGINT:
76-
os.Exit(130)
77-
default:
78-
os.Exit(0)
79-
}
80-
}()
81-
signal.Notify(abortCh, syscall.SIGINT, syscall.SIGTERM)
82-
83-
if err := f.Validate(); err != nil {
84-
return err
85-
}
86-
87-
o, err := f.ToOptions()
88-
if err != nil {
89-
return err
90-
}
91-
92-
return o.Run(ctx)
59+
return disruptionOpt.Run()
9360
},
9461
}
95-
96-
f.AddFlags(cmd.Flags())
97-
62+
cmd.Flags().StringVar(&disruptionOpt.ArtifactDir,
63+
"artifact-dir", disruptionOpt.ArtifactDir,
64+
"The directory where monitor events will be stored.")
65+
cmd.Flags().StringVar(&disruptionOpt.LoadBalancerType,
66+
"lb-type", disruptionOpt.LoadBalancerType,
67+
"Set load balancer type, available options: internal-lb, service-network, external-lb (default)")
68+
cmd.Flags().StringVar(&disruptionOpt.ExtraMessage,
69+
"extra-message", disruptionOpt.ExtraMessage,
70+
"Add custom label to disruption event message")
9871
return cmd
9972
}
10073

101-
func (f *RunAPIDisruptionMonitorFlags) AddFlags(flags *pflag.FlagSet) {
102-
flags.StringVar(&f.LoadBalancerType, "lb-type", f.LoadBalancerType, "Set load balancer type, available options: internal-lb, service-network, external-lb (default)")
103-
flags.StringVar(&f.StopConfigMapName, "stop-configmap", f.StopConfigMapName, "the name of the configmap that indicates that this pod should stop all watchers.")
104-
105-
f.ConfigFlags.AddFlags(flags)
106-
f.OutputFlags.BindFlags(flags)
107-
}
108-
109-
func (f *RunAPIDisruptionMonitorFlags) SetIOStreams(streams genericclioptions.IOStreams) {
110-
f.IOStreams = streams
111-
}
112-
113-
func (f *RunAPIDisruptionMonitorFlags) Validate() error {
114-
if len(f.OutputFlags.OutFile) == 0 {
115-
return fmt.Errorf("output-file must be specified")
116-
}
117-
if len(f.StopConfigMapName) == 0 {
118-
return fmt.Errorf("stop-configmap must be specified")
119-
}
120-
121-
return nil
122-
}
123-
124-
func (f *RunAPIDisruptionMonitorFlags) ToOptions() (*RunAPIDisruptionMonitorOptions, error) {
125-
originalOutStream := f.IOStreams.Out
126-
closeFn, err := f.OutputFlags.ConfigureIOStreams(f.IOStreams, f)
74+
func (opt *RunAPIDisruptionMonitorOptions) Run() error {
75+
restConfig, err := clusterinfo.GetMonitorRESTConfig()
12776
if err != nil {
128-
return nil, err
77+
return err
12978
}
13079

131-
namespace, _, err := f.ConfigFlags.ToRawKubeConfigLoader().Namespace()
132-
if err != nil {
133-
return nil, err
134-
}
135-
if len(namespace) == 0 {
136-
return nil, fmt.Errorf("namespace must be specified")
137-
}
80+
lb := backend.ParseStringToLoadBalancerType(opt.LoadBalancerType)
13881

139-
restConfig, err := f.ConfigFlags.ToRESTConfig()
140-
if err != nil {
141-
return nil, err
142-
}
143-
kubeClient, err := kubernetes.NewForConfig(restConfig)
82+
ctx, cancelFn := context.WithCancel(context.Background())
83+
defer cancelFn()
84+
abortCh := make(chan os.Signal, 2)
85+
go func() {
86+
<-abortCh
87+
fmt.Fprintf(opt.ErrOut, "Interrupted, terminating\n")
88+
// Give some time to store intervals on disk
89+
time.Sleep(5 * time.Second)
90+
cancelFn()
91+
sig := <-abortCh
92+
fmt.Fprintf(opt.ErrOut, "Interrupted twice, exiting (%s)\n", sig)
93+
switch sig {
94+
case syscall.SIGINT:
95+
os.Exit(130)
96+
default:
97+
os.Exit(0)
98+
}
99+
}()
100+
signal.Notify(abortCh, syscall.SIGINT, syscall.SIGTERM)
101+
102+
recorder, err := StartAPIAvailability(ctx, restConfig, lb)
144103
if err != nil {
145-
return nil, err
104+
return err
146105
}
147106

148-
return &RunAPIDisruptionMonitorOptions{
149-
KubeClient: kubeClient,
150-
KubeClientConfig: restConfig,
151-
OutputFile: f.OutputFlags.OutFile,
152-
LoadBalancerType: f.LoadBalancerType,
153-
StopConfigMapName: f.StopConfigMapName,
154-
Namespace: namespace,
155-
CloseFn: closeFn,
156-
OriginalOutFile: originalOutStream,
157-
IOStreams: f.IOStreams,
158-
}, nil
159-
}
160-
161-
// RunAPIDisruptionMonitorOptions sets options for api server disruption monitor
162-
type RunAPIDisruptionMonitorOptions struct {
163-
KubeClient kubernetes.Interface
164-
KubeClientConfig *rest.Config
165-
OutputFile string
166-
LoadBalancerType string
167-
StopConfigMapName string
168-
Namespace string
169-
170-
OriginalOutFile io.Writer
171-
CloseFn iooptions.CloseFunc
172-
genericclioptions.IOStreams
173-
}
107+
go func() {
108+
ticker := time.NewTicker(100 * time.Millisecond)
109+
defer ticker.Stop()
110+
var last time.Time
111+
done := false
112+
for !done {
113+
select {
114+
case <-ticker.C:
115+
case <-ctx.Done():
116+
done = true
117+
}
118+
events := recorder.Intervals(last, time.Time{})
119+
if len(events) > 0 {
120+
for _, event := range events {
121+
if !event.From.Equal(event.To) {
122+
continue
123+
}
124+
fmt.Fprintln(opt.Out, event.String())
125+
}
126+
last = events[len(events)-1].From
127+
}
128+
}
129+
}()
174130

175-
func (o *RunAPIDisruptionMonitorOptions) Run(ctx context.Context) error {
176-
ctx, cancelFn := context.WithCancel(ctx)
177-
defer cancelFn()
131+
<-ctx.Done()
178132

179-
fmt.Fprintf(o.Out, "Starting up.")
133+
// Store intervals to artifact directory
134+
intervals := recorder.Intervals(time.Time{}, time.Time{})
135+
if len(opt.ExtraMessage) > 0 {
136+
fmt.Fprintf(opt.Out, "\nAppending %s to recorded event message\n", opt.ExtraMessage)
137+
for i, event := range intervals {
138+
intervals[i].Message.HumanMessage = fmt.Sprintf("%s user-provided-message=%s", event.Message.HumanMessage, opt.ExtraMessage)
139+
}
140+
}
180141

181-
startingContent, err := os.ReadFile(o.OutputFile)
182-
if err != nil && !os.IsNotExist(err) {
142+
eventDir := filepath.Join(opt.ArtifactDir, monitorapi.EventDir)
143+
if err := os.MkdirAll(eventDir, os.ModePerm); err != nil {
144+
fmt.Printf("Failed to create monitor-events directory, err: %v\n", err)
183145
return err
184146
}
185-
if len(startingContent) > 0 {
186-
// print starting content to the log so that we can simply scrape the log to find all entries at the end.
187-
o.OriginalOutFile.Write(startingContent)
188-
}
189-
190-
lb := backend.ParseStringToLoadBalancerType(o.LoadBalancerType)
191147

192-
recorder := monitor.WrapWithJSONLRecorder(monitor.NewRecorder(), o.IOStreams.Out, nil)
193-
samplers, err := controlplane.StartAPIMonitoringUsingNewBackend(ctx, recorder, o.KubeClientConfig, o.KubeClient, lb)
194-
if err != nil {
148+
timeSuffix := fmt.Sprintf("_%s", time.Now().UTC().Format("20060102-150405"))
149+
if err := monitorserialization.EventsToFile(filepath.Join(eventDir, fmt.Sprintf("e2e-events%s.json", timeSuffix)), intervals); err != nil {
150+
fmt.Printf("Failed to write event data, err: %v\n", err)
195151
return err
196152
}
153+
fmt.Fprintf(opt.Out, "\nEvent data written, exiting\n")
197154

198-
go func(ctx context.Context) {
199-
defer cancelFn()
200-
err := o.WaitForStopSignal(ctx)
201-
if err != nil {
202-
fmt.Fprintf(o.ErrOut, "failure waiting for stop: %v", err)
203-
}
204-
}(ctx)
155+
return nil
156+
}
205157

206-
<-ctx.Done()
158+
// StartAPIAvailability monitors just the cluster availability
159+
func StartAPIAvailability(ctx context.Context, restConfig *rest.Config, lb backend.LoadBalancerType) (monitorapi.Recorder, error) {
160+
recorder := monitor.NewRecorder()
207161

208-
fmt.Fprintf(o.Out, "waiting for samplers to stop")
209-
wg := sync.WaitGroup{}
210-
for i := range samplers {
211-
wg.Add(1)
212-
func(sampler disruptionci.Sampler) {
213-
defer wg.Done()
214-
sampler.Stop()
215-
}(samplers[i])
162+
client, err := kubernetes.NewForConfig(restConfig)
163+
if err != nil {
164+
return nil, err
165+
}
166+
if err := controlplane.StartAPIMonitoringUsingNewBackend(ctx, recorder, restConfig, lb); err != nil {
167+
return nil, err
216168
}
217-
wg.Wait()
218-
fmt.Fprintf(o.Out, "samplers stopped")
219-
220-
return nil
221-
}
222169

223-
func (o *RunAPIDisruptionMonitorOptions) WaitForStopSignal(ctx context.Context) error {
224-
defer utilruntime.HandleCrash()
225-
226-
_, err := watch.UntilWithSync(
227-
ctx,
228-
cache.NewListWatchFromClient(
229-
o.KubeClient.CoreV1().RESTClient(), "configmaps", o.Namespace, fields.OneTermEqualSelector("metadata.name", o.StopConfigMapName)),
230-
&corev1.ConfigMap{},
231-
nil,
232-
func(event apimachinerywatch.Event) (bool, error) {
233-
switch event.Type {
234-
case apimachinerywatch.Added:
235-
return true, nil
236-
case apimachinerywatch.Modified:
237-
return true, nil
238-
}
239-
return false, nil
240-
},
241-
)
242-
return err
170+
// read the state of the cluster apiserver client access issues *before* any test (like upgrade) begins
171+
intervals, err := apiserveravailability.APIServerAvailabilityIntervalsFromCluster(client, time.Time{}, time.Time{})
172+
if err != nil {
173+
klog.Errorf("error reading initial apiserver availability: %v", err)
174+
}
175+
recorder.AddIntervals(intervals...)
176+
return recorder, nil
243177
}

0 commit comments

Comments
 (0)