package admupgradestatus

import (
	"context"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned"
	"github.com/openshift/origin/pkg/monitortestframework"
	exutil "github.com/openshift/origin/test/extended/util"
	"k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"

	"github.com/openshift/origin/pkg/monitor/monitorapi"
	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
)
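
// snapshot records a single invocation of 'oc adm upgrade status': the time it
// ran, its raw output, and the error it failed with, if any.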
type snapshot struct {
	when time.Time
	out  string
	err  error
}
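
// monitor implements monitortestframework.MonitorTest. It periodically runs
// 'oc adm upgrade status' over the course of the run and remembers each result
// so that they can be evaluated and archived afterwards.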
type monitor struct {
	collectionDone     chan struct{}
	ocAdmUpgradeStatus map[time.Time]*snapshot
	notSupportedReason error
	isSNO              bool
}
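
// NewOcAdmUpgradeStatusChecker creates a monitor test that watches the behavior
// of 'oc adm upgrade status' over the course of the run.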
func NewOcAdmUpgradeStatusChecker() monitortestframework.MonitorTest {
	return &monitor{
		collectionDone:     make(chan struct{}),
		ocAdmUpgradeStatus: map[time.Time]*snapshot{},
	}
}
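
// PrepareCollection determines whether the cluster is a supported topology
// (MicroShift and Hypershift are not) and remembers whether it is a single-node
// cluster, which CollectData treats more leniently.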
func (w *monitor) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
	kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
	if err != nil {
		return err
	}
	isMicroShift, err := exutil.IsMicroShiftCluster(kubeClient)
	if err != nil {
		return fmt.Errorf("unable to determine if cluster is MicroShift: %w", err)
	}
	if isMicroShift {
		w.notSupportedReason = &monitortestframework.NotSupportedError{Reason: "platform MicroShift not supported"}
		return w.notSupportedReason
	}
	clientconfigv1client, err := clientconfigv1.NewForConfig(adminRESTConfig)
	if err != nil {
		return err
	}

	if ok, err := exutil.IsHypershift(ctx, clientconfigv1client); err != nil {
		return fmt.Errorf("unable to determine if cluster is Hypershift: %w", err)
	} else if ok {
		w.notSupportedReason = &monitortestframework.NotSupportedError{Reason: "platform Hypershift not supported"}
		return w.notSupportedReason
	}
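	// Single-node clusters remain supported, but their API server (and therefore the command) is expected to
	// be briefly unavailable while the sole node reboots during an upgrade, so CollectData holds them to a
	// laxer failure threshold.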
	if ok, err := exutil.IsSingleNode(ctx, clientconfigv1client); err != nil {
		return fmt.Errorf("unable to determine if cluster is single node: %w", err)
	} else {
		w.isSNO = ok
	}
	return nil
}
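
// snapshotOcAdmUpgradeStatus runs 'oc adm upgrade status' once, retrying failed
// executions for up to two minutes, and delivers the result (stamped with the
// time of the first attempt) over ch.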
func snapshotOcAdmUpgradeStatus(ch chan *snapshot) {
	// TODO: I _think_ this should somehow use the adminRESTConfig given to StartCollection, but I don't know
	// how to pass that to exutil.NewCLI* or whether it is even possible. It seems to work this way, though.
	oc := exutil.NewCLIWithoutNamespace("adm-upgrade-status").AsAdmin()
	now := time.Now()

	var out string
	var err error
	// Retry on brief apiserver unavailability
	if errWait := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 2*time.Minute, true, func(context.Context) (bool, error) {
		cmd := oc.Run("adm", "upgrade", "status").EnvVar("OC_ENABLE_CMD_UPGRADE_STATUS", "true")
		out, err = cmd.Output()
		if err != nil {
			return false, nil
		}
		return true, nil
	}); errWait != nil {
		out = ""
		err = errWait
	}
	ch <- &snapshot{when: now, out: out, err: err}
}
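
// StartCollection launches a producer goroutine that snapshots 'oc adm upgrade
// status' once a minute and a consumer goroutine that stores the results. The
// producer stops when the framework cancels ctx, and the consumer then signals
// collectionDone.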
func (w *monitor) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
	if w.notSupportedReason != nil {
		return w.notSupportedReason
	}
	// TODO: The double goroutine spawn should probably be placed under some abstraction (see the sketch below
	// this function)
	go func(ctx context.Context) {
		snapshots := make(chan *snapshot)
		go func() {
			// This goroutine is the only writer of the ocAdmUpgradeStatus map, and CollectData only reads
			// it after collectionDone is signaled, so the map needs no locking.
			for snap := range snapshots {
				// TODO: Maybe also collect some cluster resources (CV? COs?) through recorder?
				w.ocAdmUpgradeStatus[snap.when] = snap
			}
			w.collectionDone <- struct{}{}
		}()
		// TODO: Configurable interval?
		// TODO: Collect multiple invocations (--details)? Would need another producer/consumer pair, and
		// collectionDone would likely need to be a WaitGroup

		wait.UntilWithContext(ctx, func(ctx context.Context) { snapshotOcAdmUpgradeStatus(snapshots) }, time.Minute)
		// UntilWithContext blocks until the framework cancels the context because it wants tests to stop.
		// When we get here, the last snapshotOcAdmUpgradeStatus producer has written to the snapshots
		// channel, so we can close it, which in turn lets the consumer finish and signal collectionDone.
		close(snapshots)
	}(ctx)

	return nil
}
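
// A minimal sketch (hypothetical, not yet used anywhere) of the abstraction the
// TODO in StartCollection asks for: a generic producer/consumer pair that calls
// produce on an interval until ctx is cancelled, hands every result to consume,
// and closes the returned channel once the consumer has drained all results.
// StartCollection could then boil down to a single
// collectPeriodically(ctx, time.Minute, ...) call.
func collectPeriodically[T any](ctx context.Context, interval time.Duration, produce func() T, consume func(T)) <-chan struct{} {
	done := make(chan struct{})
	items := make(chan T)
	go func() {
		for item := range items {
			consume(item)
		}
		close(done)
	}()
	go func() {
		// UntilWithContext only returns after ctx is cancelled and the last produce call has finished,
		// so no send can race with the close below.
		wait.UntilWithContext(ctx, func(context.Context) { items <- produce() }, interval)
		close(items)
	}()
	return done
}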

// CollectData waits for the collection goroutines to finish and reports a
// single JUnit result: the command must never fail on multi-node clusters and
// must fail in no more than 10% of its invocations on single-node ones.
func (w *monitor) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
	if w.notSupportedReason != nil {
		return nil, nil, w.notSupportedReason
	}

	// The framework cancels the context it gave StartCollection before it calls CollectData, but we need to
	// wait for the collection goroutines spawned in StartCollection to finish
	<-w.collectionDone

	noFailures := &junitapi.JUnitTestCase{
		Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status never fails",
	}

	var failures []string
	var total int
	for when, observed := range w.ocAdmUpgradeStatus {
		total++
		if observed.err != nil {
			failures = append(failures, fmt.Sprintf("- %s: %v", when.Format(time.RFC3339), observed.err))
		}
	}

	// Zero failures is too strict for at least SNO clusters, so tolerate a small failure rate there. Integer
	// math is fine for the comparison against the 10% threshold: e.g. 3 failures in 20 snapshots yield 15.
	failureRatePercent := 0
	if total > 0 {
		failureRatePercent = len(failures) * 100 / total
	}
	if (!w.isSNO && len(failures) > 0) || (w.isSNO && failureRatePercent > 10) {
		noFailures.FailureOutput = &junitapi.FailureOutput{
			Message: fmt.Sprintf("oc adm upgrade status failed %d times (of %d)", len(failures), total),
			Output:  strings.Join(failures, "\n"),
		}
	}

	// TODO: Maybe utilize Intervals somehow and do tests in ConstructComputedIntervals and EvaluateTestsFromConstructedIntervals

	return nil, []*junitapi.JUnitTestCase{noFailures}, nil
}

func (w *monitor) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
	return nil, w.notSupportedReason
}

func (w *monitor) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
	if w.notSupportedReason != nil {
		return nil, w.notSupportedReason
	}
	return nil, nil
}
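
// WriteContentToStorage dumps the output of each collected snapshot into a
// timestamped file under <storageDir>/adm-upgrade-status.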
func (w *monitor) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
	folderPath := path.Join(storageDir, "adm-upgrade-status")
	if err := os.MkdirAll(folderPath, os.ModePerm); err != nil {
		return fmt.Errorf("unable to create directory %s: %w", folderPath, err)
	}

	var errs []error
	for when, observed := range w.ocAdmUpgradeStatus {
		// Format the timestamp explicitly: the default time.Time string contains spaces and colons, which
		// make for awkward filenames
		outputFilename := fmt.Sprintf("adm-upgrade-status-%s_%s.txt", when.Format("20060102-150405"), timeSuffix)
		outputFile := filepath.Join(folderPath, outputFilename)
		if err := os.WriteFile(outputFile, []byte(observed.out), 0644); err != nil {
			errs = append(errs, fmt.Errorf("failed to write %s: %w", outputFile, err))
		}
	}
	return errors.NewAggregate(errs)
}

// Cleanup has nothing to release; the collection goroutines exit once the
// framework cancels the context given to StartCollection.
func (*monitor) Cleanup(ctx context.Context) error {
	return nil
}