Skip to content

Commit 7e8f07d

Browse files
Merge pull request #30031 from hongkailiu/pr29977
OTA-1580: Monitortest framework test for oc adm upgrade status
2 parents 391f2e7 + de89f9e commit 7e8f07d

File tree

4 files changed

+206
-0
lines changed

4 files changed

+206
-0
lines changed

pkg/cmd/openshift-tests/monitor/run/run_monitor_command.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"github.com/openshift/origin/pkg/clioptions/imagesetup"
1515
"github.com/openshift/origin/pkg/monitortestframework"
16+
exutil "github.com/openshift/origin/test/extended/util"
1617

1718
"github.com/openshift/origin/pkg/monitor/monitorapi"
1819
"github.com/openshift/origin/test/extended/util/image"
@@ -92,6 +93,9 @@ func (f *RunMonitorFlags) BindFlags(flags *pflag.FlagSet) {
9293
}
9394

9495
func (f *RunMonitorFlags) ToOptions() (*RunMonitorOptions, error) {
96+
// This is to set testsStarted = true to avoid panic
97+
exutil.WithCleanup(func() {})
98+
9599
var displayFilterFn monitorapi.EventIntervalMatchesFunc
96100
if f.DisplayFromNow {
97101
now := time.Now()

pkg/defaultmonitortests/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"github.com/openshift/origin/pkg/monitortestframework"
77
"github.com/openshift/origin/pkg/monitortests/authentication/legacyauthenticationmonitortests"
88
"github.com/openshift/origin/pkg/monitortests/authentication/requiredsccmonitortests"
9+
admupgradestatus "github.com/openshift/origin/pkg/monitortests/cli/adm_upgrade/status"
910
azuremetrics "github.com/openshift/origin/pkg/monitortests/cloud/azure/metrics"
1011
"github.com/openshift/origin/pkg/monitortests/clusterversionoperator/legacycvomonitortests"
1112
"github.com/openshift/origin/pkg/monitortests/clusterversionoperator/operatorstateanalyzer"
@@ -207,5 +208,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio
207208
monitorTestRegistry.AddMonitorTestOrDie("watch-namespaces", "Test Framework", watchnamespaces.NewNamespaceWatcher())
208209
monitorTestRegistry.AddMonitorTestOrDie("high-cpu-test-analyzer", "Test Framework", highcputestanalyzer.NewHighCPUTestAnalyzer())
209210

211+
monitorTestRegistry.AddMonitorTestOrDie("oc-adm-upgrade-status", "oc / update", admupgradestatus.NewOcAdmUpgradeStatusChecker())
212+
210213
return monitorTestRegistry
211214
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
2+
3+
approvers:
4+
- cluster-version-operator-test-case-approvers
5+
reviewers:
6+
- cluster-version-operator-test-case-reviewers
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
package admupgradestatus
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"path"
8+
"path/filepath"
9+
"strings"
10+
"time"
11+
12+
clientconfigv1 "github.com/openshift/client-go/config/clientset/versioned"
13+
"github.com/openshift/origin/pkg/monitortestframework"
14+
exutil "github.com/openshift/origin/test/extended/util"
15+
"k8s.io/apimachinery/pkg/util/errors"
16+
"k8s.io/apimachinery/pkg/util/wait"
17+
"k8s.io/client-go/kubernetes"
18+
"k8s.io/client-go/rest"
19+
20+
"github.com/openshift/origin/pkg/monitor/monitorapi"
21+
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
22+
)
23+
24+
type snapshot struct {
25+
when time.Time
26+
out string
27+
err error
28+
}
29+
type monitor struct {
30+
collectionDone chan struct{}
31+
ocAdmUpgradeStatus map[time.Time]*snapshot
32+
notSupportedReason error
33+
isSNO bool
34+
}
35+
36+
func NewOcAdmUpgradeStatusChecker() monitortestframework.MonitorTest {
37+
return &monitor{
38+
collectionDone: make(chan struct{}),
39+
ocAdmUpgradeStatus: map[time.Time]*snapshot{},
40+
}
41+
}
42+
43+
func (w *monitor) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
44+
kubeClient, err := kubernetes.NewForConfig(adminRESTConfig)
45+
if err != nil {
46+
return err
47+
}
48+
isMicroShift, err := exutil.IsMicroShiftCluster(kubeClient)
49+
if err != nil {
50+
return fmt.Errorf("unable to determine if cluster is MicroShift: %v", err)
51+
}
52+
if isMicroShift {
53+
w.notSupportedReason = &monitortestframework.NotSupportedError{Reason: "platform MicroShift not supported"}
54+
return w.notSupportedReason
55+
}
56+
clientconfigv1client, err := clientconfigv1.NewForConfig(adminRESTConfig)
57+
if err != nil {
58+
return err
59+
}
60+
61+
if ok, err := exutil.IsHypershift(ctx, clientconfigv1client); err != nil {
62+
return fmt.Errorf("unable to determine if cluster is Hypershift: %v", err)
63+
} else if ok {
64+
w.notSupportedReason = &monitortestframework.NotSupportedError{Reason: "platform Hypershift not supported"}
65+
return w.notSupportedReason
66+
}
67+
68+
if ok, err := exutil.IsSingleNode(ctx, clientconfigv1client); err != nil {
69+
return fmt.Errorf("unable to determine if cluster is single node: %v", err)
70+
} else {
71+
w.isSNO = ok
72+
}
73+
return nil
74+
}
75+
76+
func snapshotOcAdmUpgradeStatus(ch chan *snapshot) {
77+
// TODO: I _think_ this should somehow use the adminRESTConfig given to StartCollection but I don't know how to
78+
// how to do pass that to exutil.NewCLI* or if it is even possible. It seems to work this way though.
79+
oc := exutil.NewCLIWithoutNamespace("adm-upgrade-status").AsAdmin()
80+
now := time.Now()
81+
82+
var out string
83+
var err error
84+
// retry on brief apiserver unavailability
85+
if errWait := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 2*time.Minute, true, func(context.Context) (bool, error) {
86+
cmd := oc.Run("adm", "upgrade", "status").EnvVar("OC_ENABLE_CMD_UPGRADE_STATUS", "true")
87+
out, err = cmd.Output()
88+
if err != nil {
89+
return false, nil
90+
}
91+
return true, nil
92+
}); errWait != nil {
93+
out = ""
94+
err = errWait
95+
}
96+
ch <- &snapshot{when: now, out: out, err: err}
97+
}
98+
99+
func (w *monitor) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
100+
if w.notSupportedReason != nil {
101+
return w.notSupportedReason
102+
}
103+
// TODO: The double goroutine spawn should probably be placed under some abstraction
104+
go func(ctx context.Context) {
105+
snapshots := make(chan *snapshot)
106+
go func() {
107+
for snap := range snapshots {
108+
// TODO: Maybe also collect some cluster resources (CV? COs?) through recorder?
109+
w.ocAdmUpgradeStatus[snap.when] = snap
110+
}
111+
w.collectionDone <- struct{}{}
112+
}()
113+
// TODO: Configurable interval?
114+
// TODO: Collect multiple invocations (--details)? Would need more another producer/consumer pair and likely
115+
// collectionDone would need to be a WaitGroup
116+
117+
wait.UntilWithContext(ctx, func(ctx context.Context) { snapshotOcAdmUpgradeStatus(snapshots) }, time.Minute)
118+
// The UntilWithContext blocks until the framework cancels the context when it wants tests to stop -> when we
119+
// get here, we know last snapshotOcAdmUpgradeStatus producer wrote to the snapshots channel, we can close it
120+
// which in turn will allow the consumer to finish and signal collectionDone.
121+
close(snapshots)
122+
}(ctx)
123+
124+
return nil
125+
}
126+
127+
func (w *monitor) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
128+
if w.notSupportedReason != nil {
129+
return nil, nil, w.notSupportedReason
130+
}
131+
132+
// The framework cancels the context it gave StartCollection before it calls CollectData, but we need to wait for
133+
// the collection goroutines spawned in StartedCollection to finish
134+
<-w.collectionDone
135+
136+
noFailures := &junitapi.JUnitTestCase{
137+
Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc amd upgrade status never fails",
138+
}
139+
140+
var failures []string
141+
var total int
142+
for when, observed := range w.ocAdmUpgradeStatus {
143+
total++
144+
if observed.err != nil {
145+
failures = append(failures, fmt.Sprintf("- %s: %v", when.Format(time.RFC3339), observed.err))
146+
}
147+
}
148+
149+
// Zero failures is too strict for at least SNO clusters
150+
p := (len(failures) / total) * 100
151+
if (!w.isSNO && p > 0) || (w.isSNO && p > 10) {
152+
noFailures.FailureOutput = &junitapi.FailureOutput{
153+
Message: fmt.Sprintf("oc adm upgrade status failed %d times (of %d)", len(failures), len(w.ocAdmUpgradeStatus)),
154+
Output: strings.Join(failures, "\n"),
155+
}
156+
}
157+
158+
// TODO: Maybe utilize Intervals somehow and do tests in ComputeComputedIntervals and EvaluateTestsFromConstructedIntervals
159+
160+
return nil, []*junitapi.JUnitTestCase{noFailures}, nil
161+
}
162+
163+
func (w *monitor) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
164+
return nil, w.notSupportedReason
165+
}
166+
167+
func (w *monitor) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
168+
if w.notSupportedReason != nil {
169+
return nil, w.notSupportedReason
170+
}
171+
return nil, nil
172+
}
173+
174+
func (w *monitor) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
175+
folderPath := path.Join(storageDir, "adm-upgrade-status")
176+
if err := os.MkdirAll(folderPath, os.ModePerm); err != nil {
177+
return fmt.Errorf("unable to create directory %s: %w", folderPath, err)
178+
}
179+
180+
var errs []error
181+
for when, observed := range w.ocAdmUpgradeStatus {
182+
outputFilename := fmt.Sprintf("adm-upgrade-status-%s_%s.txt", when, timeSuffix)
183+
outputFile := filepath.Join(folderPath, outputFilename)
184+
if err := os.WriteFile(outputFile, []byte(observed.out), 0644); err != nil {
185+
errs = append(errs, fmt.Errorf("failed to write %s: %w", outputFile, err))
186+
}
187+
}
188+
return errors.NewAggregate(errs)
189+
}
190+
191+
func (*monitor) Cleanup(ctx context.Context) error {
192+
return nil
193+
}

0 commit comments

Comments
 (0)