Commit 038b90d

return error instead of fatalf
1 parent f214d8e commit 038b90d


test/integration/scheduler_perf/scheduler_perf.go

Lines changed: 83 additions & 53 deletions
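The whole change applies one mechanical pattern: helpers that previously aborted the run with tCtx.Fatalf(...) now report a non-nil error, and only the top-level caller in RunBenchmarkPerfScheduling turns that error into a test failure. Below is a minimal sketch of that pattern under assumed names (runStep, runAll, doWork are hypothetical, not the real scheduler_perf helpers):

package perf

import (
	"fmt"
	"testing"
)

// doWork stands in for an arbitrary benchmark operation.
func doWork(opIndex int) error {
	if opIndex < 0 {
		return fmt.Errorf("invalid op index %d", opIndex)
	}
	return nil
}

// Before: the helper fails the test itself and cannot be reused elsewhere.
//
//	func runStep(tb testing.TB, opIndex int) {
//		if err := doWork(opIndex); err != nil {
//			tb.Fatalf("op %d: %v", opIndex, err)
//		}
//	}

// After: the helper returns the error and leaves the decision to the caller.
func runStep(opIndex int) error {
	if err := doWork(opIndex); err != nil {
		return fmt.Errorf("op %d: %w", opIndex, err)
	}
	return nil
}

// runAll converts errors into a test failure in exactly one place.
func runAll(tb testing.TB, steps int) {
	for i := 0; i < steps; i++ {
		if err := runStep(i); err != nil {
			tb.Fatalf("%v", err)
		}
	}
}

Note that most converted call sites in the diff keep the original %v verb inside fmt.Errorf; switching to %w (as in the sketch) would additionally preserve the wrapped error for errors.Is/errors.As.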
@@ -1194,7 +1194,10 @@ func RunBenchmarkPerfScheduling(b *testing.B, configFile string, topicName strin
 				b.Fatalf("workload %s is not valid: %v", w.Name, err)
 			}
 
-			results := runWorkload(tCtx, tc, w, informerFactory)
+			results, err := runWorkload(tCtx, tc, w, informerFactory)
+			if err != nil {
+				tCtx.Fatalf("%s: %v", w.Name, err)
+			}
 			dataItems.DataItems = append(dataItems.DataItems, results...)
 
 			if len(results) > 0 {
@@ -1410,7 +1413,7 @@ func checkEmptyInFlightEvents() error {
 	return nil
 }
 
-func startCollectingMetrics(tCtx ktesting.TContext, collectorWG *sync.WaitGroup, podInformer coreinformers.PodInformer, mcc *metricsCollectorConfig, throughputErrorMargin float64, opIndex int, name string, namespaces []string, labelSelector map[string]string) (ktesting.TContext, []testDataCollector) {
+func startCollectingMetrics(tCtx ktesting.TContext, collectorWG *sync.WaitGroup, podInformer coreinformers.PodInformer, mcc *metricsCollectorConfig, throughputErrorMargin float64, opIndex int, name string, namespaces []string, labelSelector map[string]string) (ktesting.TContext, []testDataCollector, error) {
 	collectorCtx := ktesting.WithCancel(tCtx)
 	workloadName := tCtx.Name()
 	// The first part is the same for each workload, therefore we can strip it.
@@ -1421,20 +1424,20 @@ func startCollectingMetrics(tCtx ktesting.TContext, collectorWG *sync.WaitGroup,
 		collector := collector
 		err := collector.init()
 		if err != nil {
-			tCtx.Fatalf("op %d: Failed to initialize data collector: %v", opIndex, err)
+			return nil, nil, fmt.Errorf("op %d: Failed to initialize data collector: %v", opIndex, err)
 		}
 		collectorWG.Add(1)
 		go func() {
 			defer collectorWG.Done()
 			collector.run(collectorCtx)
 		}()
 	}
-	return collectorCtx, collectors
+	return collectorCtx, collectors, nil
 }
 
-func stopCollectingMetrics(tCtx ktesting.TContext, collectorCtx ktesting.TContext, collectorWG *sync.WaitGroup, threshold float64, tms thresholdMetricSelector, opIndex int, collectors []testDataCollector) []DataItem {
+func stopCollectingMetrics(tCtx ktesting.TContext, collectorCtx ktesting.TContext, collectorWG *sync.WaitGroup, threshold float64, tms thresholdMetricSelector, opIndex int, collectors []testDataCollector) ([]DataItem, error) {
 	if collectorCtx == nil {
-		tCtx.Fatalf("op %d: Missing startCollectingMetrics operation before stopping", opIndex)
+		return nil, fmt.Errorf("op %d: Missing startCollectingMetrics operation before stopping", opIndex)
 	}
 	collectorCtx.Cancel("collecting metrics, collector must stop first")
 	collectorWG.Wait()
@@ -1447,7 +1450,7 @@ func stopCollectingMetrics(tCtx ktesting.TContext, collectorCtx ktesting.TContex
 			tCtx.Errorf("op %d: %s", opIndex, err)
 		}
 	}
-	return dataItems
+	return dataItems, nil
 }
 
 type WorkloadExecutor struct {
@@ -1465,7 +1468,7 @@ type WorkloadExecutor struct {
 	nextNodeIndex int
 }
 
-func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFactory informers.SharedInformerFactory) []DataItem {
+func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFactory informers.SharedInformerFactory) ([]DataItem, error) {
 	b, benchmarking := tCtx.TB().(*testing.B)
 	if benchmarking {
 		start := time.Now()
@@ -1513,70 +1516,74 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
 	for opIndex, op := range unrollWorkloadTemplate(tCtx, tc.WorkloadTemplate, w) {
 		realOp, err := op.realOp.patchParams(w)
 		if err != nil {
-			tCtx.Fatalf("op %d: %v", opIndex, err)
+			return nil, fmt.Errorf("op %d: %v", opIndex, err)
 		}
 		select {
 		case <-tCtx.Done():
-			tCtx.Fatalf("op %d: %v", opIndex, context.Cause(tCtx))
+			return nil, fmt.Errorf("op %d: %v", opIndex, context.Cause(tCtx))
 		default:
 		}
 		switch concreteOp := realOp.(type) {
 		case *createNodesOp:
-			executor.runCreateNodesOp(opIndex, concreteOp)
+			err = executor.runCreateNodesOp(opIndex, concreteOp)
 		case *createNamespacesOp:
-			executor.runCreateNamespaceOp(opIndex, concreteOp)
+			err = executor.runCreateNamespaceOp(opIndex, concreteOp)
 		case *createPodsOp:
-			executor.runCreatePodsOp(opIndex, concreteOp)
+			err = executor.runCreatePodsOp(opIndex, concreteOp)
 		case *deletePodsOp:
-			executor.runDeletePodsOp(opIndex, concreteOp)
+			err = executor.runDeletePodsOp(opIndex, concreteOp)
 		case *churnOp:
-			executor.runChurnOp(opIndex, concreteOp)
+			err = executor.runChurnOp(opIndex, concreteOp)
 		case *barrierOp:
-			executor.runBarrierOp(opIndex, concreteOp)
+			err = executor.runBarrierOp(opIndex, concreteOp)
 		case *sleepOp:
 			executor.runSleepOp(concreteOp)
 		case *startCollectingMetricsOp:
-			executor.runStartCollectingMetricsOp(opIndex, concreteOp)
+			err = executor.runStartCollectingMetricsOp(opIndex, concreteOp)
 		case *stopCollectingMetricsOp:
-			executor.runStopCollectingMetrics(opIndex)
+			err = executor.runStopCollectingMetrics(opIndex)
 		default:
-			executor.runDefaultOp(opIndex, concreteOp)
+			err = executor.runDefaultOp(opIndex, concreteOp)
+		}
+		if err != nil {
+			return nil, err
 		}
 	}
 
 	// check unused params and inform users
 	unusedParams := w.unusedParams()
 	if len(unusedParams) != 0 {
-		tCtx.Fatalf("the parameters %v are defined on workload %s, but unused.\nPlease make sure there are no typos.", unusedParams, w.Name)
+		return nil, fmt.Errorf("the parameters %v are defined on workload %s, but unused.\nPlease make sure there are no typos.", unusedParams, w.Name)
 	}
 
 	// Some tests have unschedulable pods. Do not add an implicit barrier at the
 	// end as we do not want to wait for them.
-	return executor.dataItems
+	return executor.dataItems, nil
 }
 
-func (e *WorkloadExecutor) runCreateNodesOp(opIndex int, op *createNodesOp) {
+func (e *WorkloadExecutor) runCreateNodesOp(opIndex int, op *createNodesOp) error {
 	nodePreparer, err := getNodePreparer(fmt.Sprintf("node-%d-", opIndex), op, e.tCtx.Client())
 	if err != nil {
-		e.tCtx.Fatalf("op %d: %v", opIndex, err)
+		return fmt.Errorf("op %d: %v", opIndex, err)
 	}
 	if err := nodePreparer.PrepareNodes(e.tCtx, e.nextNodeIndex); err != nil {
-		e.tCtx.Fatalf("op %d: %v", opIndex, err)
+		return fmt.Errorf("op %d: %v", opIndex, err)
 	}
 	e.nextNodeIndex += op.Count
+	return nil
 }
 
-func (e *WorkloadExecutor) runCreateNamespaceOp(opIndex int, op *createNamespacesOp) {
+func (e *WorkloadExecutor) runCreateNamespaceOp(opIndex int, op *createNamespacesOp) error {
 	nsPreparer, err := newNamespacePreparer(e.tCtx, op)
 	if err != nil {
-		e.tCtx.Fatalf("op %d: %v", opIndex, err)
+		return fmt.Errorf("op %d: %v", opIndex, err)
 	}
 	if err := nsPreparer.prepare(e.tCtx); err != nil {
 		err2 := nsPreparer.cleanup(e.tCtx)
 		if err2 != nil {
 			err = fmt.Errorf("prepare: %w; cleanup: %w", err, err2)
 		}
-		e.tCtx.Fatalf("op %d: %v", opIndex, err)
+		return fmt.Errorf("op %d: %v", opIndex, err)
 	}
 	for _, n := range nsPreparer.namespaces() {
 		if _, ok := e.numPodsScheduledPerNamespace[n]; ok {
@@ -1585,25 +1592,26 @@ func (e *WorkloadExecutor) runCreateNamespaceOp(opIndex int, op *createNamespace
 		}
 		e.numPodsScheduledPerNamespace[n] = 0
 	}
+	return nil
 }
 
-func (e *WorkloadExecutor) runBarrierOp(opIndex int, op *barrierOp) {
+func (e *WorkloadExecutor) runBarrierOp(opIndex int, op *barrierOp) error {
 	for _, namespace := range op.Namespaces {
 		if _, ok := e.numPodsScheduledPerNamespace[namespace]; !ok {
-			e.tCtx.Fatalf("op %d: unknown namespace %s", opIndex, namespace)
+			return fmt.Errorf("op %d: unknown namespace %s", opIndex, namespace)
 		}
 	}
 	switch op.StageRequirement {
 	case Attempted:
 		if err := waitUntilPodsAttempted(e.tCtx, e.podInformer, op.LabelSelector, op.Namespaces, e.numPodsScheduledPerNamespace); err != nil {
-			e.tCtx.Fatalf("op %d: %v", opIndex, err)
+			return fmt.Errorf("op %d: %v", opIndex, err)
 		}
 	case Scheduled:
 		// Default should be treated like "Scheduled", so handling both in the same way.
 		fallthrough
 	default:
 		if err := waitUntilPodsScheduled(e.tCtx, e.podInformer, op.LabelSelector, op.Namespaces, e.numPodsScheduledPerNamespace); err != nil {
-			e.tCtx.Fatalf("op %d: %v", opIndex, err)
+			return fmt.Errorf("op %d: %v", opIndex, err)
 		}
 		// At the end of the barrier, we can be sure that there are no pods
 		// pending scheduling in the namespaces that we just blocked on.
@@ -1615,6 +1623,7 @@ func (e *WorkloadExecutor) runBarrierOp(opIndex int, op *barrierOp) {
 			}
 		}
 	}
+	return nil
 }
 
 func (e *WorkloadExecutor) runSleepOp(op *sleepOp) {
@@ -1624,13 +1633,17 @@ func (e *WorkloadExecutor) runSleepOp(op *sleepOp) {
 	}
 }
 
-func (e *WorkloadExecutor) runStopCollectingMetrics(opIndex int) {
-	items := stopCollectingMetrics(e.tCtx, e.collectorCtx, &e.collectorWG, e.workload.Threshold, *e.workload.ThresholdMetricSelector, opIndex, e.collectors)
+func (e *WorkloadExecutor) runStopCollectingMetrics(opIndex int) error {
+	items, err := stopCollectingMetrics(e.tCtx, e.collectorCtx, &e.collectorWG, e.workload.Threshold, *e.workload.ThresholdMetricSelector, opIndex, e.collectors)
+	if err != nil {
+		return err
+	}
 	e.dataItems = append(e.dataItems, items...)
 	e.collectorCtx = nil
+	return nil
 }
 
-func (e *WorkloadExecutor) runCreatePodsOp(opIndex int, op *createPodsOp) {
+func (e *WorkloadExecutor) runCreatePodsOp(opIndex int, op *createPodsOp) error {
 	// define Pod's namespace automatically, and create that namespace.
 	namespace := fmt.Sprintf("namespace-%d", opIndex)
 	if op.Namespace != nil {
@@ -1643,17 +1656,21 @@ func (e *WorkloadExecutor) runCreatePodsOp(opIndex int, op *createPodsOp) {
 
 	if op.CollectMetrics {
 		if e.collectorCtx != nil {
-			e.tCtx.Fatalf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
+			return fmt.Errorf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
+		}
+		var err error
+		e.collectorCtx, e.collectors, err = startCollectingMetrics(e.tCtx, &e.collectorWG, e.podInformer, e.testCase.MetricsCollectorConfig, e.throughputErrorMargin, opIndex, namespace, []string{namespace}, nil)
+		if err != nil {
+			return err
 		}
-		e.collectorCtx, e.collectors = startCollectingMetrics(e.tCtx, &e.collectorWG, e.podInformer, e.testCase.MetricsCollectorConfig, e.throughputErrorMargin, opIndex, namespace, []string{namespace}, nil)
 		e.tCtx.TB().Cleanup(func() {
 			if e.collectorCtx != nil {
 				e.collectorCtx.Cancel("cleaning up")
 			}
 		})
 	}
 	if err := createPodsRapidly(e.tCtx, namespace, op); err != nil {
-		e.tCtx.Fatalf("op %d: %v", opIndex, err)
+		return fmt.Errorf("op %d: %v", opIndex, err)
 	}
 	switch {
 	case op.SkipWaitToCompletion:
@@ -1662,29 +1679,33 @@ func (e *WorkloadExecutor) runCreatePodsOp(opIndex int, op *createPodsOp) {
 		e.numPodsScheduledPerNamespace[namespace] += op.Count
 	case op.SteadyState:
 		if err := createPodsSteadily(e.tCtx, namespace, e.podInformer, op); err != nil {
-			e.tCtx.Fatalf("op %d: %v", opIndex, err)
+			return fmt.Errorf("op %d: %v", opIndex, err)
 		}
 	default:
 		if err := waitUntilPodsScheduledInNamespace(e.tCtx, e.podInformer, nil, namespace, op.Count); err != nil {
-			e.tCtx.Fatalf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err)
+			return fmt.Errorf("op %d: error in waiting for pods to get scheduled: %v", opIndex, err)
 		}
 	}
 	if op.CollectMetrics {
 		// CollectMetrics and SkipWaitToCompletion can never be true at the
 		// same time, so if we're here, it means that all pods have been
 		// scheduled.
-		items := stopCollectingMetrics(e.tCtx, e.collectorCtx, &e.collectorWG, e.workload.Threshold, *e.workload.ThresholdMetricSelector, opIndex, e.collectors)
+		items, err := stopCollectingMetrics(e.tCtx, e.collectorCtx, &e.collectorWG, e.workload.Threshold, *e.workload.ThresholdMetricSelector, opIndex, e.collectors)
+		if err != nil {
+			return err
+		}
 		e.dataItems = append(e.dataItems, items...)
 		e.collectorCtx = nil
 	}
+	return nil
 }
 
-func (e *WorkloadExecutor) runDeletePodsOp(opIndex int, op *deletePodsOp) {
+func (e *WorkloadExecutor) runDeletePodsOp(opIndex int, op *deletePodsOp) error {
 	labelSelector := labels.ValidatedSetSelector(op.LabelSelector)
 
 	podsToDelete, err := e.podInformer.Lister().Pods(op.Namespace).List(labelSelector)
 	if err != nil {
-		e.tCtx.Fatalf("op %d: error in listing pods in the namespace %s: %v", opIndex, op.Namespace, err)
+		return fmt.Errorf("op %d: error in listing pods in the namespace %s: %v", opIndex, op.Namespace, err)
 	}
 
 	deletePods := func(opIndex int) {
@@ -1727,9 +1748,10 @@ func (e *WorkloadExecutor) runDeletePodsOp(opIndex int, op *deletePodsOp) {
 	} else {
 		deletePods(opIndex)
 	}
+	return nil
 }
 
-func (e *WorkloadExecutor) runChurnOp(opIndex int, op *churnOp) {
+func (e *WorkloadExecutor) runChurnOp(opIndex int, op *churnOp) error {
 	var namespace string
 	if op.Namespace != nil {
 		namespace = *op.Namespace
@@ -1740,20 +1762,20 @@ func (e *WorkloadExecutor) runChurnOp(opIndex int, op *churnOp) {
 	// Ensure the namespace exists.
 	nsObj := &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}
 	if _, err := e.tCtx.Client().CoreV1().Namespaces().Create(e.tCtx, nsObj, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
-		e.tCtx.Fatalf("op %d: unable to create namespace %v: %v", opIndex, namespace, err)
+		return fmt.Errorf("op %d: unable to create namespace %v: %v", opIndex, namespace, err)
 	}
 
 	var churnFns []func(name string) string
 
 	for i, path := range op.TemplatePaths {
 		unstructuredObj, gvk, err := getUnstructuredFromFile(path)
 		if err != nil {
-			e.tCtx.Fatalf("op %d: unable to parse the %v-th template path: %v", opIndex, i, err)
+			return fmt.Errorf("op %d: unable to parse the %v-th template path: %v", opIndex, i, err)
 		}
 		// Obtain GVR.
 		mapping, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version)
 		if err != nil {
-			e.tCtx.Fatalf("op %d: unable to find GVR for %v: %v", opIndex, gvk, err)
+			return fmt.Errorf("op %d: unable to find GVR for %v: %v", opIndex, gvk, err)
 		}
 		gvr := mapping.Resource
 		// Distinguish cluster-scoped with namespaced API objects.
@@ -1833,41 +1855,49 @@ func (e *WorkloadExecutor) runChurnOp(opIndex int, op *churnOp) {
 			}
 		}
 	}()
+	return nil
 }
 
-func (e *WorkloadExecutor) runDefaultOp(opIndex int, op realOp) {
+func (e *WorkloadExecutor) runDefaultOp(opIndex int, op realOp) error {
 	runable, ok := op.(runnableOp)
 	if !ok {
-		e.tCtx.Fatalf("op %d: invalid op %v", opIndex, op)
+		return fmt.Errorf("op %d: invalid op %v", opIndex, op)
 	}
 	for _, namespace := range runable.requiredNamespaces() {
 		createNamespaceIfNotPresent(e.tCtx, namespace, &e.numPodsScheduledPerNamespace)
 	}
 	runable.run(e.tCtx)
+	return nil
 }
 
-func (e *WorkloadExecutor) runStartCollectingMetricsOp(opIndex int, op *startCollectingMetricsOp) {
+func (e *WorkloadExecutor) runStartCollectingMetricsOp(opIndex int, op *startCollectingMetricsOp) error {
	if e.collectorCtx != nil {
-		e.tCtx.Fatalf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
+		return fmt.Errorf("op %d: Metrics collection is overlapping. Probably second collector was started before stopping a previous one", opIndex)
+	}
+	var err error
+	e.collectorCtx, e.collectors, err = startCollectingMetrics(e.tCtx, &e.collectorWG, e.podInformer, e.testCase.MetricsCollectorConfig, e.throughputErrorMargin, opIndex, op.Name, op.Namespaces, op.LabelSelector)
+	if err != nil {
+		return err
 	}
-	e.collectorCtx, e.collectors = startCollectingMetrics(e.tCtx, &e.collectorWG, e.podInformer, e.testCase.MetricsCollectorConfig, e.throughputErrorMargin, opIndex, op.Name, op.Namespaces, op.LabelSelector)
 	e.tCtx.TB().Cleanup(func() {
 		if e.collectorCtx != nil {
 			e.collectorCtx.Cancel("cleaning up")
 		}
 	})
+	return nil
 }
 
-func createNamespaceIfNotPresent(tCtx ktesting.TContext, namespace string, podsPerNamespace *map[string]int) {
+func createNamespaceIfNotPresent(tCtx ktesting.TContext, namespace string, podsPerNamespace *map[string]int) error {
 	if _, ok := (*podsPerNamespace)[namespace]; !ok {
 		// The namespace has not created yet.
 		// So, create that and register it.
 		_, err := tCtx.Client().CoreV1().Namespaces().Create(tCtx, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}, metav1.CreateOptions{})
 		if err != nil {
-			tCtx.Fatalf("failed to create namespace for Pod: %v", namespace)
+			return fmt.Errorf("failed to create namespace for Pod: %v", namespace)
 		}
 		(*podsPerNamespace)[namespace] = 0
 	}
+	return nil
 }
 
 type testDataCollector interface {
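One practical motivation for returning errors from these helpers (an observation about Go's testing package, not something stated in the commit itself): testing.TB.Fatalf must be called from the goroutine running the test or benchmark, so a Fatalf-based helper cannot safely be used from worker goroutines, while an error-returning helper can. A hypothetical sketch, not part of scheduler_perf:

package perf

import (
	"fmt"
	"testing"
)

// runOpsConcurrently runs each op in its own goroutine. The workers return
// errors over a buffered channel instead of calling tb.Fatalf themselves,
// so the failure decision stays on the test goroutine.
func runOpsConcurrently(tb testing.TB, ops []func() error) {
	errCh := make(chan error, len(ops)) // buffered: workers never block
	for i, op := range ops {
		i, op := i, op
		go func() {
			if err := op(); err != nil {
				errCh <- fmt.Errorf("op %d: %w", i, err)
				return
			}
			errCh <- nil
		}()
	}
	for range ops {
		if err := <-errCh; err != nil {
			tb.Fatalf("%v", err) // safe: called from the test goroutine
		}
	}
}

The same property is what lets the per-op helpers above be composed freely: each returns an error, and the single Fatalf now lives in RunBenchmarkPerfScheduling.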
