Skip to content

Commit 08dd995

Browse files
authored
Merge pull request kubernetes#126886 from pohly/scheduler-perf-output
scheduler_perf: output
2 parents 95af4b0 + bf1188d commit 08dd995

File tree

7 files changed

+116
-22
lines changed

7 files changed

+116
-22
lines changed

cmd/kube-apiserver/app/testing/testserver.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,11 @@ func NewDefaultTestServerOptions() *TestServerInstanceOptions {
150150
// files that because Golang testing's call to os.Exit will not give a stop channel go routine
151151
// enough time to remove temporary files.
152152
func StartTestServer(t ktesting.TB, instanceOptions *TestServerInstanceOptions, customFlags []string, storageConfig *storagebackend.Config) (result TestServer, err error) {
153-
tCtx := ktesting.Init(t)
153+
// Some callers may have initialize ktesting already.
154+
tCtx, ok := t.(ktesting.TContext)
155+
if !ok {
156+
tCtx = ktesting.Init(t)
157+
}
154158

155159
if instanceOptions == nil {
156160
instanceOptions = NewDefaultTestServerOptions()

test/integration/framework/etcd.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"os/exec"
2727
"strconv"
2828
"strings"
29+
"sync"
2930
"syscall"
3031
"testing"
3132
"time"
@@ -146,7 +147,10 @@ func RunCustomEtcd(dataDir string, customFlags []string, output io.Writer) (url
146147
// try to exit etcd gracefully
147148
defer cancel()
148149
cmd.Process.Signal(syscall.SIGTERM)
150+
var wg sync.WaitGroup
151+
wg.Add(1)
149152
go func() {
153+
defer wg.Done()
150154
select {
151155
case <-ctx.Done():
152156
klog.Infof("etcd exited gracefully, context cancelled")
@@ -156,6 +160,7 @@ func RunCustomEtcd(dataDir string, customFlags []string, output io.Writer) (url
156160
}
157161
}()
158162
err := cmd.Wait()
163+
wg.Wait()
159164
klog.Infof("etcd exit status: %v", err)
160165
err = os.RemoveAll(etcdDataDir)
161166
if err != nil {

test/integration/framework/perf_utils.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package framework
1818

1919
import (
2020
"context"
21+
"fmt"
2122

2223
v1 "k8s.io/api/core/v1"
2324
"k8s.io/apimachinery/pkg/api/resource"
@@ -109,20 +110,19 @@ func (p *IntegrationTestNodePreparer) PrepareNodes(ctx context.Context, nextNode
109110
}
110111
}
111112
if err != nil {
112-
klog.Fatalf("Error creating node: %v", err)
113+
return fmt.Errorf("creating node: %w", err)
113114
}
114115
}
115116

116117
nodes, err := waitListAllNodes(p.client)
117118
if err != nil {
118-
klog.Fatalf("Error listing nodes: %v", err)
119+
return fmt.Errorf("listing nodes: %w", err)
119120
}
120121
index := nextNodeIndex
121122
for _, v := range p.countToStrategy {
122123
for i := 0; i < v.Count; i, index = i+1, index+1 {
123124
if err := testutils.DoPrepareNode(ctx, p.client, &nodes.Items[index], v.Strategy); err != nil {
124-
klog.Errorf("Aborting node preparation: %v", err)
125-
return err
125+
return fmt.Errorf("aborting node preparation: %w", err)
126126
}
127127
}
128128
}

test/integration/scheduler_perf/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ performance.
102102
During interactive debugging sessions it is possible to enable per-test output
103103
via -use-testing-log.
104104

105+
Log output can be quite large, in particular when running the large benchmarks
106+
and when not using -use-testing-log. For benchmarks, we want to produce that
107+
log output in a realistic way (= write to disk using the normal logging
108+
backends) and only capture the output of a specific test as part of the job
109+
results when that test failed. Therefore each test redirects its own output if
110+
the ARTIFACTS env variable is set to a `$ARTIFACTS/<test name>.log` file and
111+
removes that file only if the test passed.
112+
113+
105114
### Integration tests
106115

107116
To run integration tests, use:

test/integration/scheduler_perf/main_test.go

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,15 @@ import (
2323
"strings"
2424
"testing"
2525

26-
"k8s.io/apimachinery/pkg/util/runtime"
27-
"k8s.io/component-base/featuregate"
2826
"k8s.io/component-base/logs"
2927
logsapi "k8s.io/component-base/logs/api/v1"
3028
_ "k8s.io/component-base/logs/json/register"
3129
"k8s.io/kubernetes/test/utils/ktesting"
3230
)
3331

34-
// Run with -v=2, this is the default log level in production.
35-
//
36-
// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
37-
// with more log output.
38-
const defaultVerbosity = 2
39-
4032
func TestMain(m *testing.M) {
4133
// Run with -v=2, this is the default log level in production.
42-
ktesting.SetDefaultVerbosity(defaultVerbosity)
34+
ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)
4335

4436
// test/integration/framework/flags.go unconditionally initializes the
4537
// logging flags. That's correct for most tests, but in the
@@ -56,21 +48,17 @@ func TestMain(m *testing.M) {
5648
})
5749
flag.CommandLine = &fs
5850

59-
featureGate := featuregate.NewFeatureGate()
60-
runtime.Must(logsapi.AddFeatureGates(featureGate))
61-
flag.Var(featureGate, "feature-gate",
51+
flag.Var(LoggingFeatureGate, "feature-gate",
6252
"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
63-
"Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
64-
c := logsapi.NewLoggingConfiguration()
65-
c.Verbosity = defaultVerbosity
53+
"Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))
6654

6755
// This would fail if we hadn't removed the logging flags above.
68-
logsapi.AddGoFlags(c, flag.CommandLine)
56+
logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)
6957

7058
flag.Parse()
7159

7260
logs.InitLogs()
73-
if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
61+
if err := logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate); err != nil {
7462
fmt.Fprintf(os.Stderr, "%v\n", err)
7563
os.Exit(1)
7664
}

test/integration/scheduler_perf/scheduler_perf.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"math"
2727
"os"
2828
"path"
29+
"regexp"
2930
"strings"
3031
"sync"
3132
"testing"
@@ -38,6 +39,7 @@ import (
3839
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3940
"k8s.io/apimachinery/pkg/labels"
4041
"k8s.io/apimachinery/pkg/runtime/schema"
42+
"k8s.io/apimachinery/pkg/util/runtime"
4143
"k8s.io/apimachinery/pkg/util/wait"
4244
utilfeature "k8s.io/apiserver/pkg/util/feature"
4345
cacheddiscovery "k8s.io/client-go/discovery/cached/memory"
@@ -48,6 +50,7 @@ import (
4850
"k8s.io/client-go/restmapper"
4951
"k8s.io/component-base/featuregate"
5052
featuregatetesting "k8s.io/component-base/featuregate/testing"
53+
logsapi "k8s.io/component-base/logs/api/v1"
5154
"k8s.io/component-base/metrics/legacyregistry"
5255
"k8s.io/klog/v2"
5356
"k8s.io/kubernetes/pkg/scheduler/apis/config"
@@ -94,6 +97,29 @@ const (
9497
pluginLabelName = "plugin"
9598
)
9699

100+
// Run with -v=2, this is the default log level in production.
101+
//
102+
// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
103+
// with more log output.
104+
const DefaultLoggingVerbosity = 2
105+
106+
var LoggingFeatureGate FeatureGateFlag
107+
var LoggingConfig *logsapi.LoggingConfiguration
108+
109+
type FeatureGateFlag interface {
110+
featuregate.FeatureGate
111+
flag.Value
112+
}
113+
114+
func init() {
115+
f := featuregate.NewFeatureGate()
116+
runtime.Must(logsapi.AddFeatureGates(f))
117+
LoggingFeatureGate = f
118+
119+
LoggingConfig = logsapi.NewLoggingConfiguration()
120+
LoggingConfig.Verbosity = DefaultLoggingVerbosity
121+
}
122+
97123
var (
98124
defaultMetricsCollectorConfig = metricsCollectorConfig{
99125
Metrics: map[string][]*labelValues{
@@ -760,8 +786,62 @@ func initTestOutput(tb testing.TB) io.Writer {
760786

761787
var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
762788

789+
var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
790+
763791
func setupTestCase(t testing.TB, tc *testCase, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
764792
tCtx := ktesting.Init(t, initoption.PerTestOutput(*useTestingLog))
793+
artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
794+
if !*useTestingLog && doArtifacts {
795+
// Reconfigure logging so that it goes to a separate file per
796+
// test instead of stderr. If the test passes, the file gets
797+
// deleted. The overall output can be very large (> 200 MB for
798+
// ci-benchmark-scheduler-perf-master). With this approach, we
799+
// have log output for failures without having to store large
800+
// amounts of data that no-one is looking at. The performance
801+
// is the same as writing to stderr.
802+
if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
803+
t.Fatalf("Failed to reset the logging configuration: %v", err)
804+
}
805+
logfileName := path.Join(artifacts, specialFilenameChars.ReplaceAllString(t.Name(), "_")+".log")
806+
out, err := os.Create(logfileName)
807+
if err != nil {
808+
t.Fatalf("Failed to create per-test log output file: %v", err)
809+
}
810+
t.Cleanup(func() {
811+
// Everything should have stopped by now, checked below
812+
// by GoleakCheck (which runs first during test
813+
// shutdown!). Therefore we can clean up. Errors get logged
814+
// and fail the test, but cleanup tries to continue.
815+
//
816+
// Note that the race detector will flag any goroutine
817+
// as causing a race if there is no explicit wait for
818+
// that goroutine to stop. We know that they must have
819+
// stopped (GoLeakCheck!) but the race detector
820+
// doesn't.
821+
//
822+
// This is a major issue because many Kubernetes goroutines get
823+
// started without waiting for them to stop :-(
824+
if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
825+
t.Errorf("Failed to reset the logging configuration: %v", err)
826+
}
827+
if err := out.Close(); err != nil {
828+
t.Errorf("Failed to close the per-test log output file: %s: %v", logfileName, err)
829+
}
830+
if !t.Failed() {
831+
if err := os.Remove(logfileName); err != nil {
832+
t.Errorf("Failed to remove the per-test log output file: %v", err)
833+
}
834+
}
835+
})
836+
opts := &logsapi.LoggingOptions{
837+
ErrorStream: out,
838+
InfoStream: out,
839+
}
840+
if err := logsapi.ValidateAndApplyWithOptions(LoggingConfig, opts, LoggingFeatureGate); err != nil {
841+
t.Fatalf("Failed to apply the per-test logging configuration: %v", err)
842+
}
843+
844+
}
765845

766846
// Ensure that there are no leaked
767847
// goroutines. They could influence

test/utils/ktesting/contexthelper.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ func withTimeout(ctx context.Context, tb TB, timeout time.Duration, timeoutCause
5959
// No need to set a cause here. The cause or error of
6060
// the parent context will be used.
6161
case <-after.C:
62+
// Code using this tCtx may or may not log the
63+
// information above when it runs into the
64+
// cancellation. It's better if we do it, just to be on
65+
// the safe side.
66+
//
67+
// Would be nice to log this with the source code location
68+
// of our caller, but testing.Logf does not support that.
69+
tb.Logf("\nWARNING: %s\n", timeoutCause)
6270
cancel(canceledError(timeoutCause))
6371
}
6472
}()

0 commit comments

Comments
 (0)