Skip to content

Commit bf1188d

Browse files
committed
scheduler_perf: only store log output after failures
Reconfiguring the logging infrastructure with a per-test output file mimics the behavior of per-test output (log output captured only on failures) while still using the normal logging code, which is important for benchmarking. To enable this behavior, the ARTIFACTS env variable must be set.
1 parent 6cbdef8 commit bf1188d

File tree

3 files changed

+94
-17
lines changed

3 files changed

+94
-17
lines changed

test/integration/scheduler_perf/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ performance.
102102
During interactive debugging sessions it is possible to enable per-test output
103103
via -use-testing-log.
104104

105+
Log output can be quite large, in particular when running the large benchmarks
106+
and when not using -use-testing-log. For benchmarks, we want to produce that
107+
log output in a realistic way (= write to disk using the normal logging
108+
backends) and only capture the output of a specific test as part of the job
109+
results when that test failed. Therefore each test redirects its own output,
110+
if the ARTIFACTS env variable is set, to a `$ARTIFACTS/<test name>.log` file
111+
and removes that file only if the test passed.
112+
113+
105114
### Integration tests
106115

107116
To run integration tests, use:

test/integration/scheduler_perf/main_test.go

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,15 @@ import (
2323
"strings"
2424
"testing"
2525

26-
"k8s.io/apimachinery/pkg/util/runtime"
27-
"k8s.io/component-base/featuregate"
2826
"k8s.io/component-base/logs"
2927
logsapi "k8s.io/component-base/logs/api/v1"
3028
_ "k8s.io/component-base/logs/json/register"
3129
"k8s.io/kubernetes/test/utils/ktesting"
3230
)
3331

34-
// Run with -v=2, this is the default log level in production.
35-
//
36-
// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
37-
// with more log output.
38-
const defaultVerbosity = 2
39-
4032
func TestMain(m *testing.M) {
4133
// Run with -v=2, this is the default log level in production.
42-
ktesting.SetDefaultVerbosity(defaultVerbosity)
34+
ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)
4335

4436
// test/integration/framework/flags.go unconditionally initializes the
4537
// logging flags. That's correct for most tests, but in the
@@ -56,21 +48,17 @@ func TestMain(m *testing.M) {
5648
})
5749
flag.CommandLine = &fs
5850

59-
featureGate := featuregate.NewFeatureGate()
60-
runtime.Must(logsapi.AddFeatureGates(featureGate))
61-
flag.Var(featureGate, "feature-gate",
51+
flag.Var(LoggingFeatureGate, "feature-gate",
6252
"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
63-
"Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
64-
c := logsapi.NewLoggingConfiguration()
65-
c.Verbosity = defaultVerbosity
53+
"Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))
6654

6755
// This would fail if we hadn't removed the logging flags above.
68-
logsapi.AddGoFlags(c, flag.CommandLine)
56+
logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)
6957

7058
flag.Parse()
7159

7260
logs.InitLogs()
73-
if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
61+
if err := logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate); err != nil {
7462
fmt.Fprintf(os.Stderr, "%v\n", err)
7563
os.Exit(1)
7664
}

test/integration/scheduler_perf/scheduler_perf.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"math"
2727
"os"
2828
"path"
29+
"regexp"
2930
"strings"
3031
"sync"
3132
"testing"
@@ -38,6 +39,7 @@ import (
3839
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3940
"k8s.io/apimachinery/pkg/labels"
4041
"k8s.io/apimachinery/pkg/runtime/schema"
42+
"k8s.io/apimachinery/pkg/util/runtime"
4143
"k8s.io/apimachinery/pkg/util/wait"
4244
utilfeature "k8s.io/apiserver/pkg/util/feature"
4345
cacheddiscovery "k8s.io/client-go/discovery/cached/memory"
@@ -48,6 +50,7 @@ import (
4850
"k8s.io/client-go/restmapper"
4951
"k8s.io/component-base/featuregate"
5052
featuregatetesting "k8s.io/component-base/featuregate/testing"
53+
logsapi "k8s.io/component-base/logs/api/v1"
5154
"k8s.io/component-base/metrics/legacyregistry"
5255
"k8s.io/klog/v2"
5356
"k8s.io/kubernetes/pkg/scheduler/apis/config"
@@ -94,6 +97,29 @@ const (
9497
pluginLabelName = "plugin"
9598
)
9699

100+
// Run with -v=2, this is the default log level in production.
101+
//
102+
// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
103+
// with more log output.
104+
const DefaultLoggingVerbosity = 2
105+
106+
var LoggingFeatureGate FeatureGateFlag
107+
var LoggingConfig *logsapi.LoggingConfiguration
108+
109+
type FeatureGateFlag interface {
110+
featuregate.FeatureGate
111+
flag.Value
112+
}
113+
114+
func init() {
115+
f := featuregate.NewFeatureGate()
116+
runtime.Must(logsapi.AddFeatureGates(f))
117+
LoggingFeatureGate = f
118+
119+
LoggingConfig = logsapi.NewLoggingConfiguration()
120+
LoggingConfig.Verbosity = DefaultLoggingVerbosity
121+
}
122+
97123
var (
98124
defaultMetricsCollectorConfig = metricsCollectorConfig{
99125
Metrics: map[string][]*labelValues{
@@ -764,8 +790,62 @@ func initTestOutput(tb testing.TB) io.Writer {
764790

765791
var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
766792

793+
var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
794+
767795
func setupTestCase(t testing.TB, tc *testCase, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
768796
tCtx := ktesting.Init(t, initoption.PerTestOutput(*useTestingLog))
797+
artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
798+
if !*useTestingLog && doArtifacts {
799+
// Reconfigure logging so that it goes to a separate file per
800+
// test instead of stderr. If the test passes, the file gets
801+
// deleted. The overall output can be very large (> 200 MB for
802+
// ci-benchmark-scheduler-perf-master). With this approach, we
803+
// have log output for failures without having to store large
804+
// amounts of data that no-one is looking at. The performance
805+
// is the same as writing to stderr.
806+
if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
807+
t.Fatalf("Failed to reset the logging configuration: %v", err)
808+
}
809+
logfileName := path.Join(artifacts, specialFilenameChars.ReplaceAllString(t.Name(), "_")+".log")
810+
out, err := os.Create(logfileName)
811+
if err != nil {
812+
t.Fatalf("Failed to create per-test log output file: %v", err)
813+
}
814+
t.Cleanup(func() {
815+
// Everything should have stopped by now, checked below
816+
// by GoleakCheck (which runs first during test
817+
// shutdown!). Therefore we can clean up. Errors get logged
818+
// and fail the test, but cleanup tries to continue.
819+
//
820+
// Note that the race detector will flag any goroutine
821+
// as causing a race if there is no explicit wait for
822+
// that goroutine to stop. We know that they must have
823+
// stopped (GoleakCheck!) but the race detector
824+
// doesn't.
825+
//
826+
// This is a major issue because many Kubernetes goroutines get
827+
// started without waiting for them to stop :-(
828+
if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
829+
t.Errorf("Failed to reset the logging configuration: %v", err)
830+
}
831+
if err := out.Close(); err != nil {
832+
t.Errorf("Failed to close the per-test log output file: %s: %v", logfileName, err)
833+
}
834+
if !t.Failed() {
835+
if err := os.Remove(logfileName); err != nil {
836+
t.Errorf("Failed to remove the per-test log output file: %v", err)
837+
}
838+
}
839+
})
840+
opts := &logsapi.LoggingOptions{
841+
ErrorStream: out,
842+
InfoStream: out,
843+
}
844+
if err := logsapi.ValidateAndApplyWithOptions(LoggingConfig, opts, LoggingFeatureGate); err != nil {
845+
t.Fatalf("Failed to apply the per-test logging configuration: %v", err)
846+
}
847+
848+
}
769849

770850
// Ensure that there are no leaked
771851
// goroutines. They could influence

0 commit comments

Comments
 (0)