Skip to content

Commit bf1188d

Browse files
committed
scheduler_perf: only store log output after failures
Reconfiguring the logging infrastructure with a per-test output file mimics the behavior of per-test output (log output captured only on failures) while still using the normal logging code, which is important for benchmarking. To enable this behavior, the ARTIFACTS env variable must be set.
1 parent 6cbdef8 commit bf1188d

File tree

3 files changed

+94
-17
lines changed

3 files changed

+94
-17
lines changed

test/integration/scheduler_perf/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ performance.
102102
During interactive debugging sessions it is possible to enable per-test output
103103
via -use-testing-log.
104104

105+
Log output can be quite large, in particular when running the large benchmarks
106+
and when not using -use-testing-log. For benchmarks, we want to produce that
107+
log output in a realistic way (= write to disk using the normal logging
108+
backends) and only capture the output of a specific test as part of the job
109+
results when that test failed. Therefore each test redirects its own output,
110+
if the ARTIFACTS env variable is set, to a `$ARTIFACTS/<test name>.log` file
111+
and removes that file only if the test passed.
112+
113+
105114
### Integration tests
106115

107116
To run integration tests, use:

test/integration/scheduler_perf/main_test.go

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,15 @@ import (
2323
"strings"
2424
"testing"
2525

26-
"k8s.io/apimachinery/pkg/util/runtime"
27-
"k8s.io/component-base/featuregate"
2826
"k8s.io/component-base/logs"
2927
logsapi "k8s.io/component-base/logs/api/v1"
3028
_ "k8s.io/component-base/logs/json/register"
3129
"k8s.io/kubernetes/test/utils/ktesting"
3230
)
3331

34-
// Run with -v=2, this is the default log level in production.
35-
//
36-
// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
37-
// with more log output.
38-
const defaultVerbosity = 2
39-
4032
func TestMain(m *testing.M) {
4133
// Run with -v=2, this is the default log level in production.
42-
ktesting.SetDefaultVerbosity(defaultVerbosity)
34+
ktesting.SetDefaultVerbosity(DefaultLoggingVerbosity)
4335

4436
// test/integration/framework/flags.go unconditionally initializes the
4537
// logging flags. That's correct for most tests, but in the
@@ -56,21 +48,17 @@ func TestMain(m *testing.M) {
5648
})
5749
flag.CommandLine = &fs
5850

59-
featureGate := featuregate.NewFeatureGate()
60-
runtime.Must(logsapi.AddFeatureGates(featureGate))
61-
flag.Var(featureGate, "feature-gate",
51+
flag.Var(LoggingFeatureGate, "feature-gate",
6252
"A set of key=value pairs that describe feature gates for alpha/experimental features. "+
63-
"Options are:\n"+strings.Join(featureGate.KnownFeatures(), "\n"))
64-
c := logsapi.NewLoggingConfiguration()
65-
c.Verbosity = defaultVerbosity
53+
"Options are:\n"+strings.Join(LoggingFeatureGate.KnownFeatures(), "\n"))
6654

6755
// This would fail if we hadn't removed the logging flags above.
68-
logsapi.AddGoFlags(c, flag.CommandLine)
56+
logsapi.AddGoFlags(LoggingConfig, flag.CommandLine)
6957

7058
flag.Parse()
7159

7260
logs.InitLogs()
73-
if err := logsapi.ValidateAndApply(c, featureGate); err != nil {
61+
if err := logsapi.ValidateAndApply(LoggingConfig, LoggingFeatureGate); err != nil {
7462
fmt.Fprintf(os.Stderr, "%v\n", err)
7563
os.Exit(1)
7664
}

test/integration/scheduler_perf/scheduler_perf.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"math"
2727
"os"
2828
"path"
29+
"regexp"
2930
"strings"
3031
"sync"
3132
"testing"
@@ -38,6 +39,7 @@ import (
3839
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3940
"k8s.io/apimachinery/pkg/labels"
4041
"k8s.io/apimachinery/pkg/runtime/schema"
42+
"k8s.io/apimachinery/pkg/util/runtime"
4143
"k8s.io/apimachinery/pkg/util/wait"
4244
utilfeature "k8s.io/apiserver/pkg/util/feature"
4345
cacheddiscovery "k8s.io/client-go/discovery/cached/memory"
@@ -48,6 +50,7 @@ import (
4850
"k8s.io/client-go/restmapper"
4951
"k8s.io/component-base/featuregate"
5052
featuregatetesting "k8s.io/component-base/featuregate/testing"
53+
logsapi "k8s.io/component-base/logs/api/v1"
5154
"k8s.io/component-base/metrics/legacyregistry"
5255
"k8s.io/klog/v2"
5356
"k8s.io/kubernetes/pkg/scheduler/apis/config"
@@ -94,6 +97,29 @@ const (
9497
pluginLabelName = "plugin"
9598
)
9699

100+
// Run with -v=2, this is the default log level in production.
101+
//
102+
// In a PR this can be bumped up temporarily to run pull-kubernetes-scheduler-perf
103+
// with more log output.
104+
const DefaultLoggingVerbosity = 2
105+
106+
var LoggingFeatureGate FeatureGateFlag
107+
var LoggingConfig *logsapi.LoggingConfiguration
108+
109+
type FeatureGateFlag interface {
110+
featuregate.FeatureGate
111+
flag.Value
112+
}
113+
114+
func init() {
115+
f := featuregate.NewFeatureGate()
116+
runtime.Must(logsapi.AddFeatureGates(f))
117+
LoggingFeatureGate = f
118+
119+
LoggingConfig = logsapi.NewLoggingConfiguration()
120+
LoggingConfig.Verbosity = DefaultLoggingVerbosity
121+
}
122+
97123
var (
98124
defaultMetricsCollectorConfig = metricsCollectorConfig{
99125
Metrics: map[string][]*labelValues{
@@ -764,8 +790,62 @@ func initTestOutput(tb testing.TB) io.Writer {
764790

765791
var perfSchedulingLabelFilter = flag.String("perf-scheduling-label-filter", "performance", "comma-separated list of labels which a testcase must have (no prefix or +) or must not have (-), used by BenchmarkPerfScheduling")
766792

793+
var specialFilenameChars = regexp.MustCompile(`[^a-zA-Z0-9-_]`)
794+
767795
func setupTestCase(t testing.TB, tc *testCase, output io.Writer, outOfTreePluginRegistry frameworkruntime.Registry) (informers.SharedInformerFactory, ktesting.TContext) {
768796
tCtx := ktesting.Init(t, initoption.PerTestOutput(*useTestingLog))
797+
artifacts, doArtifacts := os.LookupEnv("ARTIFACTS")
798+
if !*useTestingLog && doArtifacts {
799+
// Reconfigure logging so that it goes to a separate file per
800+
// test instead of stderr. If the test passes, the file gets
801+
// deleted. The overall output can be very large (> 200 MB for
802+
// ci-benchmark-scheduler-perf-master). With this approach, we
803+
// have log output for failures without having to store large
804+
// amounts of data that no-one is looking at. The performance
805+
// is the same as writing to stderr.
806+
if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
807+
t.Fatalf("Failed to reset the logging configuration: %v", err)
808+
}
809+
logfileName := path.Join(artifacts, specialFilenameChars.ReplaceAllString(t.Name(), "_")+".log")
810+
out, err := os.Create(logfileName)
811+
if err != nil {
812+
t.Fatalf("Failed to create per-test log output file: %v", err)
813+
}
814+
t.Cleanup(func() {
815+
// Everything should have stopped by now, checked below
816+
// by GoleakCheck (which runs first during test
817+
// shutdown!). Therefore we can clean up. Errors get logged
818+
// and fail the test, but cleanup tries to continue.
819+
//
820+
// Note that the race detector will flag any goroutine
821+
// as causing a race if there is no explicit wait for
822+
// that goroutine to stop. We know that they must have
823+
// stopped (GoleakCheck!) but the race detector
824+
// doesn't.
825+
//
826+
// This is a major issue because many Kubernetes goroutines get
827+
// started without waiting for them to stop :-(
828+
if err := logsapi.ResetForTest(LoggingFeatureGate); err != nil {
829+
t.Errorf("Failed to reset the logging configuration: %v", err)
830+
}
831+
if err := out.Close(); err != nil {
832+
t.Errorf("Failed to close the per-test log output file: %s: %v", logfileName, err)
833+
}
834+
if !t.Failed() {
835+
if err := os.Remove(logfileName); err != nil {
836+
t.Errorf("Failed to remove the per-test log output file: %v", err)
837+
}
838+
}
839+
})
840+
opts := &logsapi.LoggingOptions{
841+
ErrorStream: out,
842+
InfoStream: out,
843+
}
844+
if err := logsapi.ValidateAndApplyWithOptions(LoggingConfig, opts, LoggingFeatureGate); err != nil {
845+
t.Fatalf("Failed to apply the per-test logging configuration: %v", err)
846+
}
847+
848+
}
769849

770850
// Ensure that there are no leaked
771851
// goroutines. They could influence

0 commit comments

Comments
 (0)