fixed mergeTestResults

lukaszcl · lukaszcl · commit d72264a4192b · 2025-03-11T13:42:47.000+01:00
diff --git a/tools/flakeguard/cmd/run.go b/tools/flakeguard/cmd/run.go
@@ -35,13 +35,32 @@ var RunTestsCmd = &cobra.Command{
 		tags, _ := cmd.Flags().GetStringArray("tags")
 		useRace, _ := cmd.Flags().GetBool("race")
 		outputPath, _ := cmd.Flags().GetString("output-json")
+		minPassRatio, _ := cmd.Flags().GetFloat64("min-pass-ratio")
+		// For backward compatibility, check if max-pass-ratio was used
 		maxPassRatio, _ := cmd.Flags().GetFloat64("max-pass-ratio")
+		maxPassRatioSpecified := cmd.Flags().Changed("max-pass-ratio")
 		skipTests, _ := cmd.Flags().GetStringSlice("skip-tests")
 		selectTests, _ := cmd.Flags().GetStringSlice("select-tests")
 		useShuffle, _ := cmd.Flags().GetBool("shuffle")
 		shuffleSeed, _ := cmd.Flags().GetString("shuffle-seed")
 		omitOutputsOnSuccess, _ := cmd.Flags().GetBool("omit-test-outputs-on-success")
 		ignoreParentFailuresOnSubtests, _ := cmd.Flags().GetBool("ignore-parent-failures-on-subtests")
+		rerunFailed, _ := cmd.Flags().GetInt("rerun-failed")
+		failFast, _ := cmd.Flags().GetBool("fail-fast")
+
+		// Handle the compatibility between min/max pass ratio
+		passRatioThreshold := minPassRatio
+		if maxPassRatioSpecified && maxPassRatio != 1.0 {
+			// The deprecated max-pass-ratio, when explicitly set to a non-default value, overrides min-pass-ratio (used as-is, no conversion)
+			log.Warn().Msg("--max-pass-ratio is deprecated, please use --min-pass-ratio instead")
+			passRatioThreshold = maxPassRatio
+		}
+
+		// Validate pass ratio
+		if passRatioThreshold < 0 || passRatioThreshold > 1 {
+			log.Error().Float64("pass ratio", passRatioThreshold).Msg("Error: pass ratio must be between 0 and 1")
+			os.Exit(ErrorExitCode)
+		}
 
 		outputDir := filepath.Dir(outputPath)
 		initialDirSize, err := getDirSize(outputDir)
@@ -50,11 +69,6 @@ var RunTestsCmd = &cobra.Command{
 			// intentionally don't exit here, as we can still proceed with the run
 		}
 
-		if maxPassRatio < 0 || maxPassRatio > 1 {
-			log.Error().Float64("max pass ratio", maxPassRatio).Msg("Error: max pass ratio must be between 0 and 1")
-			os.Exit(ErrorExitCode)
-		}
-
 		// Check if project dependencies are correctly set up
 		if err := checkDependencies(projectPath); err != nil {
 			log.Error().Err(err).Msg("Error checking project dependencies")
@@ -90,8 +104,10 @@ var RunTestsCmd = &cobra.Command{
 			UseShuffle:                     useShuffle,
 			ShuffleSeed:                    shuffleSeed,
 			OmitOutputsOnSuccess:           omitOutputsOnSuccess,
-			MaxPassRatio:                   maxPassRatio,
+			MaxPassRatio:                   passRatioThreshold, // Use the calculated threshold
 			IgnoreParentFailuresOnSubtests: ignoreParentFailuresOnSubtests,
+			RerunFailed:                    rerunFailed,
+			FailFast:                       failFast,
 		}
 
 		// Run the tests
@@ -134,20 +150,37 @@ var RunTestsCmd = &cobra.Command{
 
 		// Filter flaky tests using FilterTests
 		flakyTests := reports.FilterTests(testReport.Results, func(tr reports.TestResult) bool {
-			return !tr.Skipped && tr.PassRatio < maxPassRatio
+			return !tr.Skipped && tr.PassRatio < passRatioThreshold
 		})
 
 		finalDirSize, err := getDirSize(outputDir)
 		if err != nil {
-			log.Error().Err(err).Str("path", outputDir).Msg("Error getting initial directory size")
+			log.Error().Err(err).Str("path", outputDir).Msg("Error getting final directory size")
 			// intentionally don't exit here, as we can still proceed with the run
 		}
 		diskSpaceUsed := byteCountSI(finalDirSize - initialDirSize)
 
+		// Report with more detailed information about reruns
+		if rerunFailed > 0 {
+			log.Info().
+				Int("initial runs", runCount).
+				Int("reruns for failed tests", rerunFailed).
+				Str("disk space used", diskSpaceUsed).
+				Msg("Test execution completed")
+		} else {
+			log.Info().
+				Int("runs", runCount).
+				Str("disk space used", diskSpaceUsed).
+				Msg("Test execution completed")
+		}
+
 		if len(flakyTests) > 0 {
-			log.Info().Str("disk space used", diskSpaceUsed).Int("count", len(flakyTests)).Str("pass ratio threshold", fmt.Sprintf("%.2f%%", maxPassRatio*100)).Msg("Found flaky tests")
+			log.Info().
+				Int("count", len(flakyTests)).
+				Str("stability threshold", fmt.Sprintf("%.0f%%", passRatioThreshold*100)).
+				Msg("Found flaky tests")
 		} else {
-			log.Info().Str("disk space used", diskSpaceUsed).Msg("No flaky tests found")
+			log.Info().Msg("All tests passed stability requirements")
 		}
 
 		fmt.Printf("\nFlakeguard Summary\n")
@@ -178,9 +211,19 @@ func init() {
 	RunTestsCmd.Flags().String("output-json", "", "Path to output the test results in JSON format")
 	RunTestsCmd.Flags().StringSlice("skip-tests", nil, "Comma-separated list of test names to skip from running")
 	RunTestsCmd.Flags().StringSlice("select-tests", nil, "Comma-separated list of test names to specifically run")
-	RunTestsCmd.Flags().Float64("max-pass-ratio", 1.0, "The maximum pass ratio threshold for a test to be considered flaky. Any tests below this pass rate will be considered flaky.")
+
+	// Add the min-pass-ratio flag (new recommended approach)
+	RunTestsCmd.Flags().Float64("min-pass-ratio", 1.0, "The minimum pass ratio required for a test to be considered stable (0.0-1.0)")
+
+	// Keep max-pass-ratio for backward compatibility but mark as deprecated
+	RunTestsCmd.Flags().Float64("max-pass-ratio", 1.0, "DEPRECATED: Use min-pass-ratio instead")
+	RunTestsCmd.Flags().MarkDeprecated("max-pass-ratio", "use min-pass-ratio instead")
+
 	RunTestsCmd.Flags().Bool("omit-test-outputs-on-success", true, "Omit test outputs and package outputs for tests that pass")
 	RunTestsCmd.Flags().Bool("ignore-parent-failures-on-subtests", false, "Ignore failures in parent tests when only subtests fail")
+
+	// Add rerun failed tests flag
+	RunTestsCmd.Flags().Int("rerun-failed", 0, "Number of times to rerun failed tests (0 disables reruns)")
 }
 
 func checkDependencies(projectPath string) error {
diff --git a/tools/flakeguard/runner/example_test_package/example_tests_test.go b/tools/flakeguard/runner/example_test_package/example_tests_test.go
@@ -2,6 +2,7 @@ package exampletestpackage
 
 import (
 	"log"
+	"math/rand"
 	"os"
 	"sync"
 	"testing"
@@ -212,3 +213,23 @@ func TestTimeout(t *testing.T) {
 	time.Sleep(time.Until(deadline))
 	t.Logf("This test should have timed out")
 }
+
+// TestRandomFlaky is a truly random flaky test that will fail approximately 90% of the time
+func TestRandomFlaky(t *testing.T) {
+	t.Parallel()
+
+	// Seed a private generator with the current time so every run draws a fresh value
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+
+	// Draw a value in the half-open interval [0.0, 1.0)
+	randomValue := r.Float64()
+
+	t.Logf("Random value generated: %f", randomValue)
+
+	// Values below 0.9 fail, so the test fails approximately 90% of the time
+	if randomValue < 0.9 {
+		t.Fatal("This test randomly failed (90% probability)")
+	}
+
+	t.Log("This test randomly passed (10% probability)")
+}
diff --git a/tools/flakeguard/runner/runner.go b/tools/flakeguard/runner/runner.go
@@ -736,43 +736,67 @@ func prettyProjectPath(projectPath string) (string, error) {
 }
 
 func (r *Runner) rerunFailedTests(results []reports.TestResult) ([]reports.TestResult, error) {
-	var failingTests []reports.TestResult
+	// Group failing tests by package for more efficient reruns
+	failingTestsByPackage := make(map[string][]string)
 	for _, tr := range results {
 		if !tr.Skipped && tr.PassRatio < 1 {
-			failingTests = append(failingTests, tr)
+			if _, exists := failingTestsByPackage[tr.TestPackage]; !exists {
+				failingTestsByPackage[tr.TestPackage] = []string{}
+			}
+			failingTestsByPackage[tr.TestPackage] = append(failingTestsByPackage[tr.TestPackage], tr.TestName)
 		}
 	}
 
 	if r.Verbose {
-		log.Info().Msgf("Rerunning failing tests: %v", failingTests)
+		log.Info().Msgf("Rerunning failing tests grouped by package: %v", failingTestsByPackage)
 	}
 
-	var rerunResults []reports.TestResult
+	var rerunJsonFilePaths []string
 
-	// Rerun each failing test up to RerunFailed times
+	// Rerun the failing tests of each package RerunFailed times (there is no early exit once they pass)
 	for i := 0; i < r.RerunFailed; i++ {
-		for _, fTest := range failingTests {
-			testCmd := r.buildGoTestCommandForTest(fTest)
-
-			if r.Verbose {
-				log.Info().Msgf("Rerun iteration %d for %s: %v", i+1, fTest.TestName, testCmd)
+		for pkg, tests := range failingTestsByPackage {
+			// Build regex pattern to match all failing tests in this package
+			testPattern := fmt.Sprintf("^(%s)$", strings.Join(tests, "|"))
+
+			cmd := []string{
+				"go", "test",
+				pkg,
+				"-run", testPattern,
+				"-json",
 			}
 
-			jsonFilePath, _, err := r.runCmd(testCmd, i)
-			if err != nil {
-				return nil, fmt.Errorf("error on rerunCmd for test %s: %w", fTest.TestName, err)
+			// Add other test flags
+			if r.UseRace {
+				cmd = append(cmd, "-race")
+			}
+			if r.Timeout > 0 {
+				cmd = append(cmd, fmt.Sprintf("-timeout=%s", r.Timeout.String()))
+			}
+			if len(r.Tags) > 0 {
+				cmd = append(cmd, fmt.Sprintf("-tags=%s", strings.Join(r.Tags, ",")))
+			}
+			if r.Verbose {
+				cmd = append(cmd, "-v")
+				log.Info().Msgf("Rerun iteration %d for package %s: %v", i+1, pkg, cmd)
 			}
 
-			additionalResults, err := r.parseTestResults([]string{jsonFilePath}, "rerun")
+			// Run the package tests
+			jsonFilePath, _, err := r.runCmd(cmd, i)
 			if err != nil {
-				return nil, fmt.Errorf("failed to parse rerun results: %w", err)
+				return nil, fmt.Errorf("error on rerunCmd for package %s: %w", pkg, err)
 			}
 
-			// Collect these rerun results in a slice; we'll merge them later.
-			rerunResults = append(rerunResults, additionalResults...)
+			rerunJsonFilePaths = append(rerunJsonFilePaths, jsonFilePath)
 		}
 	}
 
+	// Parse all rerun results at once with a consistent prefix
+	rerunResults, err := r.parseTestResults(rerunJsonFilePaths, "rerun")
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse rerun results: %w", err)
+	}
+
 	return rerunResults, nil
 }
 
@@ -796,7 +820,6 @@ func (r *Runner) buildGoTestCommandForTest(t reports.TestResult) []string {
 	return cmd
 }
 
-// mergeTestResults merges additional test results into the existing results slice.
 // mergeTestResults merges additional test results into the existing results slice.
 func mergeTestResults(mainResults *[]reports.TestResult, additional []reports.TestResult) {
 	for _, add := range additional {
@@ -809,30 +832,60 @@ func mergeTestResults(mainResults *[]reports.TestResult, additional []reports.Te
 				(*mainResults)[i].Failures += add.Failures
 				(*mainResults)[i].Skips += add.Skips
 
+				// Merge boolean flags (using OR operation)
+				(*mainResults)[i].Panic = (*mainResults)[i].Panic || add.Panic
+				(*mainResults)[i].Race = (*mainResults)[i].Race || add.Race
+				(*mainResults)[i].Timeout = (*mainResults)[i].Timeout || add.Timeout
+				(*mainResults)[i].Skipped = (*mainResults)[i].Skipped || add.Skipped
+				(*mainResults)[i].PackagePanic = (*mainResults)[i].PackagePanic || add.PackagePanic
+
 				// Merge durations
 				(*mainResults)[i].Durations = append((*mainResults)[i].Durations, add.Durations...)
 
+				// Merge maps for Outputs
+				if (*mainResults)[i].Outputs == nil {
+					(*mainResults)[i].Outputs = make(map[string][]string)
+				}
+				for runID, outputs := range add.Outputs {
+					if existing, ok := (*mainResults)[i].Outputs[runID]; ok {
+						(*mainResults)[i].Outputs[runID] = append(existing, outputs...)
+					} else {
+						(*mainResults)[i].Outputs[runID] = outputs
+					}
+				}
+
 				// Merge maps for PassedOutputs
 				if (*mainResults)[i].PassedOutputs == nil {
 					(*mainResults)[i].PassedOutputs = make(map[string][]string)
 				}
 				for runID, outputs := range add.PassedOutputs {
-					(*mainResults)[i].PassedOutputs[runID] = append((*mainResults)[i].PassedOutputs[runID], outputs...)
+					if existing, ok := (*mainResults)[i].PassedOutputs[runID]; ok {
+						(*mainResults)[i].PassedOutputs[runID] = append(existing, outputs...)
+					} else {
+						(*mainResults)[i].PassedOutputs[runID] = outputs
+					}
 				}
 
 				// Merge maps for FailedOutputs
 				if (*mainResults)[i].FailedOutputs == nil {
 					(*mainResults)[i].FailedOutputs = make(map[string][]string)
 				}
 				for runID, outputs := range add.FailedOutputs {
-					(*mainResults)[i].FailedOutputs[runID] = append((*mainResults)[i].FailedOutputs[runID], outputs...)
+					if existing, ok := (*mainResults)[i].FailedOutputs[runID]; ok {
+						(*mainResults)[i].FailedOutputs[runID] = append(existing, outputs...)
+					} else {
+						(*mainResults)[i].FailedOutputs[runID] = outputs
+					}
 				}
 
-				// Update pass ratio
+				// Merge PackageOutputs
+				(*mainResults)[i].PackageOutputs = append((*mainResults)[i].PackageOutputs, add.PackageOutputs...)
+
+				// Update pass ratio (consistent with parseTestResults default)
 				if (*mainResults)[i].Runs > 0 {
 					(*mainResults)[i].PassRatio = float64((*mainResults)[i].Successes) / float64((*mainResults)[i].Runs)
 				} else {
-					(*mainResults)[i].PassRatio = -1.0
+					(*mainResults)[i].PassRatio = 1.0 // Default to 1.0 if no runs
 				}
 
 				found = true
diff --git a/tools/flakeguard/runner/runner_test.go b/tools/flakeguard/runner/runner_test.go