@@ -44,6 +44,9 @@ import Glibc
4444 let skipLoadingBenchmarks = argumentExtractor. extractFlag ( named: " skip-loading-benchmark-targets " )
4545 let checkAbsoluteThresholds =
4646 checkAbsoluteThresholdsPath. count > 0 ? 1 : argumentExtractor. extractFlag ( named: " check-absolute " )
47+ let runCount = argumentExtractor. extractOption ( named: " run-count " )
48+ let relative = argumentExtractor. extractFlag ( named: " relative " )
49+ let range = argumentExtractor. extractFlag ( named: " range " )
4750 let groupingToUse = argumentExtractor. extractOption ( named: " grouping " )
4851 let metricsToUse = argumentExtractor. extractOption ( named: " metric " )
4952 let timeUnits = argumentExtractor. extractOption ( named: " time-units " )
@@ -234,6 +237,7 @@ import Glibc
234237 throw MyError . invalidArgument
235238 }
236239
240+ var totalRunCount = 1
237241 var skipLoadingBenchmarksFlagIsValid = skipLoadingBenchmarks == 0
238242 if commandToPerform == . thresholds {
239243 guard positionalArguments. count > 0 ,
@@ -267,6 +271,19 @@ import Glibc
267271 if positionalArguments. count > 0 {
268272 shouldBuildTargets = false
269273 }
274+ totalRunCount = runCount. first. flatMap { Int ( $0) } ?? 1
275+ /// These update the run count to 5 by default if it's set to 1.
276+ /// Using relative/range flags doesn't mean anything if we're not running multiple times.
277+ /// The benchmarks will need to be run multiple times in order to be able to calculate a
278+ /// relative/range of thresholds which satisfy all benchmark runs.
279+ if relative > 0 {
280+ totalRunCount = totalRunCount < 2 ? 5 : totalRunCount
281+ args. append ( " --wants-relative-thresholds " )
282+ }
283+ if range > 0 {
284+ totalRunCount = totalRunCount < 2 ? 5 : totalRunCount
285+ args. append ( " --wants-range-thresholds " )
286+ }
270287 break
271288 case . check:
272289 skipLoadingBenchmarksFlagIsValid = true
@@ -477,53 +494,108 @@ import Glibc
477494
478495 var failedBenchmarkCount = 0
479496
480- try withCStrings ( args ) { cArgs in
481- if debug > 0 {
482- print ( " To debug, start \( benchmarkToolName ) in LLDB using: " )
483- print ( " lldb \( benchmarkTool . string ) " )
484- print ( " " )
485- print ( " Then launch \( benchmarkToolName ) with: " )
486- print ( " run \( args . dropFirst ( ) . joined ( separator : " " ) ) " )
487- print ( " " )
488- return
489- }
497+ var allFailureCount = 0
498+ let results : [ Result < Void , Error > ] = ( 0 ..< max ( totalRunCount , 1 ) )
499+ . map { runIdx in
500+ // If we're running multiple times, we need to add the run count to the arguments
501+ var args = args
502+ if totalRunCount > 1 {
503+ args += [ " -- run-number " , " \( runIdx + 1 ) " ]
504+ if quietRunning == 0 {
505+ print (
506+ """
490507
491- var pid : pid_t = 0
492- var status = posix_spawn ( & pid, benchmarkTool. string, nil , nil , cArgs, environ)
493-
494- if status == 0 {
495- if waitpid ( pid, & status, 0 ) != - 1 {
496- // Ok, this sucks, but there is no way to get a C support target for plugins and
497- // the way the status is extracted portably is with macros - so we just need to
498- // reimplement the logic here in Swift according to the waitpid man page to
499- // get some nicer feedback on failure reason.
500- guard let waitStatus = ExitCode ( rawValue: ( status & 0xFF00 ) >> 8 ) else {
501- print ( " One or more benchmarks returned an unexpected return code \( status) " )
502- throw MyError . benchmarkUnexpectedReturnCode
508+ Running the command multiple times, round \( runIdx + 1 ) of \( totalRunCount) ...
509+ """
510+ )
503511 }
504- switch waitStatus {
505- case . success:
506- break
507- case . baselineNotFound:
508- throw MyError . baselineNotFound
509- case . genericFailure:
510- print ( " One or more benchmark suites crashed during runtime. " )
511- throw MyError . benchmarkCrashed
512- case . thresholdRegression:
513- throw MyError . benchmarkThresholdRegression
514- case . thresholdImprovement:
515- throw MyError . benchmarkThresholdImprovement
516- case . benchmarkJobFailed:
517- failedBenchmarkCount += 1
518- case . noPermissions:
519- throw MyError . noPermissions
512+ }
513+
514+ return Result < Void , Error > {
515+ try withCStrings ( args) { cArgs in
516+ /// We'll decrement this in the success path
517+ allFailureCount += 1
518+
519+ if debug > 0 {
520+ print ( " To debug, start \( benchmarkToolName) in LLDB using: " )
521+ print ( " lldb \( benchmarkTool. string) " )
522+ print ( " " )
523+ print ( " Then launch \( benchmarkToolName) with: " )
524+ print ( " run \( args. dropFirst ( ) . joined ( separator: " " ) ) " )
525+ print ( " " )
526+ return
527+ }
528+
529+ var pid : pid_t = 0
530+ var status = posix_spawn ( & pid, benchmarkTool. string, nil , nil , cArgs, environ)
531+
532+ if status == 0 {
533+ if waitpid ( pid, & status, 0 ) != - 1 {
534+ // Ok, this sucks, but there is no way to get a C support target for plugins and
535+ // the way the status is extracted portably is with macros - so we just need to
536+ // reimplement the logic here in Swift according to the waitpid man page to
537+ // get some nicer feedback on failure reason.
538+ guard let waitStatus = ExitCode ( rawValue: ( status & 0xFF00 ) >> 8 ) else {
539+ print ( " One or more benchmarks returned an unexpected return code \( status) " )
540+ throw MyError . benchmarkUnexpectedReturnCode
541+ }
542+ switch waitStatus {
543+ case . success:
544+ allFailureCount -= 1
545+ case . baselineNotFound:
546+ throw MyError . baselineNotFound
547+ case . genericFailure:
548+ print ( " One or more benchmark suites crashed during runtime. " )
549+ throw MyError . benchmarkCrashed
550+ case . thresholdRegression:
551+ throw MyError . benchmarkThresholdRegression
552+ case . thresholdImprovement:
553+ throw MyError . benchmarkThresholdImprovement
554+ case . benchmarkJobFailed:
555+ failedBenchmarkCount += 1
556+ case . noPermissions:
557+ throw MyError . noPermissions
558+ }
559+ } else {
560+ print (
561+ " waitpid() for pid \( pid) returned a non-zero exit code \( status) , errno = \( errno) "
562+ )
563+ exit ( errno)
564+ }
565+ } else {
566+ print ( " Failed to run BenchmarkTool, posix_spawn() returned [ \( status) ] " )
567+ }
520568 }
521- } else {
522- print ( " waitpid() for pid \( pid) returned a non-zero exit code \( status) , errno = \( errno) " )
523- exit ( errno)
524569 }
525- } else {
526- print ( " Failed to run BenchmarkTool, posix_spawn() returned [ \( status) ] " )
570+ }
571+
572+ switch results. count {
573+ case ... 0 :
574+ throw MyError . unknownFailure
575+ case 1 :
576+ try results [ 0 ] . get ( )
577+ default :
578+ if allFailureCount > 0 {
579+ print (
580+ """
581+ Ran BenchmarkTool \( results. count) times, but it failed \( allFailureCount) times.
582+ Will exit with the first failure.
583+
584+ """
585+ )
586+ guard
587+ let failure = results. first ( where: { result in
588+ switch result {
589+ case . failure:
590+ return true
591+ case . success:
592+ return false
593+ }
594+ } )
595+ else {
596+ throw MyError . unknownFailure
597+ }
598+ try failure. get ( )
527599 }
528600 }
529601
@@ -543,5 +615,6 @@ import Glibc
543615 case noPermissions = 6
544616 case invalidArgument = 101
545617 case buildFailed = 102
618+ case unknownFailure = 103
546619 }
547620}
0 commit comments