Print a completion message when all tests are done (#192)

nickrobinson251 · web-flow · commit b90c053f1c1d · 2025-04-06T23:17:03.000+01:00
* Print completion message when all tests are done
* More debug logs
* Log a message when each worker has completed tests
* Fix import for `at-debugv`
* Debug log after `close(worker)`
* Use at-debug in workers.jl
* Add worker num to all worker restart messages
* Fix typo in debug message
* Make worker debug messages consistent
* Replace LoggingExtras.jl dep with our own debug macro (#195)
* Replace LoggingExtras.jl dep with our own debug macro
* Update debug log syntax in tests
* fixup! Replace LoggingExtras.jl dep with our own debug macro
* fixup! Replace LoggingExtras.jl dep with our own debug macro
diff --git a/Project.toml b/Project.toml
@@ -5,7 +5,6 @@ version = "1.30.0"
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
@@ -15,7 +14,6 @@ TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b"
 [compat]
 Dates = "1"
 Logging = "1"
-LoggingExtras = "1"
 Pkg = "1"
 Profile = "1"
 Random = "1"
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
@@ -6,8 +6,7 @@ using Test: Test, DefaultTestSet, TestSetException
 using .Threads: @spawn, nthreads
 using Pkg: Pkg
 using TestEnv
-using Logging
-using LoggingExtras
+using Logging: current_logger, with_logger
 
 export runtests, runtestitem
 export @testsetup, @testitem
@@ -66,6 +65,7 @@ function softscope_all!(@nospecialize ex)
     end
 end
 
+include("debug.jl")
 include("workers.jl")
 using .Workers
 include("macros.jl")
@@ -304,7 +304,7 @@ function runtests(
     cfg = _Config(; nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, testitem_failfast, failfast, retries, logs, report, verbose_results, timeout_profile_wait, memory_threshold, gc_between_testitems)
     debuglvl = Int(debug)
     if debuglvl > 0
-        LoggingExtras.withlevel(LoggingExtras.Debug; verbosity=debuglvl) do
+        withdebug(debuglvl) do
             _runtests(ti_filter, paths′, cfg)
         end
     else
@@ -443,23 +443,24 @@ function _runtests_in_current_env(
                 ti = starting[i]
                 @spawn begin
                     with_logger(original_logger) do
-                        manage_worker($w, $proj_name, $testitems, $ti, $cfg)
+                        manage_worker($w, $proj_name, $testitems, $ti, $cfg; worker_num=$i)
                     end
                 end
             end
         end
         Test.TESTSET_PRINT_ENABLE[] = true # reenable printing so our `finish` prints
+        # Let users know if tests are done, and if all of them ran (or if we failed fast).
+        # Print this above the final report as there might have been other logs printed
+        # since a failfast-cancellation was printed, but print it ASAP after tests finish
+        # in case any of the recording/reporting steps have an issue.
+        print_completion_summary(testitems; failedfast=(cfg.failfast && is_cancelled(testitems)))
         record_results!(testitems)
         cfg.report && write_junit_file(proj_name, dirname(projectfile), testitems.graph.junit)
-        if cfg.failfast && is_cancelled(testitems)
-            # Let users know if not all tests ran. Print this just above the final report as
-            # there might have been other logs printed since the cancellation was printed.
-            print_failfast_summary(testitems)
-        end
+        @debugv 1 "Calling Test.finish(testitems)"
         Test.finish(testitems) # print summary of total passes/failures/errors
     finally
         Test.TESTSET_PRINT_ENABLE[] = true
-        # Cleanup test setup logs
+        @debugv 1 "Cleaning up test setup logs"
         foreach(Iterators.filter(endswith(".log"), readdir(RETESTITEMS_TEMP_FOLDER[], join=true))) do logfile
             try
                 # See https://github.com/JuliaTesting/ReTestItems.jl/issues/124
@@ -468,7 +469,9 @@ function _runtests_in_current_env(
                 @debug "Error while attempting to remove $(logfile)" err
             end
         end
+        @debugv 1 "Done cleaning up test setup logs"
     end
+    @debugv 1 "DONE"
     return nothing
 end
 
@@ -577,16 +580,18 @@ function record_test_error!(testitem, msg, elapsed_seconds::Real=0.0)
     return testitem
 end
 
+# The provided `worker_num` is only for logging purposes, and not persisted as part of the worker.
 function manage_worker(
-    worker::Worker, proj_name::AbstractString, testitems::TestItems, testitem::Union{TestItem,Nothing}, cfg::_Config,
+    worker::Worker, proj_name::AbstractString, testitems::TestItems, testitem::Union{TestItem,Nothing}, cfg::_Config;
+    worker_num::Int
 )
     ntestitems = length(testitems.testitems)
     run_number = 1
     memory_threshold_percent = 100 * cfg.memory_threshold
     while testitem !== nothing
         ch = Channel{TestItemResult}(1)
         if memory_percent() > memory_threshold_percent
-            @warn "Memory usage ($(Base.Ryu.writefixed(memory_percent(), 1))%) is higher than threshold ($(Base.Ryu.writefixed(memory_threshold_percent, 1))%). Restarting worker process to try to free memory."
+            @warn "Memory usage ($(Base.Ryu.writefixed(memory_percent(), 1))%) is higher than threshold ($(Base.Ryu.writefixed(memory_threshold_percent, 1))%). Restarting process for worker $worker_num to try to free memory."
             terminate!(worker)
             wait(worker)
             worker = robust_start_worker(proj_name, cfg.nworker_threads, cfg.worker_init_expr, ntestitems)
@@ -642,7 +647,7 @@ function manage_worker(
                 close(timer)
             end
         catch e
-            @debugv 2 "Error" exception=e
+            @debugv 2 "Error: $e"
             # Handle the exception
             if e isa TimeoutException
                 if cfg.timeout_profile_wait > 0
@@ -684,7 +689,7 @@ function manage_worker(
                 run_number = 1
             else
                 run_number += 1
-                @info "Retrying $(repr(testitem.name)) on a new worker process. Run=$run_number."
+                @info "Retrying $(repr(testitem.name)) on a new worker $worker_num process. Run=$run_number."
             end
             # The worker was terminated, so replace it unless there are no more testitems to run
             if testitem !== nothing
@@ -694,7 +699,9 @@ function manage_worker(
             continue
         end
     end
+    @info "All tests on worker $worker_num completed. Closing $worker."
     close(worker)
+    @debugv 1 "Worker $worker_num closed: $(worker)"
     return nothing
 end
 
diff --git a/src/debug.jl b/src/debug.jl
@@ -0,0 +1,36 @@
+DEBUG_LEVEL::Int = 0
+
+function setdebug!(level::Int)
+    global DEBUG_LEVEL = level
+    return nothing
+end
+
+"""
+    withdebug(level::Int) do
+        func()
+    end
+"""
+function withdebug(f, level::Int)
+    old = DEBUG_LEVEL
+    try
+        setdebug!(level)
+        f()
+    finally
+        setdebug!(old)
+    end
+end
+
+"""
+    @debugv 1 "msg"
+"""
+macro debugv(level::Int, messsage)
+    quote
+        if DEBUG_LEVEL >= $level
+            _full_file = $String($(QuoteNode(__source__.file)))
+            _file = $last($splitdir(_full_file))
+            _line = $(QuoteNode(__source__.line))
+            msg = $(esc(messsage))
+            $print("DEBUG @ $(_file):$(_line) | $msg\n")
+        end
+    end
+end
diff --git a/src/junit_xml.jl b/src/junit_xml.jl
@@ -191,6 +191,7 @@ function write_junit_file(path::AbstractString, junit::Union{JUnitTestSuites,JUn
     open(path, "w") do io
         write_junit_file(io, junit)
     end
+    @debugv 1 "Done writing JUnit XML file to $(repr(path))"
     return nothing
 end
 
@@ -201,6 +202,7 @@ function write_junit_file(io::IO, junit::Union{JUnitTestSuites,JUnitTestSuite})
 end
 
 function write_junit_xml(io, junit::JUnitTestSuites)
+    @debugv 2 "Writing JUnit XML for testsuites $(junit.name)"
     write(io, "\n<testsuites")
     write_counts(io, junit.counts)
     write(io, ">")
@@ -212,6 +214,7 @@ function write_junit_xml(io, junit::JUnitTestSuites)
 end
 
 function write_junit_xml(io, ts::JUnitTestSuite)
+    @debugv 2 "Writing JUnit XML for testsuite $(ts.name)"
     write(io, "\n<testsuite name=", xml_markup(ts.name))
     write_counts(io, ts.counts)
     write(io, ">")
@@ -258,6 +261,7 @@ function write_dd_tags(io, tc::JUnitTestCase)
 end
 
 function write_junit_xml(io, tc::JUnitTestCase)
+    @debugv 2 "Writing JUnit XML for testcase $(tc.name)"
     write(io, "\n\t<testcase name=", xml_markup(tc.name))
     write_counts(io, tc.counts)
     write(io, ">")
diff --git a/src/log_capture.jl b/src/log_capture.jl
@@ -310,9 +310,13 @@ end
 # So that the user is warned that not all tests were run.
 # We don't use loglock here, because this is only called once on the coordinator after all
 # tasks running tests have stopped and we're printing the final test report.
-function print_failfast_summary(t::TestItems)
+function print_completion_summary(t::TestItems; failedfast::Bool)
     io = DEFAULT_STDOUT[]
-    printstyled(io, "[ Fail Fast: "; bold=true, color=Base.warn_color())
+    if failedfast
+        printstyled(io, "[ Fail Fast: "; bold=true, color=Base.warn_color())
+    else
+        printstyled(io, "[ Tests Completed: "; bold=true, color=Base.info_color())
+    end
     println(io, "$(t.count)/$(length(t.testitems)) test items were run.")
     return nothing
 end
diff --git a/src/testcontext.jl b/src/testcontext.jl
@@ -108,9 +108,12 @@ is_cancelled(t::TestItems) = @atomic t.cancelled
 ###
 
 function record_results!(ti::TestItems)
+    @debugv 1 "Recording testitem results"
     foreach(ti.graph.children) do child
         record_results!(ti.graph, child)
     end
+    @debugv 1 "Done recording testitem results"
+    return ti
 end
 
 function record_results!(dir::DirNode, child_dir::DirNode)
@@ -153,7 +156,7 @@ function get_starting_testitems(ti::TestItems, n)
     len = length(ti.testitems)
     step = max(1, len / n)
     testitems = [ti.testitems[round(Int, i)] for i in 1:step:len]
-    @debugv 2 "get_starting_testitems" len n allunique(testitems)
+    @debugv 2 "get_starting_testitems len=$len n=$n allunique=$(allunique(testitems))"
     @assert length(testitems) == min(n, len) && allunique(testitems)
     for (i, t) in enumerate(testitems)
         @atomic t.scheduled_for_evaluation.value = true
diff --git a/src/workers.jl b/src/workers.jl
@@ -73,7 +73,7 @@ end
 function terminate!(w::Worker, from::Symbol=:manual)
     already_terminated = @atomicswap :monotonic w.terminated = true
     if !already_terminated
-        @debug "terminating worker $(w.pid) from $from"
+        @debug "terminating $(w) from $(from)"
     end
     wte = WorkerTerminatedException(w)
     @lock w.lock begin
@@ -114,7 +114,7 @@ end
 # Called when timeout_profile_wait is non-zero.
 function trigger_profile(w::Worker, timeout_profile_wait, from::Symbol=:manual)
     if !Sys.iswindows()
-        @debug "sending profile request to worker $(w.pid) from $from"
+        @debug "sending profile request to $(w) from $(from)"
         if Sys.islinux()
             kill(w.process, 10)  # SIGUSR1
         elseif Sys.isbsd()
@@ -128,13 +128,15 @@ end
 # gracefully terminate a worker by sending a shutdown message
 # and waiting for the other tasks to perform worker shutdown
 function Base.close(w::Worker)
+    @debug "closing $(w)"
     if !w.terminated && isopen(w.socket)
         req = Request(Symbol(), :(), rand(UInt64), true)
         @lock w.lock begin
             serialize(w.socket, req)
             flush(w.socket)
         end
     end
+    @debug "waiting for $(w) to terminate"
     wait(w)
     return
 end
@@ -231,7 +233,7 @@ function redirect_worker_output(io::IO, w::Worker, fn, proc, ev::Threads.Event)
             end
         end
     catch e
-        # @error "Error redirecting worker output $(w.pid)" exception=(e, catch_backtrace())
+        # @error "Error redirecting $(w) output" exception=(e, catch_backtrace())
         terminate!(w, :redirect_worker_output)
         e isa EOFError || e isa Base.IOError || rethrow()
     finally
@@ -250,13 +252,13 @@ function process_responses(w::Worker, ev::Threads.Event)
         while isopen(w.socket) && !w.terminated
             # get the next Response from the worker
             r = deserialize(w.socket)
-            @assert r isa Response "Received invalid response from worker $(w.pid): $(r)"
-            # println("Received response $(r) from worker $(w.pid)")
+            @assert r isa Response "Received invalid response from $(w): $(r)"
+            # println("Received response $(r) from $(w)")
             @lock lock begin
-                @assert haskey(reqs, r.id) "Received response for unknown request $(r.id) from worker $(w.pid)"
+                @assert haskey(reqs, r.id) "Received response for unknown request $(r.id) from $(w)"
                 # look up the Future for this request
                 fut = pop!(reqs, r.id)
-                @assert !isready(fut.value) "Received duplicate response for request $(r.id) from worker $(w.pid)"
+                @assert !isready(fut.value) "Received duplicate response for request $(r.id) from $(w)"
                 if r.error !== nothing
                     # this allows rethrowing the exception from the worker to the caller
                     close(fut.value, r.error)
@@ -266,7 +268,7 @@ function process_responses(w::Worker, ev::Threads.Event)
             end
         end
     catch e
-        # @error "Error processing responses from worker $(w.pid)" exception=(e, catch_backtrace())
+        # @error "Error processing responses from $(w)" exception=(e, catch_backtrace())
         terminate!(w, :process_responses)
         e isa EOFError || e isa Base.IOError || rethrow()
     end
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
@@ -45,18 +45,24 @@ end
 
 # test we can call runtests manually w/ directory
 @testset "manual `runtests(dir)`" begin
-    results = encased_testset() do
-        runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl"))
+    using IOCapture
+    c = IOCapture.capture() do
+        encased_testset(() -> runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl")))
     end
+    results = c.value
     @test n_passed(results) == 2  # NoDeps has two test files with a test each
+    @test contains(c.output, "[ Tests Completed: 2/2 test items were run.")
 end
 
 @testset "manual `runtests(file)`" begin
     # test we can point to a file at the base of the package (not just in `src` or `test`)
-    results = encased_testset() do
-        runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl", "toplevel_tests.jl"))
+    using IOCapture
+    c = IOCapture.capture() do
+        encased_testset(() -> runtests(joinpath(TEST_PKG_DIR, "NoDeps.jl", "toplevel_tests.jl")))
     end
+    results = c.value
     @test n_passed(results) == 1
+    @test contains(c.output, "[ Tests Completed: 1/1 test items were run.")
 end
 
 @testset "`runtests(path)` auto finds testsetups" begin
@@ -273,20 +279,28 @@ end
 nworkers = 2
 @testset "runtests with nworkers = $nworkers" verbose=true begin
     @testset "Pkg.test() $pkg" for pkg in TEST_PKGS
-        results = with_test_package(pkg) do
-            withenv("RETESTITEMS_NWORKERS" => nworkers) do
-                Pkg.test()
+        c = IOCapture.capture() do
+            with_test_package(pkg) do
+                withenv("RETESTITEMS_NWORKERS" => nworkers) do
+                    Pkg.test()
+                end
             end
         end
+        results = c.value
         @test all_passed(results)
+        @test contains(c.output, "[ Tests Completed")
     end
     @testset "Pkg.test() DontPass.jl" begin
-        results = with_test_package("DontPass.jl") do
-            withenv("RETESTITEMS_NWORKERS" => 2) do
-                Pkg.test()
+        c = IOCapture.capture() do
+            with_test_package("DontPass.jl") do
+                withenv("RETESTITEMS_NWORKERS" => 2) do
+                    Pkg.test()
+                end
             end
         end
+        results = c.value
         @test length(non_passes(results)) > 0
+        @test contains(c.output, "[ Tests Completed")
     end
 end
 
@@ -447,9 +461,9 @@ end
                 @test !contains(c.output, "tests done")
             end
             if debug
-                @test contains(c.output, "Debug:")
+                @test contains(c.output, "DEBUG @")
             else
-                @test !contains(c.output, "Debug:")
+                @test !contains(c.output, "DEBUG @")
             end
             # Test we have the expected summary table
             testset = c.value
@@ -1198,7 +1212,7 @@ end
         # monkey-patch the internal `memory_percent` function to return a fixed value, so we
         # can control if we hit the `memory_threshold`.
         @eval ReTestItems.memory_percent() = 83.1
-        expected_warning = "Warning: Memory usage (83.1%) is higher than threshold (7.0%). Restarting worker process to try to free memory."
+        expected_warning = "Warning: Memory usage (83.1%) is higher than threshold (7.0%). Restarting process for worker 1 to try to free memory."
 
         # Pass `memory_threshold` keyword, and hit the memory threshold.
         c1 = IOCapture.capture() do