From ee90041693a1f1fc28a8276608619d2491c2d6d6 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 10 Oct 2025 22:15:01 +0200
Subject: [PATCH 1/4] Use ParallelTestRunner.jl

---
 test/Project.toml |   6 +-
 test/runtests.jl  | 414 ++--------------------------------------------
 test/setup.jl     |  82 ---------
 3 files changed, 17 insertions(+), 485 deletions(-)
 delete mode 100644 test/setup.jl

diff --git a/test/Project.toml b/test/Project.toml
index 71311c11..32edd7d4 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,15 +1,12 @@
 [deps]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
 LLVM_jll = "86de99a1-58d6-5da7-8064-bd56ce2e322c"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SPIRV_LLVM_Backend_jll = "4376b9bf-cff8-51b6-bb48-39421dff0d0c"
 SPIRV_LLVM_Translator_jll = "4a5d46fc-d8cf-5151-a261-86b458210efb"
@@ -20,6 +17,7 @@ demumble_jll = "1e29f10c-031c-5a83-9565-69cddfc27673"
 
 [compat]
 Aqua = "0.8"
+ParallelTestRunner = "0.1.1"
 
 [extras]
 GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
diff --git a/test/runtests.jl b/test/runtests.jl
index 8fc7d827..4c3ac201 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,408 +1,24 @@
-using Distributed
-using Dates
-import REPL
-using Printf: @sprintf
-using Base.Filesystem: path_separator
+using ParallelTestRunner 
 
-# parse some command-line arguments
-function extract_flag!(args, flag, default=nothing)
-    for f in args
-        if startswith(f, flag)
-            # Check if it's just `--flag` or if it's `--flag=foo`
-            if f != flag
-                val = split(f, '=')[2]
-                if default !== nothing && !(typeof(default) <: AbstractString)
-                  val = parse(typeof(default), val)
-                end
-            else
-                val = default
-            end
+const init_code = quote
+    using Test, GPUCompiler, LLVM
+    using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
 
-            # Drop this value from our args
-            filter!(x -> x != f, args)
-            return (true, val)
+    # include all helpers
+    include(joinpath(@__DIR__, "helpers", "runtime.jl"))
+    for file in readdir(joinpath(@__DIR__, "helpers"))
+        if endswith(file, ".jl") && file != "runtime.jl"
+            include(joinpath(@__DIR__, "helpers", file))
         end
     end
-    return (false, default)
+    using .FileCheck
 end
-do_help, _ = extract_flag!(ARGS, "--help")
-if do_help
-    println("""
-        Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...]
 
-               --help             Show this text.
-               --list             List all available tests.
-               --quickfail        Fail the entire run as soon as a single test errored.
-               --jobs=N           Launch `N` processes to perform tests (default: Sys.CPU_THREADS).
-
-               Remaining arguments filter the tests that will be executed.""")
-    exit(0)
-end
-_, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS)
-do_quickfail, _ = extract_flag!(ARGS, "--quickfail")
-
-include("setup.jl")     # make sure everything is precompiled
-
-@info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable."
-
-# discover tests
-const all_tests = []
-const test_runners = Dict()
-## files in the test folder
-for (rootpath, dirs, files) in walkdir(@__DIR__)
-  # find Julia files
-  filter!(files) do file
-    endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl"
-  end
-  isempty(files) && continue
-  basename(rootpath) == "helpers" && continue
-
-  # strip extension
-  files = map(files) do file
-    file[1:end-3]
-  end
-
-  # prepend subdir
-  subdir = relpath(rootpath, @__DIR__)
-  if subdir != "."
-    files = map(files) do file
-      joinpath(subdir, file)
-    end
-  end
-
-  # unify path separators
-  files = map(files) do file
-    replace(file, path_separator => '/')
-  end
-
-  append!(all_tests, files)
-  for file in files
-    test_runners[file] = ()->include("$(@__DIR__)/$file.jl")
-  end
-end
-
-# parse some more command-line arguments
-## --list to list all available tests
-do_list, _ = extract_flag!(ARGS, "--list")
-if do_list
-    println("Available tests:")
-    for test in sort(all_tests)
-        println(" - $test")
-    end
-    exit(0)
-end
-## no options should remain
-optlike_args = filter(startswith("-"), ARGS)
-if !isempty(optlike_args)
-    error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)")
-end
-## the remaining args filter tests
-tests = copy(all_tests)
-if !isempty(ARGS)
-  filter!(tests) do test
-    any(arg->startswith(test, arg), ARGS)
-  end
-end
-
-# determine tests to skip based on the environment
-skip_tests = []
-if LLVM.is_asserts()
-    # XXX: GCN's non-0 stack address space triggers LLVM assertions due to Julia bugs
-    push!(skip_tests, "gcn")
-end
-if VERSION < v"1.11"
-    append!(skip_tests, ["ptx/precompile", "native/precompile"])
-end
-## finalize
-skip_tests = filter(test->any(skip->occursin(skip,test), skip_tests), tests)
-if !isempty(skip_tests)
-    @info "Skipping the following tests: $(join(skip_tests, ", "))"
-    filter!(!in(skip_tests), tests)
-end
-sort!(tests; by=(file)->stat("$(@__DIR__)/$file.jl").size, rev=true)
-unique!(tests)
-
-# add workers
-const test_exeflags = Base.julia_cmd()
-filter!(test_exeflags.exec) do c
-    return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds"))
-end
-push!(test_exeflags.exec, "--check-bounds=yes")
-push!(test_exeflags.exec, "--startup-file=no")
-push!(test_exeflags.exec, "--depwarn=yes")
-push!(test_exeflags.exec, "--project=$(Base.active_project())")
-const test_exename = popfirst!(test_exeflags.exec)
-function addworker(X; kwargs...)
-    withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do
-        procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...)
-        @everywhere procs include($(joinpath(@__DIR__, "setup.jl")))
-        procs
-    end
-end
-addworker(min(jobs, length(tests)))
-
-# pretty print information about gc and mem usage
-testgroupheader = "Test"
-workerheader = "(Worker)"
-name_align        = maximum([textwidth(testgroupheader) + textwidth(" ") +
-                             textwidth(workerheader); map(x -> textwidth(x) +
-                             3 + ndigits(nworkers()), tests)])
-elapsed_align     = textwidth("Time (s)")
-gc_align      = textwidth("GC (s)")
-percent_align = textwidth("GC %")
-alloc_align   = textwidth("Alloc (MB)")
-rss_align     = textwidth("RSS (MB)")
-printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ")
-printstyled("         | ---------------- CPU ---------------- |\n", color=:white)
-printstyled(testgroupheader, color=:white)
-printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white)
-printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white)
-print_lock = stdout isa Base.LibuvStream ? stdout.lock : ReentrantLock()
-if stderr isa Base.LibuvStream
-    stderr.lock = print_lock
-end
-function print_testworker_stats(test, wrkr, resp)
-    @nospecialize resp
-    lock(print_lock)
-    try
-        printstyled(test, color=:white)
-        printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white)
-        time_str = @sprintf("%7.2f",resp[2])
-        printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white)
-
-        cpu_gc_str = @sprintf("%5.2f", resp[4])
-        printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white)
-        # since there may be quite a few digits in the percentage,
-        # the left-padding here is less to make sure everything fits
-        cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2])
-        printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white)
-        cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20)
-        printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white)
-
-        cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20)
-        printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white)
-    finally
-        unlock(print_lock)
-    end
-end
-global print_testworker_started = (name, wrkr)->begin
-end
-function print_testworker_errored(name, wrkr)
-    lock(print_lock)
-    try
-        printstyled(name, color=:red)
-        printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
-            " "^elapsed_align, " failed at $(now())\n", color=:red)
-    finally
-        unlock(print_lock)
+function testfilter(test)
+    if startswith(test, "helpers/")
+        return false
     end
+    return true
 end
 
-# run tasks
-t0 = now()
-results = []
-all_tasks = Task[]
-try
-    # Monitor stdin and kill this task on ^C
-    # but don't do this on Windows, because it may deadlock in the kernel
-    t = current_task()
-    running_tests = Dict{String, DateTime}()
-    if !Sys.iswindows() && isa(stdin, Base.TTY)
-        stdin_monitor = @async begin
-            term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr)
-            try
-                REPL.Terminals.raw!(term, true)
-                while true
-                    c = read(term, Char)
-                    if c == '\x3'
-                        Base.throwto(t, InterruptException())
-                        break
-                    elseif c == '?'
-                        println("Currently running: ")
-                        tests = sort(collect(running_tests), by=x->x[2])
-                        foreach(tests) do (test, date)
-                            println(test, " (running for ", round(now()-date, Minute), ")")
-                        end
-                    end
-                end
-            catch e
-                isa(e, InterruptException) || rethrow()
-            finally
-                REPL.Terminals.raw!(term, false)
-            end
-        end
-    end
-    @sync begin
-        function recycle_worker(p)
-            rmprocs(p, waitfor=30)
-            return nothing
-        end
-
-        for p in workers()
-            @async begin
-                push!(all_tasks, current_task())
-                while length(tests) > 0
-                    test = popfirst!(tests)
-
-                    # sometimes a worker failed, and we need to spawn a new one
-                    if p === nothing
-                        p = addworker(1)[1]
-                    end
-                    wrkr = p
-
-                    local resp
-
-                    # run the test
-                    running_tests[test] = now()
-                    try
-                        resp = remotecall_fetch(runtests, wrkr, test_runners[test], test)
-                    catch e
-                        isa(e, InterruptException) && return
-                        resp = Any[e]
-                    end
-                    delete!(running_tests, test)
-                    push!(results, (test, resp))
-
-                    # act on the results
-                    if resp[1] isa Exception
-                        print_testworker_errored(test, wrkr)
-                        do_quickfail && Base.throwto(t, InterruptException())
-
-                        # the worker encountered some failure, recycle it
-                        # so future tests get a fresh environment
-                        p = recycle_worker(p)
-                    else
-                        print_testworker_stats(test, wrkr, resp)
-
-                        cpu_rss = resp[6]
-                        if haskey(ENV, "CI") && cpu_rss > 3*2^30
-                            # XXX: collecting garbage
-                            #      after each test, we are leaking CPU memory somewhere.
-                            #      this is a problem on CI, where2 we don't have much RAM.
-                            #      work around this by periodically recycling the worker.
-                            p = recycle_worker(p)
-                        end
-                    end
-                end
-
-                if p !== nothing
-                    recycle_worker(p)
-                end
-            end
-        end
-    end
-catch e
-    isa(e, InterruptException) || rethrow()
-    # If the test suite was merely interrupted, still print the
-    # summary, which can be useful to diagnose what's going on
-    foreach(task -> begin
-            istaskstarted(task) || return
-            istaskdone(task) && return
-            try
-                schedule(task, InterruptException(); error=true)
-            catch ex
-                @error "InterruptException" exception=ex,catch_backtrace()
-            end
-        end, all_tasks)
-    for t in all_tasks
-        # NOTE: we can't just wait, but need to discard the exception,
-        #       because the throwto for --quickfail also kills the worker.
-        try
-            wait(t)
-        catch e
-            showerror(stderr, e)
-        end
-    end
-finally
-    if @isdefined stdin_monitor
-        schedule(stdin_monitor, InterruptException(); error=true)
-    end
-end
-t1 = now()
-elapsed = canonicalize(Dates.CompoundPeriod(t1-t0))
-println("Testing finished in $elapsed")
-
-# construct a testset to render the test results
-o_ts = Test.DefaultTestSet("Overall")
-function with_testset(f, testset)
-    @static if VERSION >= v"1.13.0-DEV.1044"
-        Test.@with_testset testset f()
-    else
-        Test.push_testset(testset)
-        try
-            f()
-        finally
-            Test.pop_testset()
-        end
-    end
-end
-with_testset(o_ts) do
-    completed_tests = Set{String}()
-    for (testname, (resp,)) in results
-        push!(completed_tests, testname)
-        if isa(resp, Test.DefaultTestSet)
-            with_testset(resp) do
-                Test.record(o_ts, resp)
-            end
-        elseif isa(resp, Tuple{Int,Int})
-            fake = Test.DefaultTestSet(testname)
-            for i in 1:resp[1]
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing))
-            end
-            for i in 1:resp[2]
-                Test.record(fake, Test.Broken(:test, nothing))
-            end
-            with_testset(fake) do
-                Test.record(o_ts, fake)
-            end
-        elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException)
-            println("Worker $(resp.pid) failed running test $(testname):")
-            Base.showerror(stdout, resp.captured)
-            println()
-            fake = Test.DefaultTestSet(testname)
-            for i in 1:resp.captured.ex.pass
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing))
-            end
-            for i in 1:resp.captured.ex.broken
-                Test.record(fake, Test.Broken(:test, nothing))
-            end
-            for t in resp.captured.ex.errors_and_fails
-                Test.record(fake, t)
-            end
-            with_testset(fake) do
-                Test.record(o_ts, fake)
-            end
-        else
-            if !isa(resp, Exception)
-                resp = ErrorException(string("Unknown result type : ", typeof(resp)))
-            end
-            # If this test raised an exception that is not a remote testset exception,
-            # i.e. not a RemoteException capturing a TestSetException that means
-            # the test runner itself had some problem, so we may have hit a segfault,
-            # deserialization errors or something similar.  Record this testset as Errored.
-            fake = Test.DefaultTestSet(testname)
-            Test.record(fake, Test.Error(:nontest_error, testname, nothing, Base.ExceptionStack([(exception=resp,backtrace=[])]), LineNumberNode(1)))
-            with_testset(fake) do
-                Test.record(o_ts, fake)
-            end
-        end
-    end
-    for test in tests
-        (test in completed_tests) && continue
-        fake = Test.DefaultTestSet(test)
-        Test.record(fake, Test.Error(:test_interrupted, test, nothing, Base.ExceptionStack([(exception="skipped",backtrace=[])]), LineNumberNode(1)))
-        with_testset(fake) do
-            Test.record(o_ts, fake)
-        end
-    end
-end
-println()
-Test.print_test_results(o_ts, 1)
-if (VERSION >= v"1.13.0-DEV.1037" && !Test.anynonpass(o_ts)) ||
-   (VERSION < v"1.13.0-DEV.1037" && !o_ts.anynonpass)
-    println("    \033[32;1mSUCCESS\033[0m")
-else
-    println("    \033[31;1mFAILURE\033[0m\n")
-    Test.print_test_errors(o_ts)
-    throw(Test.FallbackTestSetException("Test run finished with errors"))
-end
+runtests(ARGS; init_code, testfilter)
diff --git a/test/setup.jl b/test/setup.jl
deleted file mode 100644
index b73323aa..00000000
--- a/test/setup.jl
+++ /dev/null
@@ -1,82 +0,0 @@
-using Distributed, Test, GPUCompiler, LLVM
-
-using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
-
-# include all helpers
-include(joinpath(@__DIR__, "helpers", "runtime.jl"))
-for file in readdir(joinpath(@__DIR__, "helpers"))
-    if endswith(file, ".jl") && file != "runtime.jl"
-        include(joinpath(@__DIR__, "helpers", file))
-    end
-end
-using .FileCheck
-
-if VERSION >= v"1.13.0-DEV.1044"
-using Base.ScopedValues
-end
-
-
-## entry point
-
-function runtests(f, name)
-    function inner()
-        # generate a temporary module to execute the tests in
-        mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_'))
-        mod = @eval(Main, module $mod_name end)
-        @eval(mod, using Test, Random, GPUCompiler)
-
-        let id = myid()
-            wait(@spawnat 1 print_testworker_started(name, id))
-        end
-
-        ex = quote
-            GC.gc(true)
-            Random.seed!(1)
-
-            @timed @testset $"$name" begin
-                $f()
-            end
-        end
-        data = Core.eval(mod, ex)
-        #data[1] is the testset
-
-        # process results
-        cpu_rss = Sys.maxrss()
-        if VERSION >= v"1.11.0-DEV.1529"
-            tc = Test.get_test_counts(data[1])
-            passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken =
-                tc.passes, tc.fails, tc.errors, tc.broken, tc.cumulative_passes,
-                tc.cumulative_fails, tc.cumulative_errors, tc.cumulative_broken
-        else
-            passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken =
-                Test.get_test_counts(data[1])
-        end
-        if data[1].anynonpass == false
-            data = ((passes+c_passes,broken+c_broken),
-                    data[2],
-                    data[3],
-                    data[4],
-                    data[5])
-        end
-        res = vcat(collect(data), cpu_rss)
-
-        GC.gc(true)
-        res
-    end
-
-    @static if VERSION >= v"1.13.0-DEV.1044"
-        @with Test.TESTSET_PRINT_ENABLE=>false begin
-            inner()
-        end
-    else
-        old_print_setting = Test.TESTSET_PRINT_ENABLE[]
-        Test.TESTSET_PRINT_ENABLE[] = false
-        try
-            inner()
-        finally
-            Test.TESTSET_PRINT_ENABLE[] = old_print_setting
-        end
-    end
-end
-
-nothing # File is loaded via a remotecall to "include". Ensure it returns "nothing".

From 321f2bcad8d8150914b270b849d5429de04d32af Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Sat, 11 Oct 2025 02:11:36 +0200
Subject: [PATCH 2/4] Restore conditional test skips in test/runtests.jl

---
 test/runtests.jl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 4c3ac201..8bfcd50a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,6 +18,13 @@ function testfilter(test)
     if startswith(test, "helpers/")
         return false
     end
+    if LLVM.is_asserts() && test == "gcn"
+        # XXX: GCN's non-0 stack address space triggers LLVM assertions due to Julia bugs
+        return false
+    end
+    if VERSION < v"1.11" && test in ("ptx/precompile", "native/precompile")
+        return false
+    end
     return true
 end
 

From a7da8e259782099dd14683acd397c27f76ae98d0 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Sat, 11 Oct 2025 02:15:19 +0200
Subject: [PATCH 3/4] Import LLVM in test/runtests.jl for testfilter

---
 test/runtests.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/runtests.jl b/test/runtests.jl
index 8bfcd50a..b03fe540 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,5 @@
 using ParallelTestRunner 
+import LLVM
 
 const init_code = quote
     using Test, GPUCompiler, LLVM

From e826b693a190d28dfbe28a6c90f983045913b139 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Sat, 11 Oct 2025 07:35:46 -0400
Subject: [PATCH 4/4] Bump ParallelTestRunner compat to 0.1.2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Mosè Giordano <765740+giordano@users.noreply.github.com>
---
 test/Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Project.toml b/test/Project.toml
index 32edd7d4..fd0a2f0d 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -17,7 +17,7 @@ demumble_jll = "1e29f10c-031c-5a83-9565-69cddfc27673"
 
 [compat]
 Aqua = "0.8"
-ParallelTestRunner = "0.1.1"
+ParallelTestRunner = "0.1.2"
 
 [extras]
 GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"