diff --git a/Project.toml b/Project.toml index 97d5dd23..4fdf1971 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,7 @@ Preferences = "21216c6a-2e73-6563-6e65-726566657250" Scratch = "6c6a2e73-6563-6170-7368-637461726353" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" -TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +Tracy = "e689c965-62c8-4b79-b2c5-8359227902fd" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] @@ -28,6 +28,6 @@ Preferences = "1" Scratch = "1" Serialization = "1" TOML = "1" -TimerOutputs = "0.5" +Tracy = "0.1.4" UUIDs = "1" julia = "1.10" diff --git a/src/GPUCompiler.jl b/src/GPUCompiler.jl index a02ba8b3..5d337e5e 100644 --- a/src/GPUCompiler.jl +++ b/src/GPUCompiler.jl @@ -3,7 +3,7 @@ module GPUCompiler using LLVM using LLVM.Interop -using TimerOutputs +using Tracy using ExprTools: splitdef, combinedef @@ -64,6 +64,8 @@ function __init__() end mkpath(dir) global compile_cache = dir + + Tracy.@register_tracepoints() end end # module diff --git a/src/driver.jl b/src/driver.jl index 3b372e01..d4c6ac70 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -82,7 +82,7 @@ function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwarg error("No active LLVM context. Use `JuliaContext()` do-block syntax to create one.") end - @timeit_debug to "Validation" begin + @tracepoint "Validation" begin check_method(job) # not optional job.config.validate && check_invocation(job) end @@ -96,7 +96,7 @@ function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwarg if output == :llvm if job.config.strip - @timeit_debug to "strip debug info" strip_debuginfo!(ir) + @tracepoint "strip debug info" strip_debuginfo!(ir) end return ir, ir_meta @@ -168,7 +168,7 @@ const __llvm_initialized = Ref(false) __llvm_initialized[] = true end - @timeit_debug to "IR generation" begin + @tracepoint "IR generation" begin ir, compiled = irgen(job) if job.config.entry_abi === :specfunc entry_fn = compiled[job.source].specfunc @@ -267,21 +267,21 @@ const __llvm_initialized = Ref(false) runtime_intrinsics = ["julia.gc_alloc_obj"] end - @timeit_debug to "Library linking" begin + @tracepoint "Library linking" begin # target-specific libraries undefined_fns = LLVM.name.(decls(ir)) - @timeit_debug to "target libraries" link_libraries!(job, ir, undefined_fns) + @tracepoint "target libraries" link_libraries!(job, ir, undefined_fns) # GPU run-time library if !uses_julia_runtime(job) && any(fn -> fn in runtime_fns || fn in runtime_intrinsics, undefined_fns) - @timeit_debug to "runtime library" link_library!(ir, runtime) + @tracepoint "runtime library" link_library!(ir, runtime) end end end - @timeit_debug to "IR post-processing" begin + @tracepoint "IR post-processing" begin # mark everything internal except for entrypoints and any exported # global variables. this makes sure that the optimizer can, e.g., # rewrite function signatures. @@ -312,7 +312,7 @@ const __llvm_initialized = Ref(false) end if job.config.toplevel && job.config.optimize - @timeit_debug to "optimization" begin + @tracepoint "optimization" begin optimize!(job, ir; job.config.opt_level) # deferred codegen has some special optimization requirements, @@ -339,7 +339,7 @@ const __llvm_initialized = Ref(false) end if job.config.toplevel && job.config.cleanup - @timeit_debug to "clean-up" begin + @tracepoint "clean-up" begin @dispose pb=NewPMPassBuilder() begin add!(pb, RecomputeGlobalsAAPass()) add!(pb, GlobalOptPass()) @@ -379,13 +379,13 @@ const __llvm_initialized = Ref(false) end if job.config.toplevel && job.config.validate - @timeit_debug to "Validation" begin + @tracepoint "Validation" begin check_ir(job, ir) end end if should_verify() - @timeit_debug to "verification" verify(ir) + @tracepoint "verification" verify(ir) end return ir, (; entry, compiled) @@ -395,13 +395,13 @@ end format::LLVM.API.LLVMCodeGenFileType) # NOTE: strip after validation to get better errors if job.config.strip - @timeit_debug to "Debug info removal" strip_debuginfo!(ir) + @tracepoint "Debug info removal" strip_debuginfo!(ir) end - @timeit_debug to "LLVM back-end" begin - @timeit_debug to "preparation" prepare_execution!(job, ir) + @tracepoint "LLVM back-end" begin + @tracepoint "preparation" prepare_execution!(job, ir) - code = @timeit_debug to "machine-code generation" mcgen(job, ir, format) + code = @tracepoint "machine-code generation" mcgen(job, ir, format) end return code, () diff --git a/src/gcn.jl b/src/gcn.jl index 0774a203..6030773e 100644 --- a/src/gcn.jl +++ b/src/gcn.jl @@ -61,7 +61,7 @@ end function lower_throw_extra!(mod::LLVM.Module) job = current_job::CompilerJob changed = false - @timeit_debug to "lower throw (extra)" begin + @tracepoint "lower throw (extra)" begin throw_functions = [ r"julia_bounds_error.*", diff --git a/src/irgen.jl b/src/irgen.jl index cde75090..e9ac9490 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -1,7 +1,7 @@ # LLVM IR generation function irgen(@nospecialize(job::CompilerJob)) - mod, compiled = @timeit_debug to "emission" compile_method_instance(job) + mod, compiled = @tracepoint "emission" compile_method_instance(job) if job.config.entry_abi === :specfunc entry_fn = compiled[job.source].specfunc else @@ -11,7 +11,7 @@ function irgen(@nospecialize(job::CompilerJob)) entry = functions(mod)[entry_fn] # clean up incompatibilities - @timeit_debug to "clean-up" begin + @tracepoint "clean-up" begin for llvmf in functions(mod) if Base.isdebugbuild() # only occurs in debug builds @@ -81,7 +81,7 @@ function irgen(@nospecialize(job::CompilerJob)) (; compiled[job.source].ci, func, specfunc) # minimal required optimization - @timeit_debug to "rewrite" begin + @tracepoint "rewrite" begin if job.config.kernel && needs_byval(job) # pass all bitstypes by value; by default Julia passes aggregates by reference # (this improves performance, and is mandated by certain back-ends like SPIR-V). @@ -136,7 +136,7 @@ end function lower_throw!(mod::LLVM.Module) job = current_job::CompilerJob changed = false - @timeit_debug to "lower throw" begin + @tracepoint "lower throw" begin throw_functions = [ # unsupported runtime functions that are used to throw specific exceptions @@ -370,7 +370,7 @@ end # https://reviews.llvm.org/D79744 function lower_byval(@nospecialize(job::CompilerJob), mod::LLVM.Module, f::LLVM.Function) ft = function_type(f) - @timeit_debug to "lower byval" begin + @tracepoint "lower byval" begin # classify the arguments args = classify_arguments(job, ft) diff --git a/src/ptx.jl b/src/ptx.jl index 4b772f7f..0c0a556b 100644 --- a/src/ptx.jl +++ b/src/ptx.jl @@ -387,7 +387,7 @@ function nvvm_reflect!(fun::LLVM.Function) job = current_job::CompilerJob mod = LLVM.parent(fun) changed = false - @timeit_debug to "nvvmreflect" begin + @tracepoint "nvvmreflect" begin # find and sanity check the nnvm-reflect function # TODO: also handle the llvm.nvvm.reflect intrinsic diff --git a/src/spirv.jl b/src/spirv.jl index 7e27c346..d4a4fd10 100644 --- a/src/spirv.jl +++ b/src/spirv.jl @@ -212,7 +212,7 @@ end function rm_trap!(mod::LLVM.Module) job = current_job::CompilerJob changed = false - @timeit_debug to "remove trap" begin + @tracepoint "remove trap" begin if haskey(functions(mod), "llvm.trap") trap = functions(mod)["llvm.trap"] @@ -238,7 +238,7 @@ end function rm_freeze!(mod::LLVM.Module) job = current_job::CompilerJob changed = false - @timeit_debug to "remove freeze" begin + @tracepoint "remove freeze" begin for f in functions(mod), bb in blocks(f), inst in instructions(bb) if inst isa LLVM.FreezeInst diff --git a/src/utils.jl b/src/utils.jl index 04995428..cbd1a67b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -2,15 +2,6 @@ defs(mod::LLVM.Module) = filter(f -> !isdeclaration(f), collect(functions(mod)) decls(mod::LLVM.Module) = filter(f -> isdeclaration(f) && !LLVM.isintrinsic(f), collect(functions(mod))) -## timings - -const to = TimerOutput() - -timings() = (TimerOutputs.print_timer(to); println()) - -enable_timings() = (TimerOutputs.enable_debug_timings(GPUCompiler); return) - - ## debug verification should_verify() = ccall(:jl_is_debugbuild, Cint, ()) == 1 ||