diff --git a/external/sources/tracy/LICENSE b/external/sources/tracy/LICENSE index 72a6fe1c38..6201608219 100644 --- a/external/sources/tracy/LICENSE +++ b/external/sources/tracy/LICENSE @@ -1,7 +1,7 @@ Tracy Profiler (https://github.com/wolfpld/tracy) is licensed under the 3-clause BSD license. -Copyright (c) 2017-2023, Bartosz Taudul +Copyright (c) 2017-2025, Bartosz Taudul All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/external/sources/tracy/meson.build b/external/sources/tracy/meson.build index 7756490e86..bf2d8c4304 100644 --- a/external/sources/tracy/meson.build +++ b/external/sources/tracy/meson.build @@ -1,103 +1,138 @@ -project('tracy', ['cpp'], version: '0.10.0') +project('tracy', ['cpp'], version: '0.12.2', meson_version: '>=1.3.0', default_options : ['cpp_std=c++11']) + +# internal compiler flags +tracy_compile_args = [] + +# compiler flags shared between the capture library itself and the code using it +tracy_common_args = [] + +# dependencies that will be propagated to the users of the capture library +tracy_public_deps = [] if get_option('tracy_enable') - add_project_arguments('-DTRACY_ENABLE', language : 'cpp') + tracy_common_args += ['-DTRACY_ENABLE'] endif -if get_option('tracy_on_demand') - add_project_arguments('-DTRACY_ON_DEMAND', language : 'cpp') +if get_option('on_demand') + tracy_common_args += ['-DTRACY_ON_DEMAND'] endif -if get_option('tracy_callstack') - add_project_arguments('-DTRACY_CALLSTACK', language : 'cpp') +if get_option('callstack') + tracy_common_args += ['-DTRACY_CALLSTACK'] endif -if get_option('tracy_no_callstack') - add_project_arguments('-DTRACY_NO_CALLSTACK', language : 'cpp') +if get_option('no_callstack') + tracy_common_args += ['-DTRACY_NO_CALLSTACK'] endif -if get_option('tracy_no_callstack_inlines') - add_project_arguments('-DTRACY_NO_CALLSTACK_INLINES', language : 'cpp') +if get_option('no_callstack_inlines') + tracy_common_args += 
['-DTRACY_NO_CALLSTACK_INLINES'] endif -if get_option('tracy_only_localhost') - add_project_arguments('-DTRACY_ONLY_LOCALHOST', language : 'cpp') +if get_option('only_localhost') + tracy_common_args += ['-DTRACY_ONLY_LOCALHOST'] endif -if get_option('tracy_no_broadcast') - add_project_arguments('-DTRACY_NO_BROADCAST', language : 'cpp') +if get_option('no_broadcast') + tracy_common_args += ['-DTRACY_NO_BROADCAST'] endif -if get_option('tracy_only_ipv4') - add_project_arguments('-DTRACY_ONLY_IPV4', language : 'cpp') +if get_option('only_ipv4') + tracy_common_args += ['-DTRACY_ONLY_IPV4'] endif -if get_option('tracy_no_code_transfer') - add_project_arguments('-DTRACY_NO_CODE_TRANSFER', language : 'cpp') +if get_option('no_code_transfer') + tracy_common_args += ['-DTRACY_NO_CODE_TRANSFER'] endif -if get_option('tracy_no_context_switch') - add_project_arguments('-DTRACY_NO_CONTEXT_SWITCH', language : 'cpp') +if get_option('no_context_switch') + tracy_common_args += ['-DTRACY_NO_CONTEXT_SWITCH'] endif -if get_option('tracy_no_exit') - add_project_arguments('-DTRACY_NO_EXIT', language : 'cpp') +if get_option('no_exit') + tracy_common_args += ['-DTRACY_NO_EXIT'] endif -if get_option('tracy_no_sampling') - add_project_arguments('-DTRACY_NO_SAMPLING', language : 'cpp') +if get_option('no_sampling') + tracy_common_args += ['-DTRACY_NO_SAMPLING'] endif -if get_option('tracy_no_verify') - add_project_arguments('-DTRACY_NO_VERIFY', language : 'cpp') +if get_option('no_verify') + tracy_common_args += ['-DTRACY_NO_VERIFY'] endif -if get_option('tracy_no_vsync_capture') - add_project_arguments('-DTRACY_NO_VSYNC_CAPTURE', language : 'cpp') +if get_option('no_vsync_capture') + tracy_common_args += ['-DTRACY_NO_VSYNC_CAPTURE'] endif -if get_option('tracy_no_frame_image') - add_project_arguments('-DTRACY_NO_FRAME_IMAGE', language : 'cpp') +if get_option('no_frame_image') + tracy_common_args += ['-DTRACY_NO_FRAME_IMAGE'] endif -if get_option('tracy_no_system_tracing') - 
add_project_arguments('-DTRACY_NO_SYSTEM_TRACING', language : 'cpp') +if get_option('no_system_tracing') + tracy_common_args += ['-DTRACY_NO_SYSTEM_TRACING'] endif -if get_option('tracy_patchable_nopsleds') - add_project_arguments('-DTRACY_PATCHABLE_NOPSLEDS', language : 'cpp') +if get_option('patchable_nopsleds') + tracy_common_args += ['-DTRACY_PATCHABLE_NOPSLEDS'] endif -if get_option('tracy_delayed_init') - add_project_arguments('-DTRACY_DELAYED_INIT', language : 'cpp') +if get_option('delayed_init') + tracy_common_args += ['-DTRACY_DELAYED_INIT'] endif -if get_option('tracy_manual_lifetime') - add_project_arguments('-DTRACY_MANUAL_LIFETIME', language : 'cpp') +if get_option('manual_lifetime') + tracy_common_args += ['-DTRACY_MANUAL_LIFETIME'] endif -if get_option('tracy_fibers') - add_project_arguments('-DTRACY_FIBERS', language : 'cpp') +if get_option('fibers') + tracy_common_args += ['-DTRACY_FIBERS'] endif -if get_option('tracy_timer_fallback') - add_project_arguments('-DTRACY_TIMER_FALLBACK', language : 'cpp') +if get_option('timer_fallback') + tracy_common_args += ['-DTRACY_TIMER_FALLBACK'] endif -tracy_shared_libs = get_option('tracy_shared_libs') -if tracy_shared_libs - add_project_arguments('-DTRACY_EXPORTS', language : 'cpp') +if get_option('no_crash_handler') + tracy_common_args += ['-DTRACY_NO_CRASH_HANDLER'] +endif + +if get_option('libunwind_backtrace') + tracy_common_args += ['-DTRACY_LIBUNWIND_BACKTRACE'] + tracy_public_deps += dependency('libunwind') +endif + +if get_option('symbol_offline_resolve') + tracy_compile_args += ['-DTRACY_SYMBOL_OFFLINE_RESOLVE'] +endif + +if get_option('libbacktrace_elf_dynload_support') + tracy_compile_args += ['-DTRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT'] +endif + +if get_option('verbose') + tracy_common_args += ['-DTRACY_VERBOSE'] +endif + +if get_option('debuginfod') + tracy_common_args += ['-DTRACY_DEBUGINFOD'] + tracy_public_deps += dependency('libdebuginfod') endif -if get_option('tracy_no_crash_handler') - 
add_project_arguments('-DTRACY_NO_CRASH_HANDLER', language : 'cpp') +tracy_shared_libs = get_option('default_library') == 'shared' + +if tracy_shared_libs + tracy_compile_args += ['-DTRACY_EXPORTS'] endif -threads_dep = dependency('threads') +if host_machine.system() == 'windows' + tracy_compile_args += ['-DWINVER=0x0601', '-D_WIN32_WINNT=0x0601'] +endif includes = [ 'public/tracy/TracyC.h', 'public/tracy/Tracy.hpp', + 'public/tracy/TracyCUDA.hpp', 'public/tracy/TracyD3D11.hpp', 'public/tracy/TracyD3D12.hpp', 'public/tracy/TracyLua.hpp', @@ -116,11 +151,13 @@ client_includes = [ 'public/client/TracyDebug.hpp', 'public/client/TracyDxt1.hpp', 'public/client/TracyFastVector.hpp', + 'public/client/TracyKCore.hpp', 'public/client/TracyLock.hpp', 'public/client/TracyProfiler.hpp', 'public/client/TracyRingBuffer.hpp', 'public/client/TracyScoped.hpp', 'public/client/TracyStringHelpers.hpp', + 'public/client/TracySysPower.hpp', 'public/client/TracySysTime.hpp', 'public/client/TracySysTrace.hpp', 'public/client/TracyThread.hpp' @@ -130,7 +167,6 @@ common_includes = [ 'public/common/tracy_lz4.hpp', 'public/common/tracy_lz4hc.hpp', 'public/common/TracyAlign.hpp', - 'public/common/TracyAlign.hpp', 'public/common/TracyAlloc.hpp', 'public/common/TracyApi.h', 'public/common/TracyColor.hpp', @@ -155,40 +191,40 @@ tracy_public_include_dirs = include_directories('public') compiler = meson.get_compiler('cpp') override_options = [] -if compiler.get_id() != 'msvc' - override_options += 'cpp_std=c++11' +# MSVC c++ lib does not work properly with C++11 and compilation may fail +if compiler.has_define('_MSC_VER') and get_option('cpp_std') == 'c++11' + override_options += 'cpp_std=c++14' endif -if tracy_shared_libs - tracy = shared_library('tracy', tracy_src, tracy_header_files, - dependencies : [ threads_dep ], - include_directories : tracy_public_include_dirs, - override_options : override_options, - install : true) -else - tracy = static_library('tracy', tracy_src, tracy_header_files, - 
dependencies : [ threads_dep ], +tracy_compile_args += tracy_common_args + +tracy_deps = [dependency('threads')] + tracy_public_deps + +tracy = library('tracy', tracy_src, tracy_header_files, + dependencies : tracy_deps, include_directories : tracy_public_include_dirs, + cpp_args : tracy_compile_args, override_options : override_options, install : true) -endif -install_headers(includes) -install_headers(common_includes, subdir : 'common') -install_headers(client_includes, subdir : 'client') +install_headers(includes, subdir : 'tracy/tracy') +install_headers(common_includes, subdir : 'tracy/common') +install_headers(client_includes, subdir : 'tracy/client') -tracy_dep_compile_args = [] +tracy_dep_compile_args = tracy_common_args if tracy_shared_libs tracy_dep_compile_args += [ '-DTRACY_IMPORTS' ] endif +pkg = import('pkgconfig') +pkg.generate(tracy, + extra_cflags : tracy_dep_compile_args, + requires : tracy_public_deps) + tracy_dep = declare_dependency( compile_args : tracy_dep_compile_args, link_with : tracy, include_directories : tracy_public_include_dirs) -tracy_dep_dynamic = declare_dependency( - include_directories : tracy_public_include_dirs) - meson.override_dependency('tracy', tracy_dep) diff --git a/external/sources/tracy/meson.options b/external/sources/tracy/meson.options new file mode 100644 index 0000000000..938aef3864 --- /dev/null +++ b/external/sources/tracy/meson.options @@ -0,0 +1,27 @@ +option('tracy_enable', type : 'boolean', value : true, description : 'Enable profiling', yield: true) +option('on_demand', type : 'boolean', value : false, description : 'On-demand profiling') +option('callstack', type : 'boolean', value : false, description : 'Enfore callstack collection for tracy regions') +option('no_callstack', type : 'boolean', value : false, description : 'Disable all callstack related functionality') +option('no_callstack_inlines', type : 'boolean', value : false, description : 'Disables the inline functions in callstacks') 
+option('only_localhost', type : 'boolean', value : false, description : 'Only listen on the localhost interface') +option('no_broadcast', type : 'boolean', value : false, description : 'Disable client discovery by broadcast to local network') +option('only_ipv4', type : 'boolean', value : false, description : 'Tracy will only accept connections on IPv4 addresses (disable IPv6)') +option('no_code_transfer', type : 'boolean', value : false, description : 'Disable collection of source code') +option('no_context_switch', type : 'boolean', value : false, description : 'Disable capture of context switches') +option('no_exit', type : 'boolean', value : false, description : 'Client executable does not exit until all profile data is sent to server') +option('no_sampling', type : 'boolean', value : false, description : 'Disable call stack sampling') +option('no_verify', type : 'boolean', value : false, description : 'Disable zone validation for C API') +option('no_vsync_capture', type : 'boolean', value : false, description : 'Disable capture of hardware Vsync events') +option('no_frame_image', type : 'boolean', value : false, description : 'Disable the frame image support and its thread') +option('no_system_tracing', type : 'boolean', value : false, description : 'Disable systrace sampling') +option('patchable_nopsleds', type : 'boolean', value : false, description : 'Enable nopsleds for efficient patching by system-level tools (e.g. 
rr)') +option('timer_fallback', type : 'boolean', value : false, description : 'Use lower resolution timers') +option('libunwind_backtrace', type : 'boolean', value : false, description : 'Use libunwind backtracing where supported') +option('symbol_offline_resolve', type : 'boolean', value : false, description : 'Instead of full runtime symbol resolution, only resolve the image path and offset to enable offline symbol resolution') +option('libbacktrace_elf_dynload_support', type : 'boolean', value : false, description : 'Enable libbacktrace to support dynamically loaded elfs in symbol resolution resolution after the first symbol resolve operation') +option('delayed_init', type : 'boolean', value : false, description : 'Enable delayed initialization of the library (init on first call)') +option('manual_lifetime', type : 'boolean', value : false, description : 'Enable the manual lifetime management of the profile') +option('fibers', type : 'boolean', value : false, description : 'Enable fibers support') +option('no_crash_handler', type : 'boolean', value : false, description : 'Disable crash handling') +option('verbose', type : 'boolean', value : false, description : 'Enable verbose logging') +option('debuginfod', type : 'boolean', value : false, description : 'Enable debuginfod support') \ No newline at end of file diff --git a/external/sources/tracy/meson_options.txt b/external/sources/tracy/meson_options.txt deleted file mode 100644 index 3fad341c83..0000000000 --- a/external/sources/tracy/meson_options.txt +++ /dev/null @@ -1,23 +0,0 @@ -option('tracy_enable', type : 'boolean', value : true, description : 'Enable profiling') -option('tracy_on_demand', type : 'boolean', value : false, description : 'On-demand profiling') -option('tracy_callstack', type : 'boolean', value : false, description : 'Enfore callstack collection for tracy regions') -option('tracy_no_callstack', type : 'boolean', value : false, description : 'Disable all callstack related functionality') 
-option('tracy_no_callstack_inlines', type : 'boolean', value : false, description : 'Disables the inline functions in callstacks') -option('tracy_only_localhost', type : 'boolean', value : false, description : 'Only listen on the localhost interface') -option('tracy_no_broadcast', type : 'boolean', value : false, description : 'Disable client discovery by broadcast to local network') -option('tracy_only_ipv4', type : 'boolean', value : false, description : 'Tracy will only accept connections on IPv4 addresses (disable IPv6)') -option('tracy_no_code_transfer', type : 'boolean', value : false, description : 'Disable collection of source code') -option('tracy_no_context_switch', type : 'boolean', value : false, description : 'Disable capture of context switches') -option('tracy_no_exit', type : 'boolean', value : false, description : 'Client executable does not exit until all profile data is sent to server') -option('tracy_no_sampling', type : 'boolean', value : false, description : 'Disable call stack sampling') -option('tracy_no_verify', type : 'boolean', value : false, description : 'Disable zone validation for C API') -option('tracy_no_vsync_capture', type : 'boolean', value : false, description : 'Disable capture of hardware Vsync events') -option('tracy_no_frame_image', type : 'boolean', value : false, description : 'Disable the frame image support and its thread') -option('tracy_no_system_tracing', type : 'boolean', value : false, description : 'Disable systrace sampling') -option('tracy_patchable_nopsleds', type : 'boolean', value : false, description : 'Enable nopsleds for efficient patching by system-level tools (e.g. 
rr)') -option('tracy_timer_fallback', type : 'boolean', value : false, description : 'Use lower resolution timers') -option('tracy_delayed_init', type : 'boolean', value : false, description : 'Enable delayed initialization of the library (init on first call)') -option('tracy_manual_lifetime', type : 'boolean', value : false, description : 'Enable the manual lifetime management of the profile') -option('tracy_fibers', type : 'boolean', value : false, description : 'Enable fibers support') -option('tracy_shared_libs', type : 'boolean', value : false, description : 'Builds Tracy as a shared object') -option('tracy_no_crash_handler', type : 'boolean', value : false, description : 'Disable crash handling') diff --git a/external/sources/tracy/public/TracyClient.F90 b/external/sources/tracy/public/TracyClient.F90 new file mode 100644 index 0000000000..7c24648aa7 --- /dev/null +++ b/external/sources/tracy/public/TracyClient.F90 @@ -0,0 +1,1292 @@ +module tracy + use, intrinsic :: iso_c_binding, only: c_ptr, c_loc, c_char, c_null_char, & + & c_size_t, c_int8_t, c_int16_t, c_int32_t, c_int64_t, c_int, c_float, c_double, c_null_ptr + implicit none + private + + integer(c_int32_t), parameter, public :: TRACY_PLOTFORMAT_NUMBER = 0 + integer(c_int32_t), parameter, public :: TRACY_PLOTFORMAT_MEMORY = 1 + integer(c_int32_t), parameter, public :: TRACY_PLOTFORMAT_PERCENTAGE = 2 + integer(c_int32_t), parameter, public :: TRACY_PLOTFORMAT_WATT = 3 + + character(c_char), parameter, public :: tracy_null_char = c_null_char + + type, bind(C) :: TracyColors_t + integer(c_int32_t) :: Snow = int(Z'fffafa', kind=c_int32_t) + integer(c_int32_t) :: GhostWhite = int(Z'f8f8ff', kind=c_int32_t) + integer(c_int32_t) :: WhiteSmoke = int(Z'f5f5f5', kind=c_int32_t) + integer(c_int32_t) :: Gainsboro = int(Z'dcdcdc', kind=c_int32_t) + integer(c_int32_t) :: FloralWhite = int(Z'fffaf0', kind=c_int32_t) + integer(c_int32_t) :: OldLace = int(Z'fdf5e6', kind=c_int32_t) + integer(c_int32_t) :: Linen = 
int(Z'faf0e6', kind=c_int32_t) + integer(c_int32_t) :: AntiqueWhite = int(Z'faebd7', kind=c_int32_t) + integer(c_int32_t) :: PapayaWhip = int(Z'ffefd5', kind=c_int32_t) + integer(c_int32_t) :: BlanchedAlmond = int(Z'ffebcd', kind=c_int32_t) + integer(c_int32_t) :: Bisque = int(Z'ffe4c4', kind=c_int32_t) + integer(c_int32_t) :: PeachPuff = int(Z'ffdab9', kind=c_int32_t) + integer(c_int32_t) :: NavajoWhite = int(Z'ffdead', kind=c_int32_t) + integer(c_int32_t) :: Moccasin = int(Z'ffe4b5', kind=c_int32_t) + integer(c_int32_t) :: Cornsilk = int(Z'fff8dc', kind=c_int32_t) + integer(c_int32_t) :: Ivory = int(Z'fffff0', kind=c_int32_t) + integer(c_int32_t) :: LemonChiffon = int(Z'fffacd', kind=c_int32_t) + integer(c_int32_t) :: Seashell = int(Z'fff5ee', kind=c_int32_t) + integer(c_int32_t) :: Honeydew = int(Z'f0fff0', kind=c_int32_t) + integer(c_int32_t) :: MintCream = int(Z'f5fffa', kind=c_int32_t) + integer(c_int32_t) :: Azure = int(Z'f0ffff', kind=c_int32_t) + integer(c_int32_t) :: AliceBlue = int(Z'f0f8ff', kind=c_int32_t) + integer(c_int32_t) :: Lavender = int(Z'e6e6fa', kind=c_int32_t) + integer(c_int32_t) :: LavenderBlush = int(Z'fff0f5', kind=c_int32_t) + integer(c_int32_t) :: MistyRose = int(Z'ffe4e1', kind=c_int32_t) + integer(c_int32_t) :: White = int(Z'ffffff', kind=c_int32_t) + integer(c_int32_t) :: Black = int(Z'000000', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateGray = int(Z'2f4f4f', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateGrey = int(Z'2f4f4f', kind=c_int32_t) + integer(c_int32_t) :: DimGray = int(Z'696969', kind=c_int32_t) + integer(c_int32_t) :: DimGrey = int(Z'696969', kind=c_int32_t) + integer(c_int32_t) :: SlateGray = int(Z'708090', kind=c_int32_t) + integer(c_int32_t) :: SlateGrey = int(Z'708090', kind=c_int32_t) + integer(c_int32_t) :: LightSlateGray = int(Z'778899', kind=c_int32_t) + integer(c_int32_t) :: LightSlateGrey = int(Z'778899', kind=c_int32_t) + integer(c_int32_t) :: Gray = int(Z'bebebe', kind=c_int32_t) + integer(c_int32_t) 
:: Grey = int(Z'bebebe', kind=c_int32_t) + integer(c_int32_t) :: X11Gray = int(Z'bebebe', kind=c_int32_t) + integer(c_int32_t) :: X11Grey = int(Z'bebebe', kind=c_int32_t) + integer(c_int32_t) :: WebGray = int(Z'808080', kind=c_int32_t) + integer(c_int32_t) :: WebGrey = int(Z'808080', kind=c_int32_t) + integer(c_int32_t) :: LightGrey = int(Z'd3d3d3', kind=c_int32_t) + integer(c_int32_t) :: LightGray = int(Z'd3d3d3', kind=c_int32_t) + integer(c_int32_t) :: MidnightBlue = int(Z'191970', kind=c_int32_t) + integer(c_int32_t) :: Navy = int(Z'000080', kind=c_int32_t) + integer(c_int32_t) :: NavyBlue = int(Z'000080', kind=c_int32_t) + integer(c_int32_t) :: CornflowerBlue = int(Z'6495ed', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateBlue = int(Z'483d8b', kind=c_int32_t) + integer(c_int32_t) :: SlateBlue = int(Z'6a5acd', kind=c_int32_t) + integer(c_int32_t) :: MediumSlateBlue = int(Z'7b68ee', kind=c_int32_t) + integer(c_int32_t) :: LightSlateBlue = int(Z'8470ff', kind=c_int32_t) + integer(c_int32_t) :: MediumBlue = int(Z'0000cd', kind=c_int32_t) + integer(c_int32_t) :: RoyalBlue = int(Z'4169e1', kind=c_int32_t) + integer(c_int32_t) :: Blue = int(Z'0000ff', kind=c_int32_t) + integer(c_int32_t) :: DodgerBlue = int(Z'1e90ff', kind=c_int32_t) + integer(c_int32_t) :: DeepSkyBlue = int(Z'00bfff', kind=c_int32_t) + integer(c_int32_t) :: SkyBlue = int(Z'87ceeb', kind=c_int32_t) + integer(c_int32_t) :: LightSkyBlue = int(Z'87cefa', kind=c_int32_t) + integer(c_int32_t) :: SteelBlue = int(Z'4682b4', kind=c_int32_t) + integer(c_int32_t) :: LightSteelBlue = int(Z'b0c4de', kind=c_int32_t) + integer(c_int32_t) :: LightBlue = int(Z'add8e6', kind=c_int32_t) + integer(c_int32_t) :: PowderBlue = int(Z'b0e0e6', kind=c_int32_t) + integer(c_int32_t) :: PaleTurquoise = int(Z'afeeee', kind=c_int32_t) + integer(c_int32_t) :: DarkTurquoise = int(Z'00ced1', kind=c_int32_t) + integer(c_int32_t) :: MediumTurquoise = int(Z'48d1cc', kind=c_int32_t) + integer(c_int32_t) :: Turquoise = int(Z'40e0d0', 
kind=c_int32_t) + integer(c_int32_t) :: Cyan = int(Z'00ffff', kind=c_int32_t) + integer(c_int32_t) :: Aqua = int(Z'00ffff', kind=c_int32_t) + integer(c_int32_t) :: LightCyan = int(Z'e0ffff', kind=c_int32_t) + integer(c_int32_t) :: CadetBlue = int(Z'5f9ea0', kind=c_int32_t) + integer(c_int32_t) :: MediumAquamarine = int(Z'66cdaa', kind=c_int32_t) + integer(c_int32_t) :: Aquamarine = int(Z'7fffd4', kind=c_int32_t) + integer(c_int32_t) :: DarkGreen = int(Z'006400', kind=c_int32_t) + integer(c_int32_t) :: DarkOliveGreen = int(Z'556b2f', kind=c_int32_t) + integer(c_int32_t) :: DarkSeaGreen = int(Z'8fbc8f', kind=c_int32_t) + integer(c_int32_t) :: SeaGreen = int(Z'2e8b57', kind=c_int32_t) + integer(c_int32_t) :: MediumSeaGreen = int(Z'3cb371', kind=c_int32_t) + integer(c_int32_t) :: LightSeaGreen = int(Z'20b2aa', kind=c_int32_t) + integer(c_int32_t) :: PaleGreen = int(Z'98fb98', kind=c_int32_t) + integer(c_int32_t) :: SpringGreen = int(Z'00ff7f', kind=c_int32_t) + integer(c_int32_t) :: LawnGreen = int(Z'7cfc00', kind=c_int32_t) + integer(c_int32_t) :: Green = int(Z'00ff00', kind=c_int32_t) + integer(c_int32_t) :: Lime = int(Z'00ff00', kind=c_int32_t) + integer(c_int32_t) :: X11Green = int(Z'00ff00', kind=c_int32_t) + integer(c_int32_t) :: WebGreen = int(Z'008000', kind=c_int32_t) + integer(c_int32_t) :: Chartreuse = int(Z'7fff00', kind=c_int32_t) + integer(c_int32_t) :: MediumSpringGreen = int(Z'00fa9a', kind=c_int32_t) + integer(c_int32_t) :: GreenYellow = int(Z'adff2f', kind=c_int32_t) + integer(c_int32_t) :: LimeGreen = int(Z'32cd32', kind=c_int32_t) + integer(c_int32_t) :: YellowGreen = int(Z'9acd32', kind=c_int32_t) + integer(c_int32_t) :: ForestGreen = int(Z'228b22', kind=c_int32_t) + integer(c_int32_t) :: OliveDrab = int(Z'6b8e23', kind=c_int32_t) + integer(c_int32_t) :: DarkKhaki = int(Z'bdb76b', kind=c_int32_t) + integer(c_int32_t) :: Khaki = int(Z'f0e68c', kind=c_int32_t) + integer(c_int32_t) :: PaleGoldenrod = int(Z'eee8aa', kind=c_int32_t) + integer(c_int32_t) 
:: LightGoldenrodYellow = int(Z'fafad2', kind=c_int32_t) + integer(c_int32_t) :: LightYellow = int(Z'ffffe0', kind=c_int32_t) + integer(c_int32_t) :: Yellow = int(Z'ffff00', kind=c_int32_t) + integer(c_int32_t) :: Gold = int(Z'ffd700', kind=c_int32_t) + integer(c_int32_t) :: LightGoldenrod = int(Z'eedd82', kind=c_int32_t) + integer(c_int32_t) :: Goldenrod = int(Z'daa520', kind=c_int32_t) + integer(c_int32_t) :: DarkGoldenrod = int(Z'b8860b', kind=c_int32_t) + integer(c_int32_t) :: RosyBrown = int(Z'bc8f8f', kind=c_int32_t) + integer(c_int32_t) :: IndianRed = int(Z'cd5c5c', kind=c_int32_t) + integer(c_int32_t) :: SaddleBrown = int(Z'8b4513', kind=c_int32_t) + integer(c_int32_t) :: Sienna = int(Z'a0522d', kind=c_int32_t) + integer(c_int32_t) :: Peru = int(Z'cd853f', kind=c_int32_t) + integer(c_int32_t) :: Burlywood = int(Z'deb887', kind=c_int32_t) + integer(c_int32_t) :: Beige = int(Z'f5f5dc', kind=c_int32_t) + integer(c_int32_t) :: Wheat = int(Z'f5deb3', kind=c_int32_t) + integer(c_int32_t) :: SandyBrown = int(Z'f4a460', kind=c_int32_t) + integer(c_int32_t) :: Tan = int(Z'd2b48c', kind=c_int32_t) + integer(c_int32_t) :: Chocolate = int(Z'd2691e', kind=c_int32_t) + integer(c_int32_t) :: Firebrick = int(Z'b22222', kind=c_int32_t) + integer(c_int32_t) :: Brown = int(Z'a52a2a', kind=c_int32_t) + integer(c_int32_t) :: DarkSalmon = int(Z'e9967a', kind=c_int32_t) + integer(c_int32_t) :: Salmon = int(Z'fa8072', kind=c_int32_t) + integer(c_int32_t) :: LightSalmon = int(Z'ffa07a', kind=c_int32_t) + integer(c_int32_t) :: Orange = int(Z'ffa500', kind=c_int32_t) + integer(c_int32_t) :: DarkOrange = int(Z'ff8c00', kind=c_int32_t) + integer(c_int32_t) :: Coral = int(Z'ff7f50', kind=c_int32_t) + integer(c_int32_t) :: LightCoral = int(Z'f08080', kind=c_int32_t) + integer(c_int32_t) :: Tomato = int(Z'ff6347', kind=c_int32_t) + integer(c_int32_t) :: OrangeRed = int(Z'ff4500', kind=c_int32_t) + integer(c_int32_t) :: Red = int(Z'ff0000', kind=c_int32_t) + integer(c_int32_t) :: HotPink = 
int(Z'ff69b4', kind=c_int32_t) + integer(c_int32_t) :: DeepPink = int(Z'ff1493', kind=c_int32_t) + integer(c_int32_t) :: Pink = int(Z'ffc0cb', kind=c_int32_t) + integer(c_int32_t) :: LightPink = int(Z'ffb6c1', kind=c_int32_t) + integer(c_int32_t) :: PaleVioletRed = int(Z'db7093', kind=c_int32_t) + integer(c_int32_t) :: Maroon = int(Z'b03060', kind=c_int32_t) + integer(c_int32_t) :: X11Maroon = int(Z'b03060', kind=c_int32_t) + integer(c_int32_t) :: WebMaroon = int(Z'800000', kind=c_int32_t) + integer(c_int32_t) :: MediumVioletRed = int(Z'c71585', kind=c_int32_t) + integer(c_int32_t) :: VioletRed = int(Z'd02090', kind=c_int32_t) + integer(c_int32_t) :: Magenta = int(Z'ff00ff', kind=c_int32_t) + integer(c_int32_t) :: Fuchsia = int(Z'ff00ff', kind=c_int32_t) + integer(c_int32_t) :: Violet = int(Z'ee82ee', kind=c_int32_t) + integer(c_int32_t) :: Plum = int(Z'dda0dd', kind=c_int32_t) + integer(c_int32_t) :: Orchid = int(Z'da70d6', kind=c_int32_t) + integer(c_int32_t) :: MediumOrchid = int(Z'ba55d3', kind=c_int32_t) + integer(c_int32_t) :: DarkOrchid = int(Z'9932cc', kind=c_int32_t) + integer(c_int32_t) :: DarkViolet = int(Z'9400d3', kind=c_int32_t) + integer(c_int32_t) :: BlueViolet = int(Z'8a2be2', kind=c_int32_t) + integer(c_int32_t) :: Purple = int(Z'a020f0', kind=c_int32_t) + integer(c_int32_t) :: X11Purple = int(Z'a020f0', kind=c_int32_t) + integer(c_int32_t) :: WebPurple = int(Z'800080', kind=c_int32_t) + integer(c_int32_t) :: MediumPurple = int(Z'9370db', kind=c_int32_t) + integer(c_int32_t) :: Thistle = int(Z'd8bfd8', kind=c_int32_t) + integer(c_int32_t) :: Snow1 = int(Z'fffafa', kind=c_int32_t) + integer(c_int32_t) :: Snow2 = int(Z'eee9e9', kind=c_int32_t) + integer(c_int32_t) :: Snow3 = int(Z'cdc9c9', kind=c_int32_t) + integer(c_int32_t) :: Snow4 = int(Z'8b8989', kind=c_int32_t) + integer(c_int32_t) :: Seashell1 = int(Z'fff5ee', kind=c_int32_t) + integer(c_int32_t) :: Seashell2 = int(Z'eee5de', kind=c_int32_t) + integer(c_int32_t) :: Seashell3 = int(Z'cdc5bf', 
kind=c_int32_t) + integer(c_int32_t) :: Seashell4 = int(Z'8b8682', kind=c_int32_t) + integer(c_int32_t) :: AntiqueWhite1 = int(Z'ffefdb', kind=c_int32_t) + integer(c_int32_t) :: AntiqueWhite2 = int(Z'eedfcc', kind=c_int32_t) + integer(c_int32_t) :: AntiqueWhite3 = int(Z'cdc0b0', kind=c_int32_t) + integer(c_int32_t) :: AntiqueWhite4 = int(Z'8b8378', kind=c_int32_t) + integer(c_int32_t) :: Bisque1 = int(Z'ffe4c4', kind=c_int32_t) + integer(c_int32_t) :: Bisque2 = int(Z'eed5b7', kind=c_int32_t) + integer(c_int32_t) :: Bisque3 = int(Z'cdb79e', kind=c_int32_t) + integer(c_int32_t) :: Bisque4 = int(Z'8b7d6b', kind=c_int32_t) + integer(c_int32_t) :: PeachPuff1 = int(Z'ffdab9', kind=c_int32_t) + integer(c_int32_t) :: PeachPuff2 = int(Z'eecbad', kind=c_int32_t) + integer(c_int32_t) :: PeachPuff3 = int(Z'cdaf95', kind=c_int32_t) + integer(c_int32_t) :: PeachPuff4 = int(Z'8b7765', kind=c_int32_t) + integer(c_int32_t) :: NavajoWhite1 = int(Z'ffdead', kind=c_int32_t) + integer(c_int32_t) :: NavajoWhite2 = int(Z'eecfa1', kind=c_int32_t) + integer(c_int32_t) :: NavajoWhite3 = int(Z'cdb38b', kind=c_int32_t) + integer(c_int32_t) :: NavajoWhite4 = int(Z'8b795e', kind=c_int32_t) + integer(c_int32_t) :: LemonChiffon1 = int(Z'fffacd', kind=c_int32_t) + integer(c_int32_t) :: LemonChiffon2 = int(Z'eee9bf', kind=c_int32_t) + integer(c_int32_t) :: LemonChiffon3 = int(Z'cdc9a5', kind=c_int32_t) + integer(c_int32_t) :: LemonChiffon4 = int(Z'8b8970', kind=c_int32_t) + integer(c_int32_t) :: Cornsilk1 = int(Z'fff8dc', kind=c_int32_t) + integer(c_int32_t) :: Cornsilk2 = int(Z'eee8cd', kind=c_int32_t) + integer(c_int32_t) :: Cornsilk3 = int(Z'cdc8b1', kind=c_int32_t) + integer(c_int32_t) :: Cornsilk4 = int(Z'8b8878', kind=c_int32_t) + integer(c_int32_t) :: Ivory1 = int(Z'fffff0', kind=c_int32_t) + integer(c_int32_t) :: Ivory2 = int(Z'eeeee0', kind=c_int32_t) + integer(c_int32_t) :: Ivory3 = int(Z'cdcdc1', kind=c_int32_t) + integer(c_int32_t) :: Ivory4 = int(Z'8b8b83', kind=c_int32_t) + 
integer(c_int32_t) :: Honeydew1 = int(Z'f0fff0', kind=c_int32_t) + integer(c_int32_t) :: Honeydew2 = int(Z'e0eee0', kind=c_int32_t) + integer(c_int32_t) :: Honeydew3 = int(Z'c1cdc1', kind=c_int32_t) + integer(c_int32_t) :: Honeydew4 = int(Z'838b83', kind=c_int32_t) + integer(c_int32_t) :: LavenderBlush1 = int(Z'fff0f5', kind=c_int32_t) + integer(c_int32_t) :: LavenderBlush2 = int(Z'eee0e5', kind=c_int32_t) + integer(c_int32_t) :: LavenderBlush3 = int(Z'cdc1c5', kind=c_int32_t) + integer(c_int32_t) :: LavenderBlush4 = int(Z'8b8386', kind=c_int32_t) + integer(c_int32_t) :: MistyRose1 = int(Z'ffe4e1', kind=c_int32_t) + integer(c_int32_t) :: MistyRose2 = int(Z'eed5d2', kind=c_int32_t) + integer(c_int32_t) :: MistyRose3 = int(Z'cdb7b5', kind=c_int32_t) + integer(c_int32_t) :: MistyRose4 = int(Z'8b7d7b', kind=c_int32_t) + integer(c_int32_t) :: Azure1 = int(Z'f0ffff', kind=c_int32_t) + integer(c_int32_t) :: Azure2 = int(Z'e0eeee', kind=c_int32_t) + integer(c_int32_t) :: Azure3 = int(Z'c1cdcd', kind=c_int32_t) + integer(c_int32_t) :: Azure4 = int(Z'838b8b', kind=c_int32_t) + integer(c_int32_t) :: SlateBlue1 = int(Z'836fff', kind=c_int32_t) + integer(c_int32_t) :: SlateBlue2 = int(Z'7a67ee', kind=c_int32_t) + integer(c_int32_t) :: SlateBlue3 = int(Z'6959cd', kind=c_int32_t) + integer(c_int32_t) :: SlateBlue4 = int(Z'473c8b', kind=c_int32_t) + integer(c_int32_t) :: RoyalBlue1 = int(Z'4876ff', kind=c_int32_t) + integer(c_int32_t) :: RoyalBlue2 = int(Z'436eee', kind=c_int32_t) + integer(c_int32_t) :: RoyalBlue3 = int(Z'3a5fcd', kind=c_int32_t) + integer(c_int32_t) :: RoyalBlue4 = int(Z'27408b', kind=c_int32_t) + integer(c_int32_t) :: Blue1 = int(Z'0000ff', kind=c_int32_t) + integer(c_int32_t) :: Blue2 = int(Z'0000ee', kind=c_int32_t) + integer(c_int32_t) :: Blue3 = int(Z'0000cd', kind=c_int32_t) + integer(c_int32_t) :: Blue4 = int(Z'00008b', kind=c_int32_t) + integer(c_int32_t) :: DodgerBlue1 = int(Z'1e90ff', kind=c_int32_t) + integer(c_int32_t) :: DodgerBlue2 = int(Z'1c86ee', 
kind=c_int32_t) + integer(c_int32_t) :: DodgerBlue3 = int(Z'1874cd', kind=c_int32_t) + integer(c_int32_t) :: DodgerBlue4 = int(Z'104e8b', kind=c_int32_t) + integer(c_int32_t) :: SteelBlue1 = int(Z'63b8ff', kind=c_int32_t) + integer(c_int32_t) :: SteelBlue2 = int(Z'5cacee', kind=c_int32_t) + integer(c_int32_t) :: SteelBlue3 = int(Z'4f94cd', kind=c_int32_t) + integer(c_int32_t) :: SteelBlue4 = int(Z'36648b', kind=c_int32_t) + integer(c_int32_t) :: DeepSkyBlue1 = int(Z'00bfff', kind=c_int32_t) + integer(c_int32_t) :: DeepSkyBlue2 = int(Z'00b2ee', kind=c_int32_t) + integer(c_int32_t) :: DeepSkyBlue3 = int(Z'009acd', kind=c_int32_t) + integer(c_int32_t) :: DeepSkyBlue4 = int(Z'00688b', kind=c_int32_t) + integer(c_int32_t) :: SkyBlue1 = int(Z'87ceff', kind=c_int32_t) + integer(c_int32_t) :: SkyBlue2 = int(Z'7ec0ee', kind=c_int32_t) + integer(c_int32_t) :: SkyBlue3 = int(Z'6ca6cd', kind=c_int32_t) + integer(c_int32_t) :: SkyBlue4 = int(Z'4a708b', kind=c_int32_t) + integer(c_int32_t) :: LightSkyBlue1 = int(Z'b0e2ff', kind=c_int32_t) + integer(c_int32_t) :: LightSkyBlue2 = int(Z'a4d3ee', kind=c_int32_t) + integer(c_int32_t) :: LightSkyBlue3 = int(Z'8db6cd', kind=c_int32_t) + integer(c_int32_t) :: LightSkyBlue4 = int(Z'607b8b', kind=c_int32_t) + integer(c_int32_t) :: SlateGray1 = int(Z'c6e2ff', kind=c_int32_t) + integer(c_int32_t) :: SlateGray2 = int(Z'b9d3ee', kind=c_int32_t) + integer(c_int32_t) :: SlateGray3 = int(Z'9fb6cd', kind=c_int32_t) + integer(c_int32_t) :: SlateGray4 = int(Z'6c7b8b', kind=c_int32_t) + integer(c_int32_t) :: LightSteelBlue1 = int(Z'cae1ff', kind=c_int32_t) + integer(c_int32_t) :: LightSteelBlue2 = int(Z'bcd2ee', kind=c_int32_t) + integer(c_int32_t) :: LightSteelBlue3 = int(Z'a2b5cd', kind=c_int32_t) + integer(c_int32_t) :: LightSteelBlue4 = int(Z'6e7b8b', kind=c_int32_t) + integer(c_int32_t) :: LightBlue1 = int(Z'bfefff', kind=c_int32_t) + integer(c_int32_t) :: LightBlue2 = int(Z'b2dfee', kind=c_int32_t) + integer(c_int32_t) :: LightBlue3 = 
int(Z'9ac0cd', kind=c_int32_t) + integer(c_int32_t) :: LightBlue4 = int(Z'68838b', kind=c_int32_t) + integer(c_int32_t) :: LightCyan1 = int(Z'e0ffff', kind=c_int32_t) + integer(c_int32_t) :: LightCyan2 = int(Z'd1eeee', kind=c_int32_t) + integer(c_int32_t) :: LightCyan3 = int(Z'b4cdcd', kind=c_int32_t) + integer(c_int32_t) :: LightCyan4 = int(Z'7a8b8b', kind=c_int32_t) + integer(c_int32_t) :: PaleTurquoise1 = int(Z'bbffff', kind=c_int32_t) + integer(c_int32_t) :: PaleTurquoise2 = int(Z'aeeeee', kind=c_int32_t) + integer(c_int32_t) :: PaleTurquoise3 = int(Z'96cdcd', kind=c_int32_t) + integer(c_int32_t) :: PaleTurquoise4 = int(Z'668b8b', kind=c_int32_t) + integer(c_int32_t) :: CadetBlue1 = int(Z'98f5ff', kind=c_int32_t) + integer(c_int32_t) :: CadetBlue2 = int(Z'8ee5ee', kind=c_int32_t) + integer(c_int32_t) :: CadetBlue3 = int(Z'7ac5cd', kind=c_int32_t) + integer(c_int32_t) :: CadetBlue4 = int(Z'53868b', kind=c_int32_t) + integer(c_int32_t) :: Turquoise1 = int(Z'00f5ff', kind=c_int32_t) + integer(c_int32_t) :: Turquoise2 = int(Z'00e5ee', kind=c_int32_t) + integer(c_int32_t) :: Turquoise3 = int(Z'00c5cd', kind=c_int32_t) + integer(c_int32_t) :: Turquoise4 = int(Z'00868b', kind=c_int32_t) + integer(c_int32_t) :: Cyan1 = int(Z'00ffff', kind=c_int32_t) + integer(c_int32_t) :: Cyan2 = int(Z'00eeee', kind=c_int32_t) + integer(c_int32_t) :: Cyan3 = int(Z'00cdcd', kind=c_int32_t) + integer(c_int32_t) :: Cyan4 = int(Z'008b8b', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateGray1 = int(Z'97ffff', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateGray2 = int(Z'8deeee', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateGray3 = int(Z'79cdcd', kind=c_int32_t) + integer(c_int32_t) :: DarkSlateGray4 = int(Z'528b8b', kind=c_int32_t) + integer(c_int32_t) :: Aquamarine1 = int(Z'7fffd4', kind=c_int32_t) + integer(c_int32_t) :: Aquamarine2 = int(Z'76eec6', kind=c_int32_t) + integer(c_int32_t) :: Aquamarine3 = int(Z'66cdaa', kind=c_int32_t) + integer(c_int32_t) :: Aquamarine4 = 
int(Z'458b74', kind=c_int32_t) + integer(c_int32_t) :: DarkSeaGreen1 = int(Z'c1ffc1', kind=c_int32_t) + integer(c_int32_t) :: DarkSeaGreen2 = int(Z'b4eeb4', kind=c_int32_t) + integer(c_int32_t) :: DarkSeaGreen3 = int(Z'9bcd9b', kind=c_int32_t) + integer(c_int32_t) :: DarkSeaGreen4 = int(Z'698b69', kind=c_int32_t) + integer(c_int32_t) :: SeaGreen1 = int(Z'54ff9f', kind=c_int32_t) + integer(c_int32_t) :: SeaGreen2 = int(Z'4eee94', kind=c_int32_t) + integer(c_int32_t) :: SeaGreen3 = int(Z'43cd80', kind=c_int32_t) + integer(c_int32_t) :: SeaGreen4 = int(Z'2e8b57', kind=c_int32_t) + integer(c_int32_t) :: PaleGreen1 = int(Z'9aff9a', kind=c_int32_t) + integer(c_int32_t) :: PaleGreen2 = int(Z'90ee90', kind=c_int32_t) + integer(c_int32_t) :: PaleGreen3 = int(Z'7ccd7c', kind=c_int32_t) + integer(c_int32_t) :: PaleGreen4 = int(Z'548b54', kind=c_int32_t) + integer(c_int32_t) :: SpringGreen1 = int(Z'00ff7f', kind=c_int32_t) + integer(c_int32_t) :: SpringGreen2 = int(Z'00ee76', kind=c_int32_t) + integer(c_int32_t) :: SpringGreen3 = int(Z'00cd66', kind=c_int32_t) + integer(c_int32_t) :: SpringGreen4 = int(Z'008b45', kind=c_int32_t) + integer(c_int32_t) :: Green1 = int(Z'00ff00', kind=c_int32_t) + integer(c_int32_t) :: Green2 = int(Z'00ee00', kind=c_int32_t) + integer(c_int32_t) :: Green3 = int(Z'00cd00', kind=c_int32_t) + integer(c_int32_t) :: Green4 = int(Z'008b00', kind=c_int32_t) + integer(c_int32_t) :: Chartreuse1 = int(Z'7fff00', kind=c_int32_t) + integer(c_int32_t) :: Chartreuse2 = int(Z'76ee00', kind=c_int32_t) + integer(c_int32_t) :: Chartreuse3 = int(Z'66cd00', kind=c_int32_t) + integer(c_int32_t) :: Chartreuse4 = int(Z'458b00', kind=c_int32_t) + integer(c_int32_t) :: OliveDrab1 = int(Z'c0ff3e', kind=c_int32_t) + integer(c_int32_t) :: OliveDrab2 = int(Z'b3ee3a', kind=c_int32_t) + integer(c_int32_t) :: OliveDrab3 = int(Z'9acd32', kind=c_int32_t) + integer(c_int32_t) :: OliveDrab4 = int(Z'698b22', kind=c_int32_t) + integer(c_int32_t) :: DarkOliveGreen1 = int(Z'caff70', 
kind=c_int32_t) + integer(c_int32_t) :: DarkOliveGreen2 = int(Z'bcee68', kind=c_int32_t) + integer(c_int32_t) :: DarkOliveGreen3 = int(Z'a2cd5a', kind=c_int32_t) + integer(c_int32_t) :: DarkOliveGreen4 = int(Z'6e8b3d', kind=c_int32_t) + integer(c_int32_t) :: Khaki1 = int(Z'fff68f', kind=c_int32_t) + integer(c_int32_t) :: Khaki2 = int(Z'eee685', kind=c_int32_t) + integer(c_int32_t) :: Khaki3 = int(Z'cdc673', kind=c_int32_t) + integer(c_int32_t) :: Khaki4 = int(Z'8b864e', kind=c_int32_t) + integer(c_int32_t) :: LightGoldenrod1 = int(Z'ffec8b', kind=c_int32_t) + integer(c_int32_t) :: LightGoldenrod2 = int(Z'eedc82', kind=c_int32_t) + integer(c_int32_t) :: LightGoldenrod3 = int(Z'cdbe70', kind=c_int32_t) + integer(c_int32_t) :: LightGoldenrod4 = int(Z'8b814c', kind=c_int32_t) + integer(c_int32_t) :: LightYellow1 = int(Z'ffffe0', kind=c_int32_t) + integer(c_int32_t) :: LightYellow2 = int(Z'eeeed1', kind=c_int32_t) + integer(c_int32_t) :: LightYellow3 = int(Z'cdcdb4', kind=c_int32_t) + integer(c_int32_t) :: LightYellow4 = int(Z'8b8b7a', kind=c_int32_t) + integer(c_int32_t) :: Yellow1 = int(Z'ffff00', kind=c_int32_t) + integer(c_int32_t) :: Yellow2 = int(Z'eeee00', kind=c_int32_t) + integer(c_int32_t) :: Yellow3 = int(Z'cdcd00', kind=c_int32_t) + integer(c_int32_t) :: Yellow4 = int(Z'8b8b00', kind=c_int32_t) + integer(c_int32_t) :: Gold1 = int(Z'ffd700', kind=c_int32_t) + integer(c_int32_t) :: Gold2 = int(Z'eec900', kind=c_int32_t) + integer(c_int32_t) :: Gold3 = int(Z'cdad00', kind=c_int32_t) + integer(c_int32_t) :: Gold4 = int(Z'8b7500', kind=c_int32_t) + integer(c_int32_t) :: Goldenrod1 = int(Z'ffc125', kind=c_int32_t) + integer(c_int32_t) :: Goldenrod2 = int(Z'eeb422', kind=c_int32_t) + integer(c_int32_t) :: Goldenrod3 = int(Z'cd9b1d', kind=c_int32_t) + integer(c_int32_t) :: Goldenrod4 = int(Z'8b6914', kind=c_int32_t) + integer(c_int32_t) :: DarkGoldenrod1 = int(Z'ffb90f', kind=c_int32_t) + integer(c_int32_t) :: DarkGoldenrod2 = int(Z'eead0e', kind=c_int32_t) + 
integer(c_int32_t) :: DarkGoldenrod3 = int(Z'cd950c', kind=c_int32_t) + integer(c_int32_t) :: DarkGoldenrod4 = int(Z'8b6508', kind=c_int32_t) + integer(c_int32_t) :: RosyBrown1 = int(Z'ffc1c1', kind=c_int32_t) + integer(c_int32_t) :: RosyBrown2 = int(Z'eeb4b4', kind=c_int32_t) + integer(c_int32_t) :: RosyBrown3 = int(Z'cd9b9b', kind=c_int32_t) + integer(c_int32_t) :: RosyBrown4 = int(Z'8b6969', kind=c_int32_t) + integer(c_int32_t) :: IndianRed1 = int(Z'ff6a6a', kind=c_int32_t) + integer(c_int32_t) :: IndianRed2 = int(Z'ee6363', kind=c_int32_t) + integer(c_int32_t) :: IndianRed3 = int(Z'cd5555', kind=c_int32_t) + integer(c_int32_t) :: IndianRed4 = int(Z'8b3a3a', kind=c_int32_t) + integer(c_int32_t) :: Sienna1 = int(Z'ff8247', kind=c_int32_t) + integer(c_int32_t) :: Sienna2 = int(Z'ee7942', kind=c_int32_t) + integer(c_int32_t) :: Sienna3 = int(Z'cd6839', kind=c_int32_t) + integer(c_int32_t) :: Sienna4 = int(Z'8b4726', kind=c_int32_t) + integer(c_int32_t) :: Burlywood1 = int(Z'ffd39b', kind=c_int32_t) + integer(c_int32_t) :: Burlywood2 = int(Z'eec591', kind=c_int32_t) + integer(c_int32_t) :: Burlywood3 = int(Z'cdaa7d', kind=c_int32_t) + integer(c_int32_t) :: Burlywood4 = int(Z'8b7355', kind=c_int32_t) + integer(c_int32_t) :: Wheat1 = int(Z'ffe7ba', kind=c_int32_t) + integer(c_int32_t) :: Wheat2 = int(Z'eed8ae', kind=c_int32_t) + integer(c_int32_t) :: Wheat3 = int(Z'cdba96', kind=c_int32_t) + integer(c_int32_t) :: Wheat4 = int(Z'8b7e66', kind=c_int32_t) + integer(c_int32_t) :: Tan1 = int(Z'ffa54f', kind=c_int32_t) + integer(c_int32_t) :: Tan2 = int(Z'ee9a49', kind=c_int32_t) + integer(c_int32_t) :: Tan3 = int(Z'cd853f', kind=c_int32_t) + integer(c_int32_t) :: Tan4 = int(Z'8b5a2b', kind=c_int32_t) + integer(c_int32_t) :: Chocolate1 = int(Z'ff7f24', kind=c_int32_t) + integer(c_int32_t) :: Chocolate2 = int(Z'ee7621', kind=c_int32_t) + integer(c_int32_t) :: Chocolate3 = int(Z'cd661d', kind=c_int32_t) + integer(c_int32_t) :: Chocolate4 = int(Z'8b4513', kind=c_int32_t) + 
integer(c_int32_t) :: Firebrick1 = int(Z'ff3030', kind=c_int32_t) + integer(c_int32_t) :: Firebrick2 = int(Z'ee2c2c', kind=c_int32_t) + integer(c_int32_t) :: Firebrick3 = int(Z'cd2626', kind=c_int32_t) + integer(c_int32_t) :: Firebrick4 = int(Z'8b1a1a', kind=c_int32_t) + integer(c_int32_t) :: Brown1 = int(Z'ff4040', kind=c_int32_t) + integer(c_int32_t) :: Brown2 = int(Z'ee3b3b', kind=c_int32_t) + integer(c_int32_t) :: Brown3 = int(Z'cd3333', kind=c_int32_t) + integer(c_int32_t) :: Brown4 = int(Z'8b2323', kind=c_int32_t) + integer(c_int32_t) :: Salmon1 = int(Z'ff8c69', kind=c_int32_t) + integer(c_int32_t) :: Salmon2 = int(Z'ee8262', kind=c_int32_t) + integer(c_int32_t) :: Salmon3 = int(Z'cd7054', kind=c_int32_t) + integer(c_int32_t) :: Salmon4 = int(Z'8b4c39', kind=c_int32_t) + integer(c_int32_t) :: LightSalmon1 = int(Z'ffa07a', kind=c_int32_t) + integer(c_int32_t) :: LightSalmon2 = int(Z'ee9572', kind=c_int32_t) + integer(c_int32_t) :: LightSalmon3 = int(Z'cd8162', kind=c_int32_t) + integer(c_int32_t) :: LightSalmon4 = int(Z'8b5742', kind=c_int32_t) + integer(c_int32_t) :: Orange1 = int(Z'ffa500', kind=c_int32_t) + integer(c_int32_t) :: Orange2 = int(Z'ee9a00', kind=c_int32_t) + integer(c_int32_t) :: Orange3 = int(Z'cd8500', kind=c_int32_t) + integer(c_int32_t) :: Orange4 = int(Z'8b5a00', kind=c_int32_t) + integer(c_int32_t) :: DarkOrange1 = int(Z'ff7f00', kind=c_int32_t) + integer(c_int32_t) :: DarkOrange2 = int(Z'ee7600', kind=c_int32_t) + integer(c_int32_t) :: DarkOrange3 = int(Z'cd6600', kind=c_int32_t) + integer(c_int32_t) :: DarkOrange4 = int(Z'8b4500', kind=c_int32_t) + integer(c_int32_t) :: Coral1 = int(Z'ff7256', kind=c_int32_t) + integer(c_int32_t) :: Coral2 = int(Z'ee6a50', kind=c_int32_t) + integer(c_int32_t) :: Coral3 = int(Z'cd5b45', kind=c_int32_t) + integer(c_int32_t) :: Coral4 = int(Z'8b3e2f', kind=c_int32_t) + integer(c_int32_t) :: Tomato1 = int(Z'ff6347', kind=c_int32_t) + integer(c_int32_t) :: Tomato2 = int(Z'ee5c42', kind=c_int32_t) + 
integer(c_int32_t) :: Tomato3 = int(Z'cd4f39', kind=c_int32_t) + integer(c_int32_t) :: Tomato4 = int(Z'8b3626', kind=c_int32_t) + integer(c_int32_t) :: OrangeRed1 = int(Z'ff4500', kind=c_int32_t) + integer(c_int32_t) :: OrangeRed2 = int(Z'ee4000', kind=c_int32_t) + integer(c_int32_t) :: OrangeRed3 = int(Z'cd3700', kind=c_int32_t) + integer(c_int32_t) :: OrangeRed4 = int(Z'8b2500', kind=c_int32_t) + integer(c_int32_t) :: Red1 = int(Z'ff0000', kind=c_int32_t) + integer(c_int32_t) :: Red2 = int(Z'ee0000', kind=c_int32_t) + integer(c_int32_t) :: Red3 = int(Z'cd0000', kind=c_int32_t) + integer(c_int32_t) :: Red4 = int(Z'8b0000', kind=c_int32_t) + integer(c_int32_t) :: DeepPink1 = int(Z'ff1493', kind=c_int32_t) + integer(c_int32_t) :: DeepPink2 = int(Z'ee1289', kind=c_int32_t) + integer(c_int32_t) :: DeepPink3 = int(Z'cd1076', kind=c_int32_t) + integer(c_int32_t) :: DeepPink4 = int(Z'8b0a50', kind=c_int32_t) + integer(c_int32_t) :: HotPink1 = int(Z'ff6eb4', kind=c_int32_t) + integer(c_int32_t) :: HotPink2 = int(Z'ee6aa7', kind=c_int32_t) + integer(c_int32_t) :: HotPink3 = int(Z'cd6090', kind=c_int32_t) + integer(c_int32_t) :: HotPink4 = int(Z'8b3a62', kind=c_int32_t) + integer(c_int32_t) :: Pink1 = int(Z'ffb5c5', kind=c_int32_t) + integer(c_int32_t) :: Pink2 = int(Z'eea9b8', kind=c_int32_t) + integer(c_int32_t) :: Pink3 = int(Z'cd919e', kind=c_int32_t) + integer(c_int32_t) :: Pink4 = int(Z'8b636c', kind=c_int32_t) + integer(c_int32_t) :: LightPink1 = int(Z'ffaeb9', kind=c_int32_t) + integer(c_int32_t) :: LightPink2 = int(Z'eea2ad', kind=c_int32_t) + integer(c_int32_t) :: LightPink3 = int(Z'cd8c95', kind=c_int32_t) + integer(c_int32_t) :: LightPink4 = int(Z'8b5f65', kind=c_int32_t) + integer(c_int32_t) :: PaleVioletRed1 = int(Z'ff82ab', kind=c_int32_t) + integer(c_int32_t) :: PaleVioletRed2 = int(Z'ee799f', kind=c_int32_t) + integer(c_int32_t) :: PaleVioletRed3 = int(Z'cd6889', kind=c_int32_t) + integer(c_int32_t) :: PaleVioletRed4 = int(Z'8b475d', kind=c_int32_t) + 
integer(c_int32_t) :: Maroon1 = int(Z'ff34b3', kind=c_int32_t) + integer(c_int32_t) :: Maroon2 = int(Z'ee30a7', kind=c_int32_t) + integer(c_int32_t) :: Maroon3 = int(Z'cd2990', kind=c_int32_t) + integer(c_int32_t) :: Maroon4 = int(Z'8b1c62', kind=c_int32_t) + integer(c_int32_t) :: VioletRed1 = int(Z'ff3e96', kind=c_int32_t) + integer(c_int32_t) :: VioletRed2 = int(Z'ee3a8c', kind=c_int32_t) + integer(c_int32_t) :: VioletRed3 = int(Z'cd3278', kind=c_int32_t) + integer(c_int32_t) :: VioletRed4 = int(Z'8b2252', kind=c_int32_t) + integer(c_int32_t) :: Magenta1 = int(Z'ff00ff', kind=c_int32_t) + integer(c_int32_t) :: Magenta2 = int(Z'ee00ee', kind=c_int32_t) + integer(c_int32_t) :: Magenta3 = int(Z'cd00cd', kind=c_int32_t) + integer(c_int32_t) :: Magenta4 = int(Z'8b008b', kind=c_int32_t) + integer(c_int32_t) :: Orchid1 = int(Z'ff83fa', kind=c_int32_t) + integer(c_int32_t) :: Orchid2 = int(Z'ee7ae9', kind=c_int32_t) + integer(c_int32_t) :: Orchid3 = int(Z'cd69c9', kind=c_int32_t) + integer(c_int32_t) :: Orchid4 = int(Z'8b4789', kind=c_int32_t) + integer(c_int32_t) :: Plum1 = int(Z'ffbbff', kind=c_int32_t) + integer(c_int32_t) :: Plum2 = int(Z'eeaeee', kind=c_int32_t) + integer(c_int32_t) :: Plum3 = int(Z'cd96cd', kind=c_int32_t) + integer(c_int32_t) :: Plum4 = int(Z'8b668b', kind=c_int32_t) + integer(c_int32_t) :: MediumOrchid1 = int(Z'e066ff', kind=c_int32_t) + integer(c_int32_t) :: MediumOrchid2 = int(Z'd15fee', kind=c_int32_t) + integer(c_int32_t) :: MediumOrchid3 = int(Z'b452cd', kind=c_int32_t) + integer(c_int32_t) :: MediumOrchid4 = int(Z'7a378b', kind=c_int32_t) + integer(c_int32_t) :: DarkOrchid1 = int(Z'bf3eff', kind=c_int32_t) + integer(c_int32_t) :: DarkOrchid2 = int(Z'b23aee', kind=c_int32_t) + integer(c_int32_t) :: DarkOrchid3 = int(Z'9a32cd', kind=c_int32_t) + integer(c_int32_t) :: DarkOrchid4 = int(Z'68228b', kind=c_int32_t) + integer(c_int32_t) :: Purple1 = int(Z'9b30ff', kind=c_int32_t) + integer(c_int32_t) :: Purple2 = int(Z'912cee', kind=c_int32_t) + 
integer(c_int32_t) :: Purple3 = int(Z'7d26cd', kind=c_int32_t) + integer(c_int32_t) :: Purple4 = int(Z'551a8b', kind=c_int32_t) + integer(c_int32_t) :: MediumPurple1 = int(Z'ab82ff', kind=c_int32_t) + integer(c_int32_t) :: MediumPurple2 = int(Z'9f79ee', kind=c_int32_t) + integer(c_int32_t) :: MediumPurple3 = int(Z'8968cd', kind=c_int32_t) + integer(c_int32_t) :: MediumPurple4 = int(Z'5d478b', kind=c_int32_t) + integer(c_int32_t) :: Thistle1 = int(Z'ffe1ff', kind=c_int32_t) + integer(c_int32_t) :: Thistle2 = int(Z'eed2ee', kind=c_int32_t) + integer(c_int32_t) :: Thistle3 = int(Z'cdb5cd', kind=c_int32_t) + integer(c_int32_t) :: Thistle4 = int(Z'8b7b8b', kind=c_int32_t) + integer(c_int32_t) :: Gray0 = int(Z'000000', kind=c_int32_t) + integer(c_int32_t) :: Grey0 = int(Z'000000', kind=c_int32_t) + integer(c_int32_t) :: Gray1 = int(Z'030303', kind=c_int32_t) + integer(c_int32_t) :: Grey1 = int(Z'030303', kind=c_int32_t) + integer(c_int32_t) :: Gray2 = int(Z'050505', kind=c_int32_t) + integer(c_int32_t) :: Grey2 = int(Z'050505', kind=c_int32_t) + integer(c_int32_t) :: Gray3 = int(Z'080808', kind=c_int32_t) + integer(c_int32_t) :: Grey3 = int(Z'080808', kind=c_int32_t) + integer(c_int32_t) :: Gray4 = int(Z'0a0a0a', kind=c_int32_t) + integer(c_int32_t) :: Grey4 = int(Z'0a0a0a', kind=c_int32_t) + integer(c_int32_t) :: Gray5 = int(Z'0d0d0d', kind=c_int32_t) + integer(c_int32_t) :: Grey5 = int(Z'0d0d0d', kind=c_int32_t) + integer(c_int32_t) :: Gray6 = int(Z'0f0f0f', kind=c_int32_t) + integer(c_int32_t) :: Grey6 = int(Z'0f0f0f', kind=c_int32_t) + integer(c_int32_t) :: Gray7 = int(Z'121212', kind=c_int32_t) + integer(c_int32_t) :: Grey7 = int(Z'121212', kind=c_int32_t) + integer(c_int32_t) :: Gray8 = int(Z'141414', kind=c_int32_t) + integer(c_int32_t) :: Grey8 = int(Z'141414', kind=c_int32_t) + integer(c_int32_t) :: Gray9 = int(Z'171717', kind=c_int32_t) + integer(c_int32_t) :: Grey9 = int(Z'171717', kind=c_int32_t) + integer(c_int32_t) :: Gray10 = int(Z'1a1a1a', kind=c_int32_t) 
+ integer(c_int32_t) :: Grey10 = int(Z'1a1a1a', kind=c_int32_t) + integer(c_int32_t) :: Gray11 = int(Z'1c1c1c', kind=c_int32_t) + integer(c_int32_t) :: Grey11 = int(Z'1c1c1c', kind=c_int32_t) + integer(c_int32_t) :: Gray12 = int(Z'1f1f1f', kind=c_int32_t) + integer(c_int32_t) :: Grey12 = int(Z'1f1f1f', kind=c_int32_t) + integer(c_int32_t) :: Gray13 = int(Z'212121', kind=c_int32_t) + integer(c_int32_t) :: Grey13 = int(Z'212121', kind=c_int32_t) + integer(c_int32_t) :: Gray14 = int(Z'242424', kind=c_int32_t) + integer(c_int32_t) :: Grey14 = int(Z'242424', kind=c_int32_t) + integer(c_int32_t) :: Gray15 = int(Z'262626', kind=c_int32_t) + integer(c_int32_t) :: Grey15 = int(Z'262626', kind=c_int32_t) + integer(c_int32_t) :: Gray16 = int(Z'292929', kind=c_int32_t) + integer(c_int32_t) :: Grey16 = int(Z'292929', kind=c_int32_t) + integer(c_int32_t) :: Gray17 = int(Z'2b2b2b', kind=c_int32_t) + integer(c_int32_t) :: Grey17 = int(Z'2b2b2b', kind=c_int32_t) + integer(c_int32_t) :: Gray18 = int(Z'2e2e2e', kind=c_int32_t) + integer(c_int32_t) :: Grey18 = int(Z'2e2e2e', kind=c_int32_t) + integer(c_int32_t) :: Gray19 = int(Z'303030', kind=c_int32_t) + integer(c_int32_t) :: Grey19 = int(Z'303030', kind=c_int32_t) + integer(c_int32_t) :: Gray20 = int(Z'333333', kind=c_int32_t) + integer(c_int32_t) :: Grey20 = int(Z'333333', kind=c_int32_t) + integer(c_int32_t) :: Gray21 = int(Z'363636', kind=c_int32_t) + integer(c_int32_t) :: Grey21 = int(Z'363636', kind=c_int32_t) + integer(c_int32_t) :: Gray22 = int(Z'383838', kind=c_int32_t) + integer(c_int32_t) :: Grey22 = int(Z'383838', kind=c_int32_t) + integer(c_int32_t) :: Gray23 = int(Z'3b3b3b', kind=c_int32_t) + integer(c_int32_t) :: Grey23 = int(Z'3b3b3b', kind=c_int32_t) + integer(c_int32_t) :: Gray24 = int(Z'3d3d3d', kind=c_int32_t) + integer(c_int32_t) :: Grey24 = int(Z'3d3d3d', kind=c_int32_t) + integer(c_int32_t) :: Gray25 = int(Z'404040', kind=c_int32_t) + integer(c_int32_t) :: Grey25 = int(Z'404040', kind=c_int32_t) + 
integer(c_int32_t) :: Gray26 = int(Z'424242', kind=c_int32_t) + integer(c_int32_t) :: Grey26 = int(Z'424242', kind=c_int32_t) + integer(c_int32_t) :: Gray27 = int(Z'454545', kind=c_int32_t) + integer(c_int32_t) :: Grey27 = int(Z'454545', kind=c_int32_t) + integer(c_int32_t) :: Gray28 = int(Z'474747', kind=c_int32_t) + integer(c_int32_t) :: Grey28 = int(Z'474747', kind=c_int32_t) + integer(c_int32_t) :: Gray29 = int(Z'4a4a4a', kind=c_int32_t) + integer(c_int32_t) :: Grey29 = int(Z'4a4a4a', kind=c_int32_t) + integer(c_int32_t) :: Gray30 = int(Z'4d4d4d', kind=c_int32_t) + integer(c_int32_t) :: Grey30 = int(Z'4d4d4d', kind=c_int32_t) + integer(c_int32_t) :: Gray31 = int(Z'4f4f4f', kind=c_int32_t) + integer(c_int32_t) :: Grey31 = int(Z'4f4f4f', kind=c_int32_t) + integer(c_int32_t) :: Gray32 = int(Z'525252', kind=c_int32_t) + integer(c_int32_t) :: Grey32 = int(Z'525252', kind=c_int32_t) + integer(c_int32_t) :: Gray33 = int(Z'545454', kind=c_int32_t) + integer(c_int32_t) :: Grey33 = int(Z'545454', kind=c_int32_t) + integer(c_int32_t) :: Gray34 = int(Z'575757', kind=c_int32_t) + integer(c_int32_t) :: Grey34 = int(Z'575757', kind=c_int32_t) + integer(c_int32_t) :: Gray35 = int(Z'595959', kind=c_int32_t) + integer(c_int32_t) :: Grey35 = int(Z'595959', kind=c_int32_t) + integer(c_int32_t) :: Gray36 = int(Z'5c5c5c', kind=c_int32_t) + integer(c_int32_t) :: Grey36 = int(Z'5c5c5c', kind=c_int32_t) + integer(c_int32_t) :: Gray37 = int(Z'5e5e5e', kind=c_int32_t) + integer(c_int32_t) :: Grey37 = int(Z'5e5e5e', kind=c_int32_t) + integer(c_int32_t) :: Gray38 = int(Z'616161', kind=c_int32_t) + integer(c_int32_t) :: Grey38 = int(Z'616161', kind=c_int32_t) + integer(c_int32_t) :: Gray39 = int(Z'636363', kind=c_int32_t) + integer(c_int32_t) :: Grey39 = int(Z'636363', kind=c_int32_t) + integer(c_int32_t) :: Gray40 = int(Z'666666', kind=c_int32_t) + integer(c_int32_t) :: Grey40 = int(Z'666666', kind=c_int32_t) + integer(c_int32_t) :: Gray41 = int(Z'696969', kind=c_int32_t) + 
integer(c_int32_t) :: Grey41 = int(Z'696969', kind=c_int32_t) + integer(c_int32_t) :: Gray42 = int(Z'6b6b6b', kind=c_int32_t) + integer(c_int32_t) :: Grey42 = int(Z'6b6b6b', kind=c_int32_t) + integer(c_int32_t) :: Gray43 = int(Z'6e6e6e', kind=c_int32_t) + integer(c_int32_t) :: Grey43 = int(Z'6e6e6e', kind=c_int32_t) + integer(c_int32_t) :: Gray44 = int(Z'707070', kind=c_int32_t) + integer(c_int32_t) :: Grey44 = int(Z'707070', kind=c_int32_t) + integer(c_int32_t) :: Gray45 = int(Z'737373', kind=c_int32_t) + integer(c_int32_t) :: Grey45 = int(Z'737373', kind=c_int32_t) + integer(c_int32_t) :: Gray46 = int(Z'757575', kind=c_int32_t) + integer(c_int32_t) :: Grey46 = int(Z'757575', kind=c_int32_t) + integer(c_int32_t) :: Gray47 = int(Z'787878', kind=c_int32_t) + integer(c_int32_t) :: Grey47 = int(Z'787878', kind=c_int32_t) + integer(c_int32_t) :: Gray48 = int(Z'7a7a7a', kind=c_int32_t) + integer(c_int32_t) :: Grey48 = int(Z'7a7a7a', kind=c_int32_t) + integer(c_int32_t) :: Gray49 = int(Z'7d7d7d', kind=c_int32_t) + integer(c_int32_t) :: Grey49 = int(Z'7d7d7d', kind=c_int32_t) + integer(c_int32_t) :: Gray50 = int(Z'7f7f7f', kind=c_int32_t) + integer(c_int32_t) :: Grey50 = int(Z'7f7f7f', kind=c_int32_t) + integer(c_int32_t) :: Gray51 = int(Z'828282', kind=c_int32_t) + integer(c_int32_t) :: Grey51 = int(Z'828282', kind=c_int32_t) + integer(c_int32_t) :: Gray52 = int(Z'858585', kind=c_int32_t) + integer(c_int32_t) :: Grey52 = int(Z'858585', kind=c_int32_t) + integer(c_int32_t) :: Gray53 = int(Z'878787', kind=c_int32_t) + integer(c_int32_t) :: Grey53 = int(Z'878787', kind=c_int32_t) + integer(c_int32_t) :: Gray54 = int(Z'8a8a8a', kind=c_int32_t) + integer(c_int32_t) :: Grey54 = int(Z'8a8a8a', kind=c_int32_t) + integer(c_int32_t) :: Gray55 = int(Z'8c8c8c', kind=c_int32_t) + integer(c_int32_t) :: Grey55 = int(Z'8c8c8c', kind=c_int32_t) + integer(c_int32_t) :: Gray56 = int(Z'8f8f8f', kind=c_int32_t) + integer(c_int32_t) :: Grey56 = int(Z'8f8f8f', kind=c_int32_t) + 
integer(c_int32_t) :: Gray57 = int(Z'919191', kind=c_int32_t) + integer(c_int32_t) :: Grey57 = int(Z'919191', kind=c_int32_t) + integer(c_int32_t) :: Gray58 = int(Z'949494', kind=c_int32_t) + integer(c_int32_t) :: Grey58 = int(Z'949494', kind=c_int32_t) + integer(c_int32_t) :: Gray59 = int(Z'969696', kind=c_int32_t) + integer(c_int32_t) :: Grey59 = int(Z'969696', kind=c_int32_t) + integer(c_int32_t) :: Gray60 = int(Z'999999', kind=c_int32_t) + integer(c_int32_t) :: Grey60 = int(Z'999999', kind=c_int32_t) + integer(c_int32_t) :: Gray61 = int(Z'9c9c9c', kind=c_int32_t) + integer(c_int32_t) :: Grey61 = int(Z'9c9c9c', kind=c_int32_t) + integer(c_int32_t) :: Gray62 = int(Z'9e9e9e', kind=c_int32_t) + integer(c_int32_t) :: Grey62 = int(Z'9e9e9e', kind=c_int32_t) + integer(c_int32_t) :: Gray63 = int(Z'a1a1a1', kind=c_int32_t) + integer(c_int32_t) :: Grey63 = int(Z'a1a1a1', kind=c_int32_t) + integer(c_int32_t) :: Gray64 = int(Z'a3a3a3', kind=c_int32_t) + integer(c_int32_t) :: Grey64 = int(Z'a3a3a3', kind=c_int32_t) + integer(c_int32_t) :: Gray65 = int(Z'a6a6a6', kind=c_int32_t) + integer(c_int32_t) :: Grey65 = int(Z'a6a6a6', kind=c_int32_t) + integer(c_int32_t) :: Gray66 = int(Z'a8a8a8', kind=c_int32_t) + integer(c_int32_t) :: Grey66 = int(Z'a8a8a8', kind=c_int32_t) + integer(c_int32_t) :: Gray67 = int(Z'ababab', kind=c_int32_t) + integer(c_int32_t) :: Grey67 = int(Z'ababab', kind=c_int32_t) + integer(c_int32_t) :: Gray68 = int(Z'adadad', kind=c_int32_t) + integer(c_int32_t) :: Grey68 = int(Z'adadad', kind=c_int32_t) + integer(c_int32_t) :: Gray69 = int(Z'b0b0b0', kind=c_int32_t) + integer(c_int32_t) :: Grey69 = int(Z'b0b0b0', kind=c_int32_t) + integer(c_int32_t) :: Gray70 = int(Z'b3b3b3', kind=c_int32_t) + integer(c_int32_t) :: Grey70 = int(Z'b3b3b3', kind=c_int32_t) + integer(c_int32_t) :: Gray71 = int(Z'b5b5b5', kind=c_int32_t) + integer(c_int32_t) :: Grey71 = int(Z'b5b5b5', kind=c_int32_t) + integer(c_int32_t) :: Gray72 = int(Z'b8b8b8', kind=c_int32_t) + 
integer(c_int32_t) :: Grey72 = int(Z'b8b8b8', kind=c_int32_t) + integer(c_int32_t) :: Gray73 = int(Z'bababa', kind=c_int32_t) + integer(c_int32_t) :: Grey73 = int(Z'bababa', kind=c_int32_t) + integer(c_int32_t) :: Gray74 = int(Z'bdbdbd', kind=c_int32_t) + integer(c_int32_t) :: Grey74 = int(Z'bdbdbd', kind=c_int32_t) + integer(c_int32_t) :: Gray75 = int(Z'bfbfbf', kind=c_int32_t) + integer(c_int32_t) :: Grey75 = int(Z'bfbfbf', kind=c_int32_t) + integer(c_int32_t) :: Gray76 = int(Z'c2c2c2', kind=c_int32_t) + integer(c_int32_t) :: Grey76 = int(Z'c2c2c2', kind=c_int32_t) + integer(c_int32_t) :: Gray77 = int(Z'c4c4c4', kind=c_int32_t) + integer(c_int32_t) :: Grey77 = int(Z'c4c4c4', kind=c_int32_t) + integer(c_int32_t) :: Gray78 = int(Z'c7c7c7', kind=c_int32_t) + integer(c_int32_t) :: Grey78 = int(Z'c7c7c7', kind=c_int32_t) + integer(c_int32_t) :: Gray79 = int(Z'c9c9c9', kind=c_int32_t) + integer(c_int32_t) :: Grey79 = int(Z'c9c9c9', kind=c_int32_t) + integer(c_int32_t) :: Gray80 = int(Z'cccccc', kind=c_int32_t) + integer(c_int32_t) :: Grey80 = int(Z'cccccc', kind=c_int32_t) + integer(c_int32_t) :: Gray81 = int(Z'cfcfcf', kind=c_int32_t) + integer(c_int32_t) :: Grey81 = int(Z'cfcfcf', kind=c_int32_t) + integer(c_int32_t) :: Gray82 = int(Z'd1d1d1', kind=c_int32_t) + integer(c_int32_t) :: Grey82 = int(Z'd1d1d1', kind=c_int32_t) + integer(c_int32_t) :: Gray83 = int(Z'd4d4d4', kind=c_int32_t) + integer(c_int32_t) :: Grey83 = int(Z'd4d4d4', kind=c_int32_t) + integer(c_int32_t) :: Gray84 = int(Z'd6d6d6', kind=c_int32_t) + integer(c_int32_t) :: Grey84 = int(Z'd6d6d6', kind=c_int32_t) + integer(c_int32_t) :: Gray85 = int(Z'd9d9d9', kind=c_int32_t) + integer(c_int32_t) :: Grey85 = int(Z'd9d9d9', kind=c_int32_t) + integer(c_int32_t) :: Gray86 = int(Z'dbdbdb', kind=c_int32_t) + integer(c_int32_t) :: Grey86 = int(Z'dbdbdb', kind=c_int32_t) + integer(c_int32_t) :: Gray87 = int(Z'dedede', kind=c_int32_t) + integer(c_int32_t) :: Grey87 = int(Z'dedede', kind=c_int32_t) + 
integer(c_int32_t) :: Gray88 = int(Z'e0e0e0', kind=c_int32_t) + integer(c_int32_t) :: Grey88 = int(Z'e0e0e0', kind=c_int32_t) + integer(c_int32_t) :: Gray89 = int(Z'e3e3e3', kind=c_int32_t) + integer(c_int32_t) :: Grey89 = int(Z'e3e3e3', kind=c_int32_t) + integer(c_int32_t) :: Gray90 = int(Z'e5e5e5', kind=c_int32_t) + integer(c_int32_t) :: Grey90 = int(Z'e5e5e5', kind=c_int32_t) + integer(c_int32_t) :: Gray91 = int(Z'e8e8e8', kind=c_int32_t) + integer(c_int32_t) :: Grey91 = int(Z'e8e8e8', kind=c_int32_t) + integer(c_int32_t) :: Gray92 = int(Z'ebebeb', kind=c_int32_t) + integer(c_int32_t) :: Grey92 = int(Z'ebebeb', kind=c_int32_t) + integer(c_int32_t) :: Gray93 = int(Z'ededed', kind=c_int32_t) + integer(c_int32_t) :: Grey93 = int(Z'ededed', kind=c_int32_t) + integer(c_int32_t) :: Gray94 = int(Z'f0f0f0', kind=c_int32_t) + integer(c_int32_t) :: Grey94 = int(Z'f0f0f0', kind=c_int32_t) + integer(c_int32_t) :: Gray95 = int(Z'f2f2f2', kind=c_int32_t) + integer(c_int32_t) :: Grey95 = int(Z'f2f2f2', kind=c_int32_t) + integer(c_int32_t) :: Gray96 = int(Z'f5f5f5', kind=c_int32_t) + integer(c_int32_t) :: Grey96 = int(Z'f5f5f5', kind=c_int32_t) + integer(c_int32_t) :: Gray97 = int(Z'f7f7f7', kind=c_int32_t) + integer(c_int32_t) :: Grey97 = int(Z'f7f7f7', kind=c_int32_t) + integer(c_int32_t) :: Gray98 = int(Z'fafafa', kind=c_int32_t) + integer(c_int32_t) :: Grey98 = int(Z'fafafa', kind=c_int32_t) + integer(c_int32_t) :: Gray99 = int(Z'fcfcfc', kind=c_int32_t) + integer(c_int32_t) :: Grey99 = int(Z'fcfcfc', kind=c_int32_t) + integer(c_int32_t) :: Gray100 = int(Z'ffffff', kind=c_int32_t) + integer(c_int32_t) :: Grey100 = int(Z'ffffff', kind=c_int32_t) + integer(c_int32_t) :: DarkGrey = int(Z'a9a9a9', kind=c_int32_t) + integer(c_int32_t) :: DarkGray = int(Z'a9a9a9', kind=c_int32_t) + integer(c_int32_t) :: DarkBlue = int(Z'00008b', kind=c_int32_t) + integer(c_int32_t) :: DarkCyan = int(Z'008b8b', kind=c_int32_t) + integer(c_int32_t) :: DarkMagenta = int(Z'8b008b', kind=c_int32_t) + 
integer(c_int32_t) :: DarkRed = int(Z'8b0000', kind=c_int32_t) + integer(c_int32_t) :: LightGreen = int(Z'90ee90', kind=c_int32_t) + integer(c_int32_t) :: Crimson = int(Z'dc143c', kind=c_int32_t) + integer(c_int32_t) :: Indigo = int(Z'4b0082', kind=c_int32_t) + integer(c_int32_t) :: Olive = int(Z'808000', kind=c_int32_t) + integer(c_int32_t) :: RebeccaPurple = int(Z'663399', kind=c_int32_t) + integer(c_int32_t) :: Silver = int(Z'c0c0c0', kind=c_int32_t) + integer(c_int32_t) :: Teal = int(Z'008080', kind=c_int32_t) + end type + + interface + subroutine impl_tracy_set_thread_name(name) bind(C, name="___tracy_set_thread_name") + import + type(c_ptr), intent(in), value :: name + end subroutine impl_tracy_set_thread_name + end interface + + type, bind(C) :: tracy_source_location_data + type(c_ptr) :: name + type(c_ptr) :: function + type(c_ptr) :: file + integer(c_int32_t) :: line + integer(c_int32_t) :: color + end type + + type, bind(C) :: tracy_zone_context + integer(c_int32_t) :: id + integer(c_int32_t) :: active + end type + + type, bind(C) :: tracy_gpu_time_data + integer(c_int64_t) :: gpuTime + integer(c_int16_t) :: queryId + integer(c_int8_t) :: context + end type + + type, bind(C) :: tracy_gpu_zone_begin_data + integer(c_int64_t) :: srcloc + integer(c_int16_t) :: queryId + integer(c_int8_t) :: context + end type + + type, bind(C) :: tracy_gpu_zone_begin_callstack_data + integer(c_int64_t) :: srcloc + integer(c_int32_t) :: depth + integer(c_int16_t) :: queryId + integer(c_int8_t) :: context + end type + + type, bind(C) :: tracy_gpu_zone_end_data + integer(c_int16_t) :: queryId + integer(c_int8_t) :: context + end type + + type, bind(C) :: tracy_gpu_new_context_data + integer(c_int64_t) :: gpuTime + real(c_float) :: period + integer(c_int8_t) :: context + integer(c_int8_t) :: flags + integer(c_int8_t) :: type + end type + + type, bind(C) :: tracy_gpu_context_name_data + integer(c_int8_t) :: context + type(c_ptr) :: name + integer(c_int16_t) :: len + end type + + 
type, bind(C) :: tracy_gpu_calibration_data  ! C-interoperable record made of fixed-width integer fields
+    integer(c_int64_t) :: gpuTime
+    integer(c_int64_t) :: cpuDelta
+    integer(c_int8_t) :: context
+  end type
+
+  type, bind(C) :: tracy_gpu_time_sync_data  ! C-interoperable record made of fixed-width integer fields
+    integer(c_int64_t) :: gpuTime
+    integer(c_int8_t) :: context
+  end type
+
+  ! tracy_lockable_context_data and related entities are omitted because Fortran has no mutex support
+
+  interface  ! profiler lifetime; each entry binds to the C symbol given in bind(C, name=...)
+    subroutine tracy_startup_profiler() bind(C, name="___tracy_startup_profiler")
+    end subroutine tracy_startup_profiler
+    subroutine tracy_shutdown_profiler() bind(C, name="___tracy_shutdown_profiler")
+    end subroutine tracy_shutdown_profiler
+    function impl_tracy_profiler_started() bind(C, name="___tracy_profiler_started")
+      import
+      integer(c_int32_t) :: impl_tracy_profiler_started  ! integer flag; tracy_profiler_started compares it with 0
+    end function impl_tracy_profiler_started
+  end interface
+
+  interface  ! source-location allocation; strings are passed as pointer + length pairs
+    function impl_tracy_alloc_srcloc(line, source, sourceSz, function_name, functionSz, color) &
+      bind(C, name="___tracy_alloc_srcloc")
+      import
+      integer(c_int64_t) :: impl_tracy_alloc_srcloc
+      integer(c_int32_t), intent(in), value :: line
+      type(c_ptr), intent(in), value :: source
+      integer(c_size_t), intent(in), value :: sourceSz
+      type(c_ptr), intent(in), value :: function_name
+      integer(c_size_t), intent(in), value :: functionSz
+      integer(c_int32_t), intent(in), value :: color
+    end function impl_tracy_alloc_srcloc
+    function impl_tracy_alloc_srcloc_name(line, source, sourceSz, function_name, functionSz, zone_name, nameSz, color) &
+      bind(C, name="___tracy_alloc_srcloc_name")
+      import
+      integer(c_int64_t) :: impl_tracy_alloc_srcloc_name
+      integer(c_int32_t), intent(in), value :: line
+      type(c_ptr), intent(in), value :: source
+      integer(c_size_t), intent(in), value :: sourceSz
+      type(c_ptr), intent(in), value :: function_name
+      integer(c_size_t), intent(in), value :: functionSz
+      type(c_ptr), intent(in), value :: zone_name
+      integer(c_size_t), intent(in), value :: nameSz
+      integer(c_int32_t), intent(in), value :: color
+    end function impl_tracy_alloc_srcloc_name
+  end interface
+
+  interface  ! zone begin; both variants return a tracy_zone_context
+    type(tracy_zone_context) function impl_tracy_emit_zone_begin_callstack(srcloc, depth, active) &
+      bind(C, name="___tracy_emit_zone_begin_callstack")
+      import
+      type(tracy_source_location_data), intent(in) :: srcloc  ! no value attribute: passed by reference
+      integer(c_int32_t), intent(in), value :: depth
+      integer(c_int32_t), intent(in), value :: active
+    end function impl_tracy_emit_zone_begin_callstack
+    type(tracy_zone_context) function impl_tracy_emit_zone_begin_alloc_callstack(srcloc, depth, active) &
+      bind(C, name="___tracy_emit_zone_begin_alloc_callstack")
+      import
+      integer(c_int64_t), intent(in), value :: srcloc  ! 64-bit id, same kind as tracy_alloc_srcloc returns
+      integer(c_int32_t), intent(in), value :: depth
+      integer(c_int32_t), intent(in), value :: active
+    end function impl_tracy_emit_zone_begin_alloc_callstack
+  end interface
+  interface tracy_zone_begin  ! generic name resolved on the type of srcloc (int64 id or derived type)
+    module procedure tracy_emit_zone_begin_id, tracy_emit_zone_begin_type
+  end interface tracy_zone_begin
+
+  interface  ! zone end takes the context by value
+    subroutine tracy_zone_end(ctx) bind(C, name="___tracy_emit_zone_end")
+      import
+      type(tracy_zone_context), intent(in), value :: ctx
+    end subroutine tracy_zone_end
+  end interface
+
+  interface  ! zone property setters; called from tracy_zone_set_properties
+    subroutine tracy_emit_zone_text(ctx, txt, size) bind(C, name="___tracy_emit_zone_text")
+      import
+      type(tracy_zone_context), intent(in), value :: ctx
+      type(c_ptr), intent(in), value :: txt
+      integer(c_size_t), intent(in), value :: size
+    end subroutine tracy_emit_zone_text
+    subroutine tracy_emit_zone_name(ctx, txt, size) bind(C, name="___tracy_emit_zone_name")
+      import
+      type(tracy_zone_context), intent(in), value :: ctx
+      type(c_ptr), intent(in), value :: txt
+      integer(c_size_t), intent(in), value :: size
+    end subroutine tracy_emit_zone_name
+    subroutine tracy_emit_zone_color(ctx, color) bind(C, name="___tracy_emit_zone_color")
+      import
+      type(tracy_zone_context), intent(in), value :: ctx
+      integer(c_int32_t), intent(in), value :: color
+    end subroutine tracy_emit_zone_color
+    subroutine tracy_emit_zone_value(ctx, value) bind(C, name="___tracy_emit_zone_value")
+      import
+      type(tracy_zone_context), intent(in), value :: ctx
+      integer(c_int64_t), intent(in), value :: value
+    end subroutine tracy_emit_zone_value
+  end interface
+
+  ! GPU is not supported yet
+
+  interface  ! connection query; tracy_connected compares the result with 0
+    function impl_tracy_connected() bind(C, name="___tracy_connected")
+      import
+      integer(c_int32_t) :: impl_tracy_connected
+    end function impl_tracy_connected
+  end interface
+
+  interface  ! memory events; note that argument order differs between these bindings
+    subroutine impl_tracy_emit_memory_alloc_callstack(ptr, size, depth, secure) &
+      bind(C, name="___tracy_emit_memory_alloc_callstack")
+      import
+      type(c_ptr), intent(in), value :: ptr
+      integer(c_size_t), intent(in), value :: size
+      integer(c_int32_t), intent(in), value :: depth
+      integer(c_int32_t), intent(in), value :: secure
+    end subroutine impl_tracy_emit_memory_alloc_callstack
+    subroutine impl_tracy_emit_memory_alloc_callstack_named(ptr, size, depth, secure, name) &
+      bind(C, name="___tracy_emit_memory_alloc_callstack_named")
+      import
+      type(c_ptr), intent(in), value :: ptr
+      integer(c_size_t), intent(in), value :: size
+      integer(c_int32_t), intent(in), value :: depth
+      integer(c_int32_t), intent(in), value :: secure
+      type(c_ptr), intent(in), value :: name
+    end subroutine impl_tracy_emit_memory_alloc_callstack_named
+    subroutine impl_tracy_emit_memory_free_callstack(ptr, depth, secure) &
+      bind(C, name="___tracy_emit_memory_free_callstack")
+      import
+      type(c_ptr), intent(in), value :: ptr
+      integer(c_int32_t), intent(in), value :: depth
+      integer(c_int32_t), intent(in), value :: secure
+    end subroutine impl_tracy_emit_memory_free_callstack
+    subroutine impl_tracy_emit_memory_free_callstack_named(ptr, depth, secure, name) &
+      bind(C, name="___tracy_emit_memory_free_callstack_named")
+      import
+      type(c_ptr), intent(in), value :: ptr
+      integer(c_int32_t), intent(in), value :: depth
+      integer(c_int32_t), intent(in), value :: secure
+      type(c_ptr), intent(in), value :: name
+    end subroutine impl_tracy_emit_memory_free_callstack_named
+    subroutine impl_tracy_emit_memory_discard_callstack(name, secure, depth) &
+      bind(C, name="___tracy_emit_memory_discard_callstack")
+      import
+      type(c_ptr), intent(in), value :: name
+      integer(c_int32_t), intent(in), value :: secure  ! secure precedes depth here, unlike alloc/free above
+      integer(c_int32_t), intent(in), value :: depth
+    end subroutine impl_tracy_emit_memory_discard_callstack
+  end interface
+
+  interface  ! text messages; txt is a pointer with an explicit byte count
+    subroutine impl_tracy_emit_message(txt, size, depth) &
+      bind(C, name="___tracy_emit_message")
+      import
+      type(c_ptr), intent(in), value :: txt
+      integer(c_size_t), value :: size
+      integer(c_int32_t), value :: depth
+    end subroutine impl_tracy_emit_message
+    subroutine impl_tracy_emit_messageC(txt, size, color, depth) &
+      bind(C, name="___tracy_emit_messageC")
+      import
+      type(c_ptr), intent(in), value :: txt
+      integer(c_size_t), value :: size
+      integer(c_int32_t), value :: color
+      integer(c_int32_t), value :: depth
+    end subroutine impl_tracy_emit_messageC
+    subroutine impl_tracy_emit_message_appinfo(txt, size) &
+      bind(C, name="___tracy_emit_message_appinfo")
+      import
+      type(c_ptr), intent(in), value :: txt
+      integer(c_size_t), value :: size
+    end subroutine impl_tracy_emit_message_appinfo
+  end interface
+
+  interface  ! frame marks; the wrappers pass c_null_ptr when no name is given
+    subroutine impl_tracy_emit_frame_mark(name) &
+      bind(C, name="___tracy_emit_frame_mark")
+      import
+      type(c_ptr), intent(in), value :: name
+    end subroutine impl_tracy_emit_frame_mark
+    subroutine impl_tracy_emit_frame_mark_start(name) &
+      bind(C, name="___tracy_emit_frame_mark_start")
+      import
+      type(c_ptr), intent(in), value :: name
+    end subroutine impl_tracy_emit_frame_mark_start
+    subroutine impl_tracy_emit_frame_mark_end(name) &
+      bind(C, name="___tracy_emit_frame_mark_end")
+      import
+      type(c_ptr), intent(in), value :: name
+    end subroutine impl_tracy_emit_frame_mark_end
+  end interface
+
+  interface  ! frame image upload
+    subroutine impl_tracy_emit_frame_image(image, w, h, offset, flip) &
+      bind(C, name="___tracy_emit_frame_image")
+      import
+      type(c_ptr), intent(in), value :: image
+      integer(c_int16_t), intent(in), value :: w
+      integer(c_int16_t), intent(in), value :: h
+      integer(c_int8_t), intent(in), value :: offset
+      integer(c_int32_t), intent(in), value :: flip
+    end subroutine impl_tracy_emit_frame_image
+  end interface
+
+  interface  ! plot samples: one binding per value kind (int64, float, double)
+    subroutine impl_tracy_emit_plot_int8(name, val) &
+      bind(C, name="___tracy_emit_plot_int")
+      import
+      type(c_ptr), intent(in), value :: name
+      integer(c_int64_t), value :: val
+    end subroutine impl_tracy_emit_plot_int8
+    subroutine impl_tracy_emit_plot_real4(name, val) &
+      bind(C, name="___tracy_emit_plot_float")
+      import
+      type(c_ptr), intent(in), value :: name
+      real(c_float), value :: val
+    end subroutine impl_tracy_emit_plot_real4
+    subroutine impl_tracy_emit_plot_real8(name, val) &
+      bind(C, name="___tracy_emit_plot")
+      import
+      type(c_ptr), intent(in), value :: name
+      real(c_double), value :: val
+    end subroutine impl_tracy_emit_plot_real8
+  end interface
+  interface tracy_plot  ! generic name resolved on the kind of val
+    module procedure tracy_plot_int8, tracy_plot_real4, tracy_plot_real8
+  end interface tracy_plot
+  interface  ! plot configuration; step and fill are passed as integers
+    subroutine impl_tracy_emit_plot_config(name, type, step, fill, color) &
+      bind(C, name="___tracy_emit_plot_config")
+      import
+      type(c_ptr), intent(in), value :: name
+      integer(c_int32_t), intent(in), value :: type
+      integer(c_int32_t), intent(in), value :: step
+      integer(c_int32_t), intent(in), value :: fill
+      integer(c_int32_t), intent(in), value :: color
+    end subroutine impl_tracy_emit_plot_config
+  end interface
+
+#ifdef TRACY_FIBERS
+  interface  ! fiber bindings, compiled only when TRACY_FIBERS is defined
+    subroutine impl_tracy_fiber_enter(fiber_name) &
+      bind(C, name="___tracy_fiber_enter")
+      import
+      type(c_ptr), intent(in), value :: fiber_name
+    end subroutine impl_tracy_fiber_enter
+    subroutine tracy_fiber_leave() &
+      bind(C, name="___tracy_fiber_leave")
+    end subroutine tracy_fiber_leave
+  end interface
+#endif
+  !
+  public :: tracy_zone_context
+  public :: tracy_source_location_data
+  !
+#ifndef __SUNPRO_F90
+  type(TracyColors_t), public, parameter :: TracyColors = TracyColors_t()
+#endif
+  ! Exported names; the impl_* bindings are not listed here and are reached through the wrappers below
+  public :: tracy_set_thread_name
+  public :: tracy_startup_profiler, tracy_shutdown_profiler, tracy_profiler_started
+  public :: tracy_connected
+  public :: tracy_appinfo
+  public :: tracy_alloc_srcloc
+  public :: tracy_zone_begin, tracy_zone_end
+  public :: tracy_zone_set_properties
+  public :: tracy_frame_mark, tracy_frame_start, tracy_frame_end
+  public :: tracy_memory_alloc, tracy_memory_free, tracy_memory_discard
+  public :: tracy_message
+  public :: tracy_image
+  public :: tracy_plot_config, tracy_plot
+#ifdef TRACY_FIBERS
+  public :: tracy_fiber_enter, tracy_fiber_leave
+#endif
+contains
+  subroutine tracy_set_thread_name(name)  ! hands a NUL-terminated copy of name to the C binding
+    character(kind=c_char, len=*), intent(in) :: name
+    character(kind=c_char, len=:), allocatable, target :: alloc_name
+    allocate (character(kind=c_char, len=len(name) + 1) :: alloc_name)
+    alloc_name = name//c_null_char
+    call impl_tracy_set_thread_name(c_loc(alloc_name))
+  end subroutine tracy_set_thread_name
+
+  logical(1) function tracy_profiler_started()  ! true when the C call returns non-zero
+    tracy_profiler_started = impl_tracy_profiler_started() /= 0_c_int
+  end function tracy_profiler_started
+
+  integer(c_int64_t) function tracy_alloc_srcloc(line, source, function_name, zone_name, color)
+    integer(c_int32_t), intent(in) :: line
+    character(kind=c_char, len=*), target, intent(in) :: source, function_name
+    character(kind=c_char, len=*), target, intent(in), optional :: zone_name
+    integer(c_int32_t), intent(in), optional :: color
+    ! color defaults to 0; strings are passed as pointer + len(), so no NUL terminator is appended
+    integer(c_int32_t) :: color_
+    ! the *_name binding is used only when zone_name is present
+    color_ = 0_c_int32_t
+    if (present(color)) color_ = color
+    if (present(zone_name)) then
+      tracy_alloc_srcloc = impl_tracy_alloc_srcloc_name(line, &
+        c_loc(source), len(source, kind=c_size_t), &
+        c_loc(function_name), len(function_name, kind=c_size_t), &
+        c_loc(zone_name), len(zone_name, kind=c_size_t), &
+        color_)
+    else
+      tracy_alloc_srcloc = impl_tracy_alloc_srcloc(line, &
+        c_loc(source), len(source, kind=c_size_t), &
+        c_loc(function_name), len(function_name, kind=c_size_t), &
+        color_)
+    end if
+  end function tracy_alloc_srcloc
+
+  type(tracy_zone_context) function tracy_emit_zone_begin_id(srcloc, depth, active)
+    integer(c_int64_t), intent(inout) :: srcloc
+    integer(c_int32_t), intent(in), optional :: depth
+    logical(1), intent(in), optional :: active
+    ! defaults: active, depth 0; srcloc is reset to 0 after the call
+    integer(c_int32_t) :: depth_
+    integer(c_int32_t) :: active_
+    active_ = 1_c_int32_t
+    depth_ = 0_c_int32_t
+    if (present(active)) then
+      if (active) then
+        active_ = 1_c_int32_t
+      else
+        active_ = 0_c_int32_t
+      end if
+    end if
+    if (present(depth)) depth_ = depth
+    tracy_emit_zone_begin_id = impl_tracy_emit_zone_begin_alloc_callstack(srcloc, depth_, active_)
+    srcloc = 0_c_int64_t
+  end function tracy_emit_zone_begin_id
+  type(tracy_zone_context) function tracy_emit_zone_begin_type(srcloc, depth, active)
+    type(tracy_source_location_data), intent(inout) :: srcloc
+    integer(c_int32_t), intent(in), optional :: depth
+    logical(1), intent(in), optional :: active
+    ! defaults: active, depth 0; srcloc is overwritten with null pointers and zeros after the call
+    integer(c_int32_t) :: depth_
+    integer(c_int32_t) :: active_
+    active_ = 1_c_int32_t
+    depth_ = 0_c_int32_t
+    if (present(active)) then
+      if (active) then
+        active_ = 1_c_int32_t
+      else
+        active_ = 0_c_int32_t
+      end if
+    end if
+    if (present(depth)) depth_ = depth
+    tracy_emit_zone_begin_type = impl_tracy_emit_zone_begin_callstack(srcloc, depth_, active_)
+    srcloc = tracy_source_location_data(c_null_ptr, c_null_ptr, c_null_ptr, 0_c_int32_t, 0_c_int32_t)
+  end function tracy_emit_zone_begin_type
+
+  subroutine tracy_zone_set_properties(ctx, text, name, color, value)  ! forwards each present optional to its setter
+    type(tracy_zone_context), intent(in), value :: ctx
+    character(kind=c_char, len=*), target, intent(in), optional :: text
+    character(kind=c_char, len=*), target, intent(in), optional :: name
+    integer(c_int32_t), target, intent(in), optional :: color
+    integer(c_int64_t), target, intent(in), optional :: value
+    if (present(text)) then
+      call tracy_emit_zone_text(ctx, c_loc(text), len(text, kind=c_size_t))
+    end if
+    if (present(name)) then
+      call tracy_emit_zone_name(ctx, c_loc(name), len(name, kind=c_size_t))
+    end if
+    if (present(color)) then
+      call tracy_emit_zone_color(ctx, color)
+    end if
+    if (present(value)) then
+      call tracy_emit_zone_value(ctx, value)
+    end if
+  end subroutine tracy_zone_set_properties
+
+  logical(1) function tracy_connected()  ! true when the C call returns non-zero
+    tracy_connected = impl_tracy_connected() /= 0_c_int32_t
+  end function tracy_connected
+
+  subroutine tracy_memory_alloc(ptr, size, name, depth, secure)
+    type(c_ptr), intent(in) :: ptr
+    integer(c_size_t), intent(in) :: size
+    character(kind=c_char, len=*), target, intent(in), optional :: name
+    integer(c_int32_t), intent(in), optional :: depth
+    logical(1), intent(in), optional :: secure
+    ! defaults: depth 0, secure off; the *_named binding is used when name is present
+    integer(c_int32_t) :: depth_, secure_
+    secure_ = 0_c_int32_t
+    depth_ = 0_c_int32_t
+    if (present(secure)) then
+      if (secure) secure_ = 1_c_int32_t
+    end if
+    if (present(depth)) depth_ = depth
+    if (present(name)) then
+      call impl_tracy_emit_memory_alloc_callstack_named(ptr, size, depth_, secure_, c_loc(name))
+    else
+      call impl_tracy_emit_memory_alloc_callstack(ptr, size, depth_, secure_)
+    end if
+  end subroutine tracy_memory_alloc
+  subroutine tracy_memory_free(ptr, name, depth, secure)
+    type(c_ptr), intent(in) :: ptr
+    character(kind=c_char, len=*), target, intent(in), optional :: name
+    integer(c_int32_t), intent(in), optional :: depth
+    logical(1), intent(in), optional :: secure
+    ! defaults: depth 0, secure off; the *_named binding is used when name is present
+    integer(c_int32_t) :: depth_, secure_
+    secure_ = 0_c_int32_t
+    depth_ = 0_c_int32_t
+    if (present(secure)) then
+      if (secure) secure_ = 1_c_int32_t
+    end if
+    if (present(depth)) depth_ = depth
+    if (present(name)) then
+      call impl_tracy_emit_memory_free_callstack_named(ptr, depth_, secure_, c_loc(name))
+    else
+      call impl_tracy_emit_memory_free_callstack(ptr, depth_, secure_)
+    end if
+  end subroutine tracy_memory_free
+  subroutine tracy_memory_discard(name, depth, secure)
+    character(kind=c_char, len=*), target, intent(in) :: name
+    integer(c_int32_t), intent(in), optional :: depth
+    logical(1), intent(in), optional :: secure
+    ! defaults: depth 0, secure off
+    integer(c_int32_t) :: depth_, secure_
+    secure_ = 0_c_int32_t
+    depth_ = 0_c_int32_t
+    if (present(secure)) then
+      if (secure) secure_ = 1_c_int32_t
+    end if
+    if (present(depth)) depth_ = depth
+    call impl_tracy_emit_memory_discard_callstack(c_loc(name), secure_, depth_)  ! binding order: name, secure, depth
+  end subroutine tracy_memory_discard
+
+  subroutine tracy_message(msg, color, depth)
+    character(kind=c_char, len=*), target, intent(in) :: msg
+    integer(c_int32_t), intent(in), optional :: color
+    integer(c_int32_t), intent(in), optional :: depth
+    ! depth defaults to 0; the messageC binding is used when color is present
+    integer(c_int32_t) :: depth_
+    depth_ = 0_c_int32_t
+    if (present(depth)) depth_ = depth
+    if (present(color)) then
+      call impl_tracy_emit_messageC(c_loc(msg), len(msg, kind=c_size_t), color, depth_)
+    else
+      call impl_tracy_emit_message(c_loc(msg), len(msg, kind=c_size_t), depth_)
+    end if
+  end subroutine tracy_message
+
+  subroutine tracy_appinfo(info)  ! passes info as pointer + length
+    character(kind=c_char, len=*), target, intent(in) :: info
+    call impl_tracy_emit_message_appinfo(c_loc(info), len(info, kind=c_size_t))
+  end subroutine tracy_appinfo
+
+  subroutine tracy_frame_mark(name)  ! passes c_null_ptr when name is absent
+    character(kind=c_char, len=*), target, intent(in), optional :: name
+    if (present(name)) then
+      call impl_tracy_emit_frame_mark(c_loc(name))
+    else
+      call impl_tracy_emit_frame_mark(c_null_ptr)
+    end if
+  end subroutine tracy_frame_mark
+  subroutine tracy_frame_start(name)  ! passes c_null_ptr when name is absent
+    character(kind=c_char, len=*), target, intent(in), optional :: name
+    if (present(name)) then
+      call impl_tracy_emit_frame_mark_start(c_loc(name))
+    else
+      call impl_tracy_emit_frame_mark_start(c_null_ptr)
+    end if
+  end subroutine tracy_frame_start
+  subroutine tracy_frame_end(name)  ! passes c_null_ptr when name is absent
+    character(kind=c_char, len=*), target, intent(in), optional :: name
+    if (present(name)) then
+      call impl_tracy_emit_frame_mark_end(c_loc(name))
+    else
+      call impl_tracy_emit_frame_mark_end(c_null_ptr)
+    end if
+  end subroutine tracy_frame_end
+
+  subroutine tracy_image(image, w, h, offset, flip)
+    type(c_ptr), intent(in) :: image
+    integer(c_int16_t), intent(in) :: w, h
+    integer(c_int8_t), intent(in), optional :: offset
+    logical(1), intent(in), optional :: flip
+    ! defaults: offset 0, flip off
+    integer(c_int32_t) :: flip_
+    integer(c_int8_t) :: offset_
+    flip_ = 0_c_int32_t
+    offset_ = 0_c_int8_t
+    if (present(flip)) then
+      if (flip) flip_ = 1_c_int32_t
+    end if
+    if (present(offset)) offset_ = offset
+    call impl_tracy_emit_frame_image(image, w, h, offset_, flip_)
+  end subroutine tracy_image
+
+  subroutine tracy_plot_int8(name, val)  ! tracy_plot specific for 64-bit integers
+    character(kind=c_char, len=*), target, intent(in) :: name
+    integer(c_int64_t) :: val
+    call impl_tracy_emit_plot_int8(c_loc(name), val)
+  end subroutine tracy_plot_int8
+  subroutine tracy_plot_real4(name, val)  ! tracy_plot specific for c_float
+    character(kind=c_char, len=*), target, intent(in) :: name
+    real(c_float) :: val
+    call impl_tracy_emit_plot_real4(c_loc(name), val)
+  end subroutine tracy_plot_real4
+  subroutine tracy_plot_real8(name, val)  ! tracy_plot specific for c_double
+    character(kind=c_char, len=*), target, intent(in) :: name
+    real(c_double) :: val
+    call impl_tracy_emit_plot_real8(c_loc(name), val)
+  end subroutine tracy_plot_real8
+
+  subroutine tracy_plot_config(name, type, step, fill, color)
+    character(kind=c_char, len=*), target, intent(in) :: name
+    integer(c_int32_t), intent(in), optional :: type
+    logical(1), intent(in), optional :: step
+    logical(1), intent(in), optional :: fill
+    integer(c_int32_t), intent(in), optional :: color
+    ! defaults: type 0, step off, fill on, color 0
+    integer(c_int32_t) :: type_, step_, fill_, color_
+    type_ = 0_c_int32_t
+    step_ = 0_c_int32_t
+    fill_ = 1_c_int32_t
+    color_ = 0_c_int32_t
+    if (present(type)) type_ = type
+    if (present(step)) then
+      if (step) step_ = 1_c_int32_t
+    end if
+    if (present(fill)) then
+      if (.not. fill) fill_ = 0_c_int32_t
+    end if
+    if (present(color)) color_ = color
+    call impl_tracy_emit_plot_config(c_loc(name), type_, step_, fill_, color_)
+  end subroutine tracy_plot_config
+
+#ifdef TRACY_FIBERS
+  subroutine tracy_fiber_enter(fiber_name)
+    character(kind=c_char, len=*), target, intent(in) :: fiber_name
+    call impl_tracy_fiber_enter(c_loc(fiber_name))
+  end subroutine tracy_fiber_enter
+#endif
+end module tracy
diff --git a/external/sources/tracy/public/TracyClient.cpp b/external/sources/tracy/public/TracyClient.cpp
index 26387b762e..6224f48bfe 100644
--- a/external/sources/tracy/public/TracyClient.cpp
+++ b/external/sources/tracy/public/TracyClient.cpp
@@ -30,21 +30,24 @@
 #include "client/TracyDxt1.cpp"
 #include "client/TracyAlloc.cpp"
 #include "client/TracyOverride.cpp"
-
-#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
-# include "libbacktrace/alloc.cpp"
-# include "libbacktrace/dwarf.cpp"
-# include "libbacktrace/fileline.cpp"
-# include "libbacktrace/mmapio.cpp"
-# include "libbacktrace/posix.cpp"
-# include "libbacktrace/sort.cpp"
-# include "libbacktrace/state.cpp"
-# if TRACY_HAS_CALLSTACK == 4
-# include "libbacktrace/macho.cpp"
-# else
-# include "libbacktrace/elf.cpp"
+#include "client/TracyKCore.cpp"
+
+#if defined(TRACY_HAS_CALLSTACK)
+# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6
+# include "libbacktrace/alloc.cpp"
+# include "libbacktrace/dwarf.cpp"
+# include "libbacktrace/fileline.cpp"
+# include "libbacktrace/mmapio.cpp"
+# include "libbacktrace/posix.cpp"
+# include "libbacktrace/sort.cpp"
+# include "libbacktrace/state.cpp"
+# if TRACY_HAS_CALLSTACK == 4
+# include "libbacktrace/macho.cpp"
+# else
+# include "libbacktrace/elf.cpp"
+# endif
+# include "common/TracyStackFrames.cpp"
 # endif
-# include "common/TracyStackFrames.cpp"
 #endif
 
 #ifdef _MSC_VER
diff --git
a/external/sources/tracy/public/client/TracyArmCpuTable.hpp b/external/sources/tracy/public/client/TracyArmCpuTable.hpp index 2b44597643..2b47c3a60d 100644 --- a/external/sources/tracy/public/client/TracyArmCpuTable.hpp +++ b/external/sources/tracy/public/client/TracyArmCpuTable.hpp @@ -305,6 +305,14 @@ static const char* DecodeIosDevice( const char* id ) "iPhone14,4", "iPhone 13 Mini", "iPhone14,5", "iPhone 13", "iPhone14,6", "iPhone SE 3rd Gen", + "iPhone14,7", "iPhone 14", + "iPhone14,8", "iPhone 14 Plus", + "iPhone15,2", "iPhone 14 Pro", + "iPhone15,3", "iPhone 14 Pro Max", + "iPhone15,4", "iPhone 15", + "iPhone15,5", "iPhone 15 Plus", + "iPhone16,1", "iPhone 15 Pro", + "iPhone16,2", "iPhone 15 Pro Max", "iPad1,1", "iPad (A1219/A1337)", "iPad2,1", "iPad 2 (A1395)", "iPad2,2", "iPad 2 (A1396)", @@ -365,6 +373,8 @@ static const char* DecodeIosDevice( const char* id ) "iPad11,4", "iPad Air 3rd gen (A2123/A2153/A2154)", "iPad11,6", "iPad 8th gen (WiFi)", "iPad11,7", "iPad 8th gen (WiFi+Cellular)", + "iPad12,1", "iPad 9th Gen (WiFi)", + "iPad12,2", "iPad 9th Gen (WiFi+Cellular)", "iPad13,1", "iPad Air 4th gen (WiFi)", "iPad13,2", "iPad Air 4th gen (WiFi+Cellular)", "iPad13,4", "iPad Pro 11\" 3rd gen", @@ -377,6 +387,14 @@ static const char* DecodeIosDevice( const char* id ) "iPad13,11", "iPad Pro 12.9\" 5th gen", "iPad13,16", "iPad Air 5th Gen (WiFi)", "iPad13,17", "iPad Air 5th Gen (WiFi+Cellular)", + "iPad13,18", "iPad 10th Gen", + "iPad13,19", "iPad 10th Gen", + "iPad14,1", "iPad mini 6th Gen (WiFi)", + "iPad14,2", "iPad mini 6th Gen (WiFi+Cellular)", + "iPad14,3", "iPad Pro 11\" 4th Gen", + "iPad14,4", "iPad Pro 11\" 4th Gen", + "iPad14,5", "iPad Pro 12.9\" 6th Gen", + "iPad14,6", "iPad Pro 12.9\" 6th Gen", "iPod1,1", "iPod Touch", "iPod2,1", "iPod Touch 2nd gen", "iPod3,1", "iPod Touch 3rd gen", diff --git a/external/sources/tracy/public/client/TracyCallstack.cpp b/external/sources/tracy/public/client/TracyCallstack.cpp index 0de7c9d2e9..bd3290604f 100644 --- 
a/external/sources/tracy/public/client/TracyCallstack.cpp +++ b/external/sources/tracy/public/client/TracyCallstack.cpp @@ -3,10 +3,12 @@ #include #include #include "TracyCallstack.hpp" +#include "TracyDebug.hpp" #include "TracyFastVector.hpp" #include "TracyStringHelpers.hpp" #include "../common/TracyAlloc.hpp" -#include "TracyDebug.hpp" +#include "../common/TracySystem.hpp" + #ifdef TRACY_HAS_CALLSTACK @@ -31,7 +33,6 @@ # include # include # include -# include "TracyFastVector.hpp" #elif TRACY_HAS_CALLSTACK == 5 # include # include @@ -66,7 +67,7 @@ extern "C" extern "C" const char* ___tracy_demangle( const char* mangled ); #ifndef TRACY_DEMANGLE -constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; +constexpr size_t ___tracy_demangle_buffer_len = 1024*1024; char* ___tracy_demangle_buffer; void ___tracy_init_demangle_buffer() @@ -90,9 +91,177 @@ extern "C" const char* ___tracy_demangle( const char* mangled ) #endif #endif +#if TRACY_HAS_CALLSTACK == 3 +# define TRACY_USE_IMAGE_CACHE +# include +#endif + namespace tracy { +#ifdef TRACY_USE_IMAGE_CACHE +// when we have access to dl_iterate_phdr(), we can build a cache of address ranges to image paths +// so we can quickly determine which image an address falls into. +// We refresh this cache only when we hit an address that doesn't fall into any known range. 
+class ImageCache
+{
+public:
+    struct ImageEntry
+    {
+        void* m_startAddress = nullptr;
+        void* m_endAddress = nullptr;
+        char* m_name = nullptr;  // allocated with tracy_malloc, released in Clear(); may stay null
+    };
+
+    ImageCache()
+        : m_images( 512 )
+    {
+        Refresh();
+    }
+
+    ~ImageCache()
+    {
+        Clear();
+    }
+
+    const ImageEntry* GetImageForAddress( void* address )
+    {
+        const ImageEntry* entry = GetImageForAddressImpl( address );
+        if( !entry )  // miss: rescan the loaded images once, then retry
+        {
+            Refresh();
+            return GetImageForAddressImpl( address );
+        }
+        return entry;
+    }
+
+private:
+    tracy::FastVector<ImageEntry> m_images;  // kept sorted by descending start address (see Refresh)
+    bool m_updated = false;  // set by Callback when it appends an entry
+    bool m_haveMainImageName = false;
+
+    static int Callback( struct dl_phdr_info* info, size_t size, void* data )
+    {
+        ImageCache* cache = reinterpret_cast<ImageCache*>( data );
+
+        const auto startAddress = reinterpret_cast<void*>( info->dlpi_addr );
+        if( cache->Contains( startAddress ) ) return 0;
+
+        const uint32_t headerCount = info->dlpi_phnum;
+        assert( headerCount > 0);
+        const auto endAddress = reinterpret_cast<void*>( info->dlpi_addr +  // end = base + last header's vaddr + memsz
+            info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz);
+
+        ImageEntry* image = cache->m_images.push_next();
+        image->m_startAddress = startAddress;
+        image->m_endAddress = endAddress;
+
+        // the base executable name isn't provided when iterating with dl_iterate_phdr,
+        // we will have to patch the executable image name outside this callback
+        if( info->dlpi_name && info->dlpi_name[0] != '\0' )
+        {
+            size_t sz = strlen( info->dlpi_name ) + 1;
+            image->m_name = (char*)tracy_malloc( sz );
+            memcpy( image->m_name, info->dlpi_name, sz );
+        }
+        else
+        {
+            image->m_name = nullptr;
+        }
+
+        cache->m_updated = true;
+
+        return 0;
+    }
+
+    bool Contains( void* startAddress ) const
+    {
+        return std::any_of( m_images.begin(), m_images.end(), [startAddress]( const ImageEntry& entry ) { return startAddress == entry.m_startAddress; } );
+    }
+
+    void Refresh()
+    {
+        m_updated = false;
+        dl_iterate_phdr( Callback, this );
+
+        if( m_updated )
+        {
+            std::sort( m_images.begin(), m_images.end(),  // descending order; GetImageForAddressImpl relies on it
+                []( const ImageEntry& lhs, const ImageEntry& rhs ) { return lhs.m_startAddress > rhs.m_startAddress; } );
+
+            // patch the main executable image name here, as calling dl_* functions inside the dl_iterate_phdr callback might cause deadlocks
+            UpdateMainImageName();
+        }
+    }
+
+    void UpdateMainImageName()
+    {
+        if( m_haveMainImageName )
+        {
+            return;
+        }
+
+        for( ImageEntry& entry : m_images )
+        {
+            if( entry.m_name == nullptr )
+            {
+                Dl_info dlInfo;
+                if( dladdr( (void *)entry.m_startAddress, &dlInfo ) )
+                {
+                    if( dlInfo.dli_fname )
+                    {
+                        size_t sz = strlen( dlInfo.dli_fname ) + 1;
+                        entry.m_name = (char*)tracy_malloc( sz );
+                        memcpy( entry.m_name, dlInfo.dli_fname, sz );
+                    }
+                }
+
+                // we only expect one entry to be null for the main executable entry
+                break;
+            }
+        }
+
+        m_haveMainImageName = true;  // set even when no name was found, so the lookup is not repeated
+    }
+
+    const ImageEntry* GetImageForAddressImpl( void* address ) const
+    {
+        auto it = std::lower_bound( m_images.begin(), m_images.end(), address,  // first entry with start <= address
+            []( const ImageEntry& lhs, const void* rhs ) { return lhs.m_startAddress > rhs; } );
+
+        if( it != m_images.end() && address < it->m_endAddress )
+        {
+            return it;
+        }
+        return nullptr;
+    }
+
+    void Clear()
+    {
+        for( ImageEntry& entry : m_images )
+        {
+            tracy_free( entry.m_name );
+        }
+
+        m_images.clear();
+        m_haveMainImageName = false;
+    }
+};
+#endif //#ifdef TRACY_USE_IMAGE_CACHE
+
+// when "TRACY_SYMBOL_OFFLINE_RESOLVE" is set, instead of fully resolving symbols at runtime,
+// simply resolve the offset and image name (which is enough for the resolving to be done offline)
+#ifdef TRACY_SYMBOL_OFFLINE_RESOLVE
+constexpr bool s_shouldResolveSymbolsOffline = true;
+#else
+static bool s_shouldResolveSymbolsOffline = false;
+bool ShouldResolveSymbolsOffline()
+{
+    const char* symbolOfflineResolve = GetEnvVar( "TRACY_SYMBOL_OFFLINE_RESOLVE" );
+    return (symbolOfflineResolve && symbolOfflineResolve[0] == '1');  // enabled only when the value starts with '1'
+}
+#endif // #ifdef TRACY_SYMBOL_OFFLINE_RESOLVE
+
 #if TRACY_HAS_CALLSTACK == 1
 
 enum { MaxCbTrace = 64 };
@@ -108,13
+277,18 @@ extern "C" typedef BOOL (__stdcall *t_SymFromInlineContext)( HANDLE hProcess, DWORD64 Address, ULONG InlineContext, PDWORD64 Displacement, PSYMBOL_INFO Symbol ); typedef BOOL (__stdcall *t_SymGetLineFromInlineContext)( HANDLE hProcess, DWORD64 qwAddr, ULONG InlineContext, DWORD64 qwModuleBaseAddress, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line64 ); - TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0; t_SymAddrIncludeInlineTrace _SymAddrIncludeInlineTrace = 0; t_SymQueryInlineTrace _SymQueryInlineTrace = 0; t_SymFromInlineContext _SymFromInlineContext = 0; t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0; -} + typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long ); + ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChainPtr = nullptr; + TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void** callers, unsigned long count, unsigned long flags) + { + return ___tracy_RtlWalkFrameChainPtr(callers, count, flags); + } +} struct ModuleCache { @@ -136,18 +310,19 @@ struct KernelDriver KernelDriver* s_krnlCache = nullptr; size_t s_krnlCacheCnt; - void InitCallstackCritical() { - ___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" ); + ___tracy_RtlWalkFrameChainPtr = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" ); } -void InitCallstack() +void DbgHelpInit() { - _SymAddrIncludeInlineTrace = (t_SymAddrIncludeInlineTrace)GetProcAddress( GetModuleHandleA( "dbghelp.dll" ), "SymAddrIncludeInlineTrace" ); - _SymQueryInlineTrace = (t_SymQueryInlineTrace)GetProcAddress( GetModuleHandleA( "dbghelp.dll" ), "SymQueryInlineTrace" ); - _SymFromInlineContext = (t_SymFromInlineContext)GetProcAddress( GetModuleHandleA( "dbghelp.dll" ), "SymFromInlineContext" ); - _SymGetLineFromInlineContext = (t_SymGetLineFromInlineContext)GetProcAddress( GetModuleHandleA( 
"dbghelp.dll" ), "SymGetLineFromInlineContext" ); + if( s_shouldResolveSymbolsOffline ) return; + + _SymAddrIncludeInlineTrace = (t_SymAddrIncludeInlineTrace)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymAddrIncludeInlineTrace"); + _SymQueryInlineTrace = (t_SymQueryInlineTrace)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymQueryInlineTrace"); + _SymFromInlineContext = (t_SymFromInlineContext)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymFromInlineContext"); + _SymGetLineFromInlineContext = (t_SymGetLineFromInlineContext)GetProcAddress(GetModuleHandleA("dbghelp.dll"), "SymGetLineFromInlineContext"); #ifdef TRACY_DBGHELP_LOCK DBGHELP_INIT; @@ -157,9 +332,78 @@ void InitCallstack() SymInitialize( GetCurrentProcess(), nullptr, true ); SymSetOptions( SYMOPT_LOAD_LINES ); +#ifdef TRACY_DBGHELP_LOCK + DBGHELP_UNLOCK; +#endif +} + +DWORD64 DbgHelpLoadSymbolsForModule( const char* imageName, uint64_t baseOfDll, uint32_t bllSize ) +{ + if( s_shouldResolveSymbolsOffline ) return 0; + return SymLoadModuleEx( GetCurrentProcess(), nullptr, imageName, nullptr, baseOfDll, bllSize, nullptr, 0 ); +} + +ModuleCache* LoadSymbolsForModuleAndCache( const char* imageName, uint32_t imageNameLength, uint64_t baseOfDll, uint32_t dllSize ) +{ + DbgHelpLoadSymbolsForModule( imageName, baseOfDll, dllSize ); + + ModuleCache* cachedModule = s_modCache->push_next(); + cachedModule->start = baseOfDll; + cachedModule->end = baseOfDll + dllSize; + + // when doing offline symbol resolution, we must store the full path of the dll for the resolving to work + if( s_shouldResolveSymbolsOffline ) + { + cachedModule->name = (char*)tracy_malloc_fast(imageNameLength + 1); + memcpy(cachedModule->name, imageName, imageNameLength); + cachedModule->name[imageNameLength] = '\0'; + } + else + { + auto ptr = imageName + imageNameLength; + while (ptr > imageName && *ptr != '\\' && *ptr != '/') ptr--; + if (ptr > imageName) ptr++; + const auto namelen = imageName + imageNameLength - ptr; + 
cachedModule->name = (char*)tracy_malloc_fast(namelen + 3); + cachedModule->name[0] = '['; + memcpy(cachedModule->name + 1, ptr, namelen); + cachedModule->name[namelen + 1] = ']'; + cachedModule->name[namelen + 2] = '\0'; + } + + return cachedModule; +} + +void InitCallstack() +{ +#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE + s_shouldResolveSymbolsOffline = ShouldResolveSymbolsOffline(); +#endif //#ifndef TRACY_SYMBOL_OFFLINE_RESOLVE + if( s_shouldResolveSymbolsOffline ) + { + TracyDebug("TRACY: enabling offline symbol resolving!\n"); + } + + DbgHelpInit(); + +#ifdef TRACY_DBGHELP_LOCK + DBGHELP_LOCK; +#endif + + // use TRACY_NO_DBGHELP_INIT_LOAD=1 to disable preloading of driver + // and process module symbol loading at startup time - they will be loaded on demand later + // Sometimes this process can take a very long time and prevent resolving callstack frames + // symbols during that time. + const char* noInitLoadEnv = GetEnvVar( "TRACY_NO_DBGHELP_INIT_LOAD" ); + const bool initTimeModuleLoad = !( noInitLoadEnv && noInitLoadEnv[0] == '1' ); + if ( !initTimeModuleLoad ) + { + TracyDebug("TRACY: skipping init time dbghelper module load\n"); + } + DWORD needed; LPVOID dev[4096]; - if( EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 ) + if( initTimeModuleLoad && EnumDeviceDrivers( dev, sizeof(dev), &needed ) != 0 ) { char windir[MAX_PATH]; if( !GetWindowsDirectoryA( windir, sizeof( windir ) ) ) memcpy( windir, "c:\\windows", 11 ); @@ -193,7 +437,7 @@ void InitCallstack() path = full; } - SymLoadModuleEx( GetCurrentProcess(), nullptr, path, nullptr, (DWORD64)dev[i], 0, nullptr, 0 ); + DbgHelpLoadSymbolsForModule( path, (DWORD64)dev[i], 0 ); const auto psz = strlen( path ); auto pptr = (char*)tracy_malloc_fast( psz+1 ); @@ -214,7 +458,7 @@ void InitCallstack() HANDLE proc = GetCurrentProcess(); HMODULE mod[1024]; - if( EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) + if( initTimeModuleLoad && EnumProcessModules( proc, mod, sizeof( mod ), &needed ) != 0 ) 
{ const auto sz = needed / sizeof( HMODULE ); for( size_t i=0; i 0 ) + const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 ); + if( nameLength > 0 ) { // This may be a new module loaded since our call to SymInitialize. // Just in case, force DbgHelp to load its pdb ! - SymLoadModuleEx(proc, NULL, name, NULL, (DWORD64)info.lpBaseOfDll, info.SizeOfImage, NULL, 0); - - auto ptr = name + res; - while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--; - if( ptr > name ) ptr++; - const auto namelen = name + res - ptr; - auto cache = s_modCache->push_next(); - cache->start = base; - cache->end = base + info.SizeOfImage; - cache->name = (char*)tracy_malloc_fast( namelen+3 ); - cache->name[0] = '['; - memcpy( cache->name+1, ptr, namelen ); - cache->name[namelen+1] = ']'; - cache->name[namelen+2] = '\0'; + LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage ); } } } @@ -259,6 +489,8 @@ void EndCallstack() const char* DecodeCallstackPtrFast( uint64_t ptr ) { + if( s_shouldResolveSymbolsOffline ) return "[unresolved]"; + static char ret[MaxNameSize]; const auto proc = GetCurrentProcess(); @@ -294,7 +526,13 @@ const char* GetKernelModulePath( uint64_t addr ) return it->path; } -static const char* GetModuleNameAndPrepareSymbols( uint64_t addr ) +struct ModuleNameAndBaseAddress +{ + const char* name; + uint64_t baseAddr; +}; + +ModuleNameAndBaseAddress GetModuleNameAndPrepareSymbols( uint64_t addr ) { if( ( addr >> 63 ) != 0 ) { @@ -303,17 +541,17 @@ static const char* GetModuleNameAndPrepareSymbols( uint64_t addr ) auto it = std::lower_bound( s_krnlCache, s_krnlCache + s_krnlCacheCnt, addr, []( const KernelDriver& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); if( it != s_krnlCache + s_krnlCacheCnt ) { - return it->mod; + return ModuleNameAndBaseAddress{ it->mod, it->addr }; } } - return ""; + return ModuleNameAndBaseAddress{ "", addr }; } for( auto& v : *s_modCache ) { if( addr >= v.start && addr < v.end ) { - return 
v.name; + return ModuleNameAndBaseAddress{ v.name, v.start }; } } @@ -334,35 +572,33 @@ static const char* GetModuleNameAndPrepareSymbols( uint64_t addr ) if( addr >= base && addr < base + info.SizeOfImage ) { char name[1024]; - const auto res = GetModuleFileNameA( mod[i], name, 1021 ); - if( res > 0 ) + const auto nameLength = GetModuleFileNameA( mod[i], name, 1021 ); + if( nameLength > 0 ) { // since this is the first time we encounter this module, load its symbols (needed for modules loaded after SymInitialize) - SymLoadModuleEx(proc, NULL, name, NULL, (DWORD64)info.lpBaseOfDll, info.SizeOfImage, NULL, 0); - auto ptr = name + res; - while( ptr > name && *ptr != '\\' && *ptr != '/' ) ptr--; - if( ptr > name ) ptr++; - const auto namelen = name + res - ptr; - auto cache = s_modCache->push_next(); - cache->start = base; - cache->end = base + info.SizeOfImage; - cache->name = (char*)tracy_malloc_fast( namelen+3 ); - cache->name[0] = '['; - memcpy( cache->name+1, ptr, namelen ); - cache->name[namelen+1] = ']'; - cache->name[namelen+2] = '\0'; - return cache->name; + ModuleCache* cachedModule = LoadSymbolsForModuleAndCache( name, nameLength, (DWORD64)info.lpBaseOfDll, info.SizeOfImage ); + return ModuleNameAndBaseAddress{ cachedModule->name, cachedModule->start }; } } } } } - return "[unknown]"; + + return ModuleNameAndBaseAddress{ "[unknown]", 0x0 }; } CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { CallstackSymbolData sym; + + if( s_shouldResolveSymbolsOffline ) + { + sym.file = "[unknown]"; + sym.line = 0; + sym.needFree = false; + return sym; + } + IMAGEHLP_LINE64 line; DWORD displacement = 0; line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); @@ -390,15 +626,32 @@ CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) { - int write; - const auto proc = GetCurrentProcess(); +#ifdef TRACY_DBGHELP_LOCK + DBGHELP_LOCK; +#endif + InitRpmalloc(); + const ModuleNameAndBaseAddress moduleNameAndAddress = 
GetModuleNameAndPrepareSymbols( ptr ); + + if( s_shouldResolveSymbolsOffline ) + { #ifdef TRACY_DBGHELP_LOCK - DBGHELP_LOCK; + DBGHELP_UNLOCK; #endif - const auto moduleName = GetModuleNameAndPrepareSymbols(ptr); + cb_data[0].symAddr = ptr - moduleNameAndAddress.baseAddr; + cb_data[0].symLen = 0; + + cb_data[0].name = CopyStringFast("[unresolved]"); + cb_data[0].file = CopyStringFast("[unknown]"); + cb_data[0].line = 0; + + return { cb_data, 1, moduleNameAndAddress.name }; + } + + int write; + const auto proc = GetCurrentProcess(); #if !defined TRACY_NO_CALLSTACK_INLINES BOOL doInline = FALSE; @@ -448,7 +701,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb_data[write].line = line.LineNumber; } - cb_data[write].name = symValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleName ); + cb_data[write].name = symValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleNameAndAddress.name ); cb_data[write].file = CopyStringFast( filename ); if( symValid ) { @@ -481,7 +734,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb.line = line.LineNumber; } - cb.name = symInlineValid ? CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleName ); + cb.name = symInlineValid ? 
CopyStringFast( si->Name, si->NameLen ) : CopyStringFast( moduleNameAndAddress.name ); cb.file = CopyStringFast( filename ); if( symInlineValid ) { @@ -502,17 +755,21 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) DBGHELP_UNLOCK; #endif - return { cb_data, uint8_t( cb_num ), moduleName }; + return { cb_data, uint8_t( cb_num ), moduleNameAndAddress.name }; } #elif TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 enum { MaxCbTrace = 64 }; -struct backtrace_state* cb_bts; +struct backtrace_state* cb_bts = nullptr; + int cb_num; CallstackEntry cb_data[MaxCbTrace]; int cb_fixup; +#ifdef TRACY_USE_IMAGE_CACHE +static ImageCache* s_imageCache = nullptr; +#endif //#ifdef TRACY_USE_IMAGE_CACHE #ifdef TRACY_DEBUGINFOD debuginfod_client* s_debuginfod; @@ -525,13 +782,14 @@ struct DebugInfo int fd; }; -FastVector s_di_known( 16 ); +static FastVector* s_di_known; #endif #ifdef __linux struct KernelSymbol { uint64_t addr; + uint32_t size; const char* name; const char* mod; }; @@ -543,10 +801,11 @@ static void InitKernelSymbols() { FILE* f = fopen( "/proc/kallsyms", "rb" ); if( !f ) return; - tracy::FastVector tmpSym( 1024 ); + tracy::FastVector tmpSym( 512 * 1024 ); size_t linelen = 16 * 1024; // linelen must be big enough to prevent reallocs in getline() auto linebuf = (char*)tracy_malloc( linelen ); ssize_t sz; + size_t validCnt = 0; while( ( sz = getline( &linebuf, &linelen, f ) ) != -1 ) { auto ptr = linebuf; @@ -579,7 +838,7 @@ static void InitKernelSymbols() } if( addr == 0 ) continue; ptr++; - if( *ptr != 'T' && *ptr != 't' ) continue; + const bool valid = *ptr == 'T' || *ptr == 't'; ptr += 2; const auto namestart = ptr; while( *ptr != '\t' && *ptr != '\n' ) ptr++; @@ -594,20 +853,28 @@ static void InitKernelSymbols() modend = ptr; } - auto strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); - memcpy( strname, namestart, nameend - namestart ); - strname[nameend-namestart] = '\0'; - + char* 
strname = nullptr; char* strmod = nullptr; - if( modstart ) + + if( valid ) { - strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); - memcpy( strmod, modstart, modend - modstart ); - strmod[modend-modstart] = '\0'; + validCnt++; + + strname = (char*)tracy_malloc_fast( nameend - namestart + 1 ); + memcpy( strname, namestart, nameend - namestart ); + strname[nameend-namestart] = '\0'; + + if( modstart ) + { + strmod = (char*)tracy_malloc_fast( modend - modstart + 1 ); + memcpy( strmod, modstart, modend - modstart ); + strmod[modend-modstart] = '\0'; + } } auto sym = tmpSym.push_next(); sym->addr = addr; + sym->size = 0; sym->name = strname; sym->mod = strmod; } @@ -615,11 +882,22 @@ static void InitKernelSymbols() fclose( f ); if( tmpSym.empty() ) return; - std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr > rhs.addr; } ); - s_kernelSymCnt = tmpSym.size(); - s_kernelSym = (KernelSymbol*)tracy_malloc_fast( sizeof( KernelSymbol ) * s_kernelSymCnt ); - memcpy( s_kernelSym, tmpSym.data(), sizeof( KernelSymbol ) * s_kernelSymCnt ); - TracyDebug( "Loaded %zu kernel symbols\n", s_kernelSymCnt ); + std::sort( tmpSym.begin(), tmpSym.end(), []( const KernelSymbol& lhs, const KernelSymbol& rhs ) { return lhs.addr < rhs.addr; } ); + for( size_t i=0; i*)tracy_malloc( sizeof( FastVector ) ); + new (s_di_known) FastVector( 16 ); #endif } @@ -725,11 +1023,11 @@ DebugInfo* FindDebugInfo( FastVector& vec, const uint8_t* buildid_dat int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const char* filename ) { auto buildid = (uint8_t*)buildid_data; - auto it = FindDebugInfo( s_di_known, buildid, buildid_size ); + auto it = FindDebugInfo( *s_di_known, buildid, buildid_size ); if( it ) return it->fd >= 0 ? 
dup( it->fd ) : -1; int fd = debuginfod_find_debuginfo( s_debuginfod, buildid, buildid_size, nullptr ); - it = s_di_known.push_next(); + it = s_di_known->push_next(); it->buildid_size = buildid_size; it->buildid = (uint8_t*)tracy_malloc( buildid_size ); memcpy( it->buildid, buildid, buildid_size ); @@ -744,7 +1042,7 @@ int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size, const const uint8_t* GetBuildIdForImage( const char* image, size_t& size ) { assert( image ); - for( auto& v : s_di_known ) + for( auto& v : *s_di_known ) { if( strcmp( image, v.filename ) == 0 ) { @@ -763,11 +1061,21 @@ debuginfod_client* GetDebuginfodClient() void EndCallstack() { +#ifdef TRACY_USE_IMAGE_CACHE + if( s_imageCache ) + { + s_imageCache->~ImageCache(); + tracy_free( s_imageCache ); + } +#endif //#ifdef TRACY_USE_IMAGE_CACHE #ifndef TRACY_DEMANGLE ___tracy_free_demangle_buffer(); #endif #ifdef TRACY_DEBUGINFOD - ClearDebugInfoVector( s_di_known ); + ClearDebugInfoVector( *s_di_known ); + s_di_known->~FastVector(); + tracy_free( s_di_known ); + debuginfod_end( s_debuginfod ); #endif } @@ -824,7 +1132,15 @@ static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum* CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { CallstackSymbolData sym; - backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); + if( cb_bts ) + { + backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); + } + else + { + SymbolAddressErrorCb(&sym, nullptr, 0); + } + return sym; } @@ -927,33 +1243,67 @@ void SymInfoError( void* /*data*/, const char* /*msg*/, int /*errnum*/ ) cb_data[cb_num-1].symAddr = 0; } +void GetSymbolForOfflineResolve(void* address, uint64_t imageBaseAddress, CallstackEntry& cbEntry) +{ + // tagged with a string that we can identify as an unresolved symbol + cbEntry.name = CopyStringFast( "[unresolved]" ); + // set .so relative offset so it can be resolved offline + cbEntry.symAddr = 
(uint64_t)address - imageBaseAddress; + cbEntry.symLen = 0x0; + cbEntry.file = CopyStringFast( "[unknown]" ); + cbEntry.line = 0; +} + CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) { InitRpmalloc(); if( ptr >> 63 == 0 ) { - cb_num = 0; - backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr ); - assert( cb_num > 0 ); - - backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr ); + const char* imageName = nullptr; + uint64_t imageBaseAddress = 0x0; - const char* symloc = nullptr; +#ifdef TRACY_USE_IMAGE_CACHE + const auto* image = s_imageCache->GetImageForAddress((void*)ptr); + if( image ) + { + imageName = image->m_name; + imageBaseAddress = uint64_t(image->m_startAddress); + } +#else Dl_info dlinfo; - if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname; + if( dladdr( (void*)ptr, &dlinfo ) ) + { + imageName = dlinfo.dli_fname; + imageBaseAddress = uint64_t( dlinfo.dli_fbase ); + } +#endif + + if( s_shouldResolveSymbolsOffline ) + { + cb_num = 1; + GetSymbolForOfflineResolve( (void*)ptr, imageBaseAddress, cb_data[0] ); + } + else + { + cb_num = 0; + backtrace_pcinfo( cb_bts, ptr, CallstackDataCb, CallstackErrorCb, nullptr ); + assert( cb_num > 0 ); + + backtrace_syminfo( cb_bts, ptr, SymInfoCallback, SymInfoError, nullptr ); + } - return { cb_data, uint8_t( cb_num ), symloc ? symloc : "[unknown]" }; + return { cb_data, uint8_t( cb_num ), imageName ? 
imageName : "[unknown]" }; } #ifdef __linux else if( s_kernelSym ) { - auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr > rhs; } ); + auto it = std::lower_bound( s_kernelSym, s_kernelSym + s_kernelSymCnt, ptr, []( const KernelSymbol& lhs, const uint64_t& rhs ) { return lhs.addr + lhs.size < rhs; } ); if( it != s_kernelSym + s_kernelSymCnt ) { cb_data[0].name = CopyStringFast( it->name ); cb_data[0].file = CopyStringFast( "" ); cb_data[0].line = 0; - cb_data[0].symLen = 0; + cb_data[0].symLen = it->size; cb_data[0].symAddr = it->addr; return { cb_data, 1, it->mod ? it->mod : "" }; } diff --git a/external/sources/tracy/public/client/TracyCallstack.hpp b/external/sources/tracy/public/client/TracyCallstack.hpp index 0b522b730c..1d8cd654f1 100644 --- a/external/sources/tracy/public/client/TracyCallstack.hpp +++ b/external/sources/tracy/public/client/TracyCallstack.hpp @@ -5,22 +5,28 @@ #include "../common/TracyForceInline.hpp" #include "TracyCallstack.h" -#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 -# include -#elif TRACY_HAS_CALLSTACK >= 3 -# include -#endif - - #ifndef TRACY_HAS_CALLSTACK namespace tracy { -static tracy_force_inline void* Callstack( int depth ) { return nullptr; } +static constexpr bool has_callstack() { return false; } +static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; } } #else +#if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 5 +# include +#elif TRACY_HAS_CALLSTACK >= 3 +# ifdef TRACY_LIBUNWIND_BACKTRACE + // libunwind is, in general, significantly faster than execinfo based backtraces +# define UNW_LOCAL_ONLY +# include +# else +# include +# endif +#endif + #ifdef TRACY_DEBUGINFOD # include #endif @@ -33,6 +39,8 @@ static tracy_force_inline void* Callstack( int depth ) { return nullptr; } namespace tracy { +static constexpr bool has_callstack() { return true; } + struct CallstackSymbolData { const char* 
file; @@ -74,11 +82,10 @@ debuginfod_client* GetDebuginfodClient(); extern "C" { - typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long ); - TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain; + TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void**, unsigned long, unsigned long ); } -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 && depth < 63 ); auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) ); @@ -107,7 +114,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v return _URC_NO_REASON; } -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 && depth < 63 ); @@ -122,12 +129,18 @@ static tracy_force_inline void* Callstack( int depth ) #elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 ); auto trace = (uintptr_t*)tracy_malloc( ( 1 + (size_t)depth ) * sizeof( uintptr_t ) ); + +#ifdef TRACY_LIBUNWIND_BACKTRACE + size_t num = unw_backtrace( (void**)(trace+1), depth ); +#else const auto num = (size_t)backtrace( (void**)(trace+1), depth ); +#endif + *trace = num; return trace; diff --git a/external/sources/tracy/public/client/TracyKCore.cpp b/external/sources/tracy/public/client/TracyKCore.cpp new file mode 100644 index 0000000000..09d51d117a --- /dev/null +++ b/external/sources/tracy/public/client/TracyKCore.cpp @@ -0,0 +1,121 @@ +#ifdef __linux__ + +#include +#include +#include +#include +#include + +#include "TracyDebug.hpp" +#include "TracyKCore.hpp" +#include "../common/TracyAlloc.hpp" + +#if !defined(__GLIBC__) && !defined(__WORDSIZE) +// include __WORDSIZE headers for musl +# include +#endif + 
+namespace tracy +{ + +using elf_half = uint16_t; +using elf_word = uint32_t; +using elf_sword = int32_t; + +#if __WORDSIZE == 32 + using elf_addr = uint32_t; + using elf_off = uint32_t; + using elf_xword = uint32_t; +#else + using elf_addr = uint64_t; + using elf_off = uint64_t; + using elf_xword = uint64_t; +#endif + +struct elf_ehdr +{ + unsigned char e_ident[16]; + elf_half e_type; + elf_half e_machine; + elf_word e_version; + elf_addr e_entry; + elf_off e_phoff; + elf_off e_shoff; + elf_word e_flags; + elf_half e_ehsize; + elf_half e_phentsize; + elf_half e_phnum; + elf_half e_shentsize; + elf_half e_shnum; + elf_half e_shstrndx; +}; + +struct elf_phdr +{ + elf_word p_type; + elf_word p_flags; + elf_off p_offset; + elf_addr p_vaddr; + elf_addr p_paddr; + elf_xword p_filesz; + elf_xword p_memsz; + uint64_t p_align; // include 32-bit-only flags field for 32-bit compatibility +}; + +KCore::KCore() + : m_offsets( 16 ) +{ + m_fd = open( "/proc/kcore", O_RDONLY ); + if( m_fd == -1 ) return; + + elf_ehdr ehdr; + if( read( m_fd, &ehdr, sizeof( ehdr ) ) != sizeof( ehdr ) ) goto err; + + assert( ehdr.e_phentsize == sizeof( elf_phdr ) ); + + for( elf_half i=0; istart = phdr.p_vaddr; + ptr->size = phdr.p_memsz; + ptr->offset = phdr.p_offset; + } + + std::sort( m_offsets.begin(), m_offsets.end(), []( const Offset& lhs, const Offset& rhs ) { return lhs.start < rhs.start; } ); + TracyDebug( "KCore: %zu segments found\n", m_offsets.size() ); + return; + +err: + close( m_fd ); + m_fd = -1; +} + +KCore::~KCore() +{ + if( m_fd != -1 ) close( m_fd ); +} + +void* KCore::Retrieve( uint64_t addr, uint64_t size ) const +{ + if( m_fd == -1 ) return nullptr; + auto it = std::lower_bound( m_offsets.begin(), m_offsets.end(), addr, []( const Offset& lhs, uint64_t rhs ) { return lhs.start + lhs.size < rhs; } ); + if( it == m_offsets.end() ) return nullptr; + if( addr + size > it->start + it->size ) return nullptr; + if( lseek( m_fd, it->offset + addr - it->start, SEEK_SET ) == -1 ) return 
nullptr; + auto ptr = tracy_malloc( size ); + if( read( m_fd, ptr, size ) != ssize_t( size ) ) + { + tracy_free( ptr ); + return nullptr; + } + return ptr; +} + +} + +#endif \ No newline at end of file diff --git a/external/sources/tracy/public/client/TracyKCore.hpp b/external/sources/tracy/public/client/TracyKCore.hpp new file mode 100644 index 0000000000..437e172c23 --- /dev/null +++ b/external/sources/tracy/public/client/TracyKCore.hpp @@ -0,0 +1,37 @@ +#ifndef __TRACYKCORE_HPP__ +#define __TRACYKCORE_HPP__ + +#ifdef __linux__ + +#include + +#include "TracyFastVector.hpp" + +namespace tracy +{ + +class KCore +{ + struct Offset + { + uint64_t start; + uint64_t size; + uint64_t offset; + }; + +public: + KCore(); + ~KCore(); + + void* Retrieve( uint64_t addr, uint64_t size ) const; + +private: + int m_fd; + FastVector m_offsets; +}; + +} + +#endif + +#endif diff --git a/external/sources/tracy/public/client/TracyProfiler.cpp b/external/sources/tracy/public/client/TracyProfiler.cpp index ed580123a7..22830765e5 100644 --- a/external/sources/tracy/public/client/TracyProfiler.cpp +++ b/external/sources/tracy/public/client/TracyProfiler.cpp @@ -10,6 +10,9 @@ # include # include # include "../common/TracyUwp.hpp" +# ifndef _MSC_VER +# include +# endif #else # include # include @@ -45,6 +48,14 @@ # include #endif +#ifdef __QNX__ +# include +# include +# include +# include +# include +#endif + #include #include #include @@ -73,6 +84,10 @@ #include "TracySysTrace.hpp" #include "../tracy/TracyC.h" +#if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) +# error "TRACY_MANUAL_LIFETIME requires enabled TRACY_DELAYED_INIT" +#endif + #ifdef TRACY_PORT # ifndef TRACY_DATA_PORT # define TRACY_DATA_PORT TRACY_PORT @@ -98,9 +113,12 @@ # include extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); 
+extern "C" typedef char* (WINAPI *t_WineGetVersion)(); +extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); #else # include # include +# include #endif #if defined __linux__ # include @@ -115,6 +133,10 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR # include #endif +#ifdef __QNX__ +extern char* __progname; +#endif + namespace tracy { @@ -157,7 +179,11 @@ static std::vector ParseMappings() { uintptr_t start_addr; uintptr_t end_addr; +#if defined(__LP64__) if( sscanf( line, "%lx-%lx", &start_addr, &end_addr ) != 2 ) continue; +#else + if (sscanf( line, "%dx-%dx", &start_addr, &end_addr ) != 2 ) continue; +#endif char* first_space = strchr( line, ' ' ); if( !first_space ) continue; char* perm = first_space + 1; @@ -255,8 +281,19 @@ static bool EnsureReadable( uintptr_t address ) MappingInfo* mapping = LookUpMapping(address); return mapping && EnsureReadable( *mapping ); } - -#endif // defined __ANDROID__ +#elif defined WIN32 +static bool EnsureReadable( uintptr_t address ) +{ + MEMORY_BASIC_INFORMATION memInfo; + VirtualQuery( reinterpret_cast( address ), &memInfo, sizeof( memInfo ) ); + return memInfo.Protect != PAGE_NOACCESS; +} +#else +static bool EnsureReadable( uintptr_t address ) +{ + return true; +} +#endif #ifndef TRACY_DELAYED_INIT @@ -281,7 +318,7 @@ struct ThreadHandleWrapper static inline void CpuId( uint32_t* regs, uint32_t leaf ) { memset(regs, 0, sizeof(uint32_t) * 4); -#if defined _WIN32 +#if defined _MSC_VER __cpuidex( (int*)regs, leaf, 0 ); #else __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 ); @@ -400,6 +437,8 @@ static const char* GetProcessName() #elif defined __APPLE__ || defined BSD auto buf = getprogname(); if( buf ) processName = buf; +#elif defined __QNX__ + processName = __progname; #endif return processName; } @@ -437,6 +476,10 @@ static const char* GetProcessExecutablePath() static char buf[1024]; readlink( "/proc/curproc/exe", buf, 1024 ); return buf; +#elif defined __QNX__ + static char 
buf[_PC_PATH_MAX + 1]; + _cmdname(buf); + return buf; #else return nullptr; #endif @@ -488,7 +531,16 @@ static const char* GetHostInfo() # ifdef __MINGW32__ ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); # else - ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); + auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); + if( WineGetVersion && WineGetBuildId ) + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); + } + else + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + } # endif } #elif defined __linux__ @@ -515,6 +567,8 @@ static const char* GetHostInfo() ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" ); #elif defined __OpenBSD__ ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" ); +#elif defined __QNX__ + ptr += sprintf( ptr, "OS: QNX\n" ); #else ptr += sprintf( ptr, "OS: unknown\n" ); #endif @@ -687,6 +741,21 @@ static const char* GetHostInfo() size_t sz = sizeof( memSize ); sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 ); ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 ); +#elif defined __QNX__ + struct asinfo_entry *entries = SYSPAGE_ENTRY(asinfo); + size_t count = SYSPAGE_ENTRY_SIZE(asinfo) / sizeof(struct asinfo_entry); + char *strings = SYSPAGE_ENTRY(strings)->data; + + uint64_t memSize = 0; + size_t i; + for (i = 0; i < count; i++) { + struct asinfo_entry *entry = &entries[i]; + if (strcmp(strings + entry->name, "ram") == 0) { + memSize += entry->end - entry->start + 1; + } + } + memSize = memSize / 1024 / 1024; + ptr += sprintf( ptr, "RAM: %llu MB\n", 
memSize); #else ptr += sprintf( ptr, "RAM: unknown\n" ); #endif @@ -843,6 +912,13 @@ LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) } #endif +#if defined _WIN32 && !defined _MSC_VER +LONG WINAPI CrashFilterExecute( PEXCEPTION_POINTERS pExp ) +{ + return EXCEPTION_EXECUTE_HANDLER; +} +#endif + static Profiler* s_instance = nullptr; static Thread* s_thread; #ifndef TRACY_NO_FRAME_IMAGE @@ -1142,12 +1218,14 @@ thread_local bool RpThreadShutdown = false; # ifdef TRACY_MANUAL_LIFETIME ProfilerData* s_profilerData = nullptr; static ProfilerThreadData& GetProfilerThreadData(); +static std::atomic s_isProfilerStarted { false }; TRACY_API void StartupProfiler() { s_profilerData = (ProfilerData*)tracy_malloc( sizeof( ProfilerData ) ); new (s_profilerData) ProfilerData(); s_profilerData->profiler.SpawnWorkerThreads(); GetProfilerThreadData().token = ProducerWrapper( *s_profilerData ); + s_isProfilerStarted.store( true, std::memory_order_seq_cst ); } static ProfilerData& GetProfilerData() { @@ -1156,6 +1234,7 @@ static ProfilerData& GetProfilerData() } TRACY_API void ShutdownProfiler() { + s_isProfilerStarted.store( false, std::memory_order_seq_cst ); s_profilerData->~ProfilerData(); tracy_free( s_profilerData ); s_profilerData = nullptr; @@ -1163,6 +1242,10 @@ TRACY_API void ShutdownProfiler() RpThreadInitDone = false; RpInitDone.store( 0, std::memory_order_release ); } +TRACY_API bool IsProfilerStarted() +{ + return s_isProfilerStarted.load( std::memory_order_seq_cst ); +} # else static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr }; @@ -1321,6 +1404,8 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } +constexpr static size_t SafeSendBufferSize = 65536; + Profiler::Profiler() : m_timeBegin( 0 ) , m_mainThread( detail::GetThreadHandleImpl() ) @@ -1375,6 +1460,11 @@ 
Profiler::Profiler() CalibrateDelay(); ReportTopology(); +#ifdef __linux__ + m_kcore = (KCore*)tracy_malloc( sizeof( KCore ) ); + new(m_kcore) KCore(); +#endif + #ifndef TRACY_NO_EXIT const char* noExitEnv = GetEnvVar( "TRACY_NO_EXIT" ); if( noExitEnv && noExitEnv[0] == '1' ) @@ -1389,15 +1479,99 @@ Profiler::Profiler() m_userPort = atoi( userPort ); } + m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); + +#ifndef _WIN32 + pipe(m_pipe); +# if defined __APPLE__ || defined BSD + // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default + m_pipeBufSize = 16384; +# else + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce + m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); +# endif + fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); +#endif + #if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) SpawnWorkerThreads(); #endif } +void Profiler::InstallCrashHandler() +{ + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + struct sigaction threadFreezer = {}; + threadFreezer.sa_handler = ThreadFreezer; + sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); + + struct sigaction crashHandler = {}; + crashHandler.sa_sigaction = CrashHandler; + crashHandler.sa_flags = SA_SIGINFO; + sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); + sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); + sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); + sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); + sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); + sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); +#endif + +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only + // want to catch unhandled exceptions. 
+ m_prevHandler = reinterpret_cast( SetUnhandledExceptionFilter( CrashFilter ) ); +#endif + +#ifndef TRACY_NO_CRASH_HANDLER + m_crashHandlerInstalled = true; +#endif + +} + +void Profiler::RemoveCrashHandler() +{ +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); + if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back + } +#endif + +#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto restore = []( int signum, struct sigaction* prev ) { + struct sigaction old; + sigaction( signum, prev, &old ); + if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back + }; + restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); + restore( SIGILL, &m_prevSignal.ill ); + restore( SIGFPE, &m_prevSignal.fpe ); + restore( SIGSEGV, &m_prevSignal.segv ); + restore( SIGPIPE, &m_prevSignal.pipe ); + restore( SIGBUS, &m_prevSignal.bus ); + restore( SIGABRT, &m_prevSignal.abrt ); + } +#endif + m_crashHandlerInstalled = false; +} + void Profiler::SpawnWorkerThreads() { #ifdef TRACY_HAS_SYSTEM_TRACING - if( SysTraceStart( m_samplingPeriod ) ) + // use TRACY_NO_SYS_TRACE=1 to force disabling sys tracing (even if available in the underlying system) + // as it can have significant impact on the size of the traces + const char* noSysTrace = GetEnvVar( "TRACY_NO_SYS_TRACE" ); + const bool disableSystrace = (noSysTrace && noSysTrace[0] == '1'); + if( disableSystrace ) + { + TracyDebug("TRACY: Sys Trace was disabled by 'TRACY_NO_SYS_TRACE=1'\n"); + } + else if( SysTraceStart( m_samplingPeriod ) ) { s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_sysTraceThread) Thread( SysTraceWorker, nullptr ); @@ -1423,27 +1597,6 @@ void 
Profiler::SpawnWorkerThreads() # ifdef TRACY_HAS_CALLSTACK s_symbolThreadId = GetThreadId( s_symbolThread->Handle() ); # endif - m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - struct sigaction threadFreezer = {}; - threadFreezer.sa_handler = ThreadFreezer; - sigaction( TRACY_CRASH_SIGNAL, &threadFreezer, &m_prevSignal.pwr ); - - struct sigaction crashHandler = {}; - crashHandler.sa_sigaction = CrashHandler; - crashHandler.sa_flags = SA_SIGINFO; - sigaction( SIGILL, &crashHandler, &m_prevSignal.ill ); - sigaction( SIGFPE, &crashHandler, &m_prevSignal.fpe ); - sigaction( SIGSEGV, &crashHandler, &m_prevSignal.segv ); - sigaction( SIGPIPE, &crashHandler, &m_prevSignal.pipe ); - sigaction( SIGBUS, &crashHandler, &m_prevSignal.bus ); - sigaction( SIGABRT, &crashHandler, &m_prevSignal.abrt ); -#endif - -#ifndef TRACY_NO_CRASH_HANDLER - m_crashHandlerInstalled = true; #endif #ifdef TRACY_HAS_CALLSTACK @@ -1457,22 +1610,7 @@ Profiler::~Profiler() { m_shutdown.store( true, std::memory_order_relaxed ); -#if defined _WIN32 && !defined TRACY_UWP - if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler ); -#endif - -#if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER - if( m_crashHandlerInstalled ) - { - sigaction( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr, nullptr ); - sigaction( SIGILL, &m_prevSignal.ill, nullptr ); - sigaction( SIGFPE, &m_prevSignal.fpe, nullptr ); - sigaction( SIGSEGV, &m_prevSignal.segv, nullptr ); - sigaction( SIGPIPE, &m_prevSignal.pipe, nullptr ); - sigaction( SIGBUS, &m_prevSignal.bus, nullptr ); - sigaction( SIGABRT, &m_prevSignal.abrt, nullptr ); - } -#endif + RemoveCrashHandler(); #ifdef TRACY_HAS_SYSTEM_TRACING if( s_sysTraceThread ) @@ -1500,6 +1638,17 @@ Profiler::~Profiler() EndCallstack(); #endif +#ifdef __linux__ + m_kcore->~KCore(); + tracy_free( m_kcore ); +#endif + +#ifndef _WIN32 + close( m_pipe[0] ); + close( m_pipe[1] ); 
+#endif + tracy_free( m_safeSendBuffer ); + tracy_free( m_lz4Buf ); tracy_free( m_buffer ); LZ4_freeStream( (LZ4_stream_t*)m_stream ); @@ -1677,6 +1826,12 @@ void Profiler::Worker() new(m_broadcast) UdpBroadcast(); # ifdef TRACY_ONLY_LOCALHOST const char* addr = "127.255.255.255"; +# elif defined TRACY_CLIENT_ADDRESS + const char* addr = TRACY_CLIENT_ADDRESS; +# elif defined __QNX__ + // global broadcast address of 255.255.255.255 is not well-supported by QNX, + // use the interface broadcast address instead, e.g. "const char* addr = 192.168.1.255;" +# error Need to specify TRACY_CLIENT_ADDRESS for a QNX target. # else const char* addr = "255.255.255.255"; # endif @@ -1789,6 +1944,7 @@ void Profiler::Worker() m_connectionId.fetch_add( 1, std::memory_order_release ); #endif m_isConnected.store( true, std::memory_order_release ); + InstallCrashHandler(); HandshakeStatus handshake = HandshakeWelcome; m_sock->Send( &handshake, sizeof( handshake ) ); @@ -1891,6 +2047,8 @@ void Profiler::Worker() if( ShouldExit() ) break; m_isConnected.store( false, std::memory_order_release ); + RemoveCrashHandler(); + #ifdef TRACY_ON_DEMAND m_bufferOffset = 0; m_bufferStart = 0; @@ -2718,6 +2876,15 @@ Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->memFree.time, dt ); break; } + case QueueType::MemDiscard: + case QueueType::MemDiscardCallstack: + { + int64_t t = MemRead( &item->memDiscard.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->memDiscard.time, dt ); + break; + } case QueueType::GpuZoneBeginSerial: case QueueType::GpuZoneBeginCallstackSerial: { @@ -2954,6 +3121,66 @@ bool Profiler::CommitData() return ret; } +char* Profiler::SafeCopyProlog( const char* data, size_t size ) +{ + bool success = true; + char* buf = m_safeSendBuffer; +#ifndef NDEBUG + assert( !m_inUse.exchange(true) ); +#endif + + if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); + +#ifdef _WIN32 +# ifdef _MSC_VER + __try + { + memcpy( buf, data, size ); 
+ } + __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + { + success = false; + } +# else + memcpy( buf, data, size ); +# endif +#else + // Send through the pipe to ensure safe reads + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + ssize_t result1, result2; + while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result1 < 0 ) + { + success = false; + break; + } + while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result2 != result1 ) + { + success = false; + break; + } + offset += result1; + } +#endif + + if( success ) return buf; + + SafeCopyEpilog( buf ); + return nullptr; +} + +void Profiler::SafeCopyEpilog( char* buf ) +{ + if( buf != m_safeSendBuffer ) tracy_free( buf ); + +#ifndef NDEBUG + m_inUse.store( false ); +#endif +} + bool Profiler::SendData( const char* data, size_t len ) { const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); @@ -3280,6 +3507,17 @@ void Profiler::HandleSymbolQueueItem( const SymbolQueueItem& si ) } } } +#elif defined __linux__ + void* data = m_kcore->Retrieve( si.ptr, si.extra ); + if( data ) + { + TracyLfqPrepare( QueueType::SymbolCodeMetadata ); + MemWrite( &item->symbolCodeMetadata.symbol, si.ptr ); + MemWrite( &item->symbolCodeMetadata.ptr, (uint64_t)data ); + MemWrite( &item->symbolCodeMetadata.size, (uint32_t)si.extra ); + TracyLfqCommit; + break; + } #endif TracyLfqPrepare( QueueType::AckSymbolCodeNotAvailable ); TracyLfqCommit; @@ -3365,7 +3603,22 @@ bool Profiler::HandleServerQuery() } else { - SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName ); + auto t = GetThreadNameData( (uint32_t)ptr ); + if( t ) + { + SendString( ptr, t->name, QueueType::ThreadName ); + if( t->groupHint != 0 ) + { + TracyLfqPrepare( QueueType::ThreadGroupHint ); + MemWrite( &item->threadGroupHint.thread, 
(uint32_t)ptr ); + MemWrite( &item->threadGroupHint.groupHint, t->groupHint ); + TracyLfqCommit; + } + } + else + { + SendString( ptr, GetThreadName( (uint32_t)ptr ), QueueType::ThreadName ); + } } break; case ServerQuerySourceLocation: @@ -3603,6 +3856,7 @@ void Profiler::ReportTopology() struct CpuData { uint32_t package; + uint32_t die; uint32_t core; uint32_t thread; }; @@ -3615,23 +3869,55 @@ void Profiler::ReportTopology() # endif if( !_GetLogicalProcessorInformationEx ) return; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* packageInfo = nullptr; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* dieInfo = nullptr; + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* coreInfo = nullptr; + DWORD psz = 0; _GetLogicalProcessorInformationEx( RelationProcessorPackage, nullptr, &psz ); - auto packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); - auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); - assert( res ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + packageInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( psz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorPackage, packageInfo, &psz ); + assert( res ); + } + else + { + psz = 0; + } + + DWORD dsz = 0; + _GetLogicalProcessorInformationEx( RelationProcessorDie, nullptr, &dsz ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + dieInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( dsz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorDie, dieInfo, &dsz ); + assert( res ); + } + else + { + dsz = 0; + } DWORD csz = 0; _GetLogicalProcessorInformationEx( RelationProcessorCore, nullptr, &csz ); - auto coreInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); - res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); - assert( res ); + if( GetLastError() == ERROR_INSUFFICIENT_BUFFER ) + { + coreInfo = 
(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)tracy_malloc( csz ); + auto res = _GetLogicalProcessorInformationEx( RelationProcessorCore, coreInfo, &csz ); + assert( res ); + } + else + { + csz = 0; + } SYSTEM_INFO sysinfo; GetSystemInfo( &sysinfo ); const uint32_t numcpus = sysinfo.dwNumberOfProcessors; auto cpuData = (CpuData*)tracy_malloc( sizeof( CpuData ) * numcpus ); + memset( cpuData, 0, sizeof( CpuData ) * numcpus ); for( uint32_t i=0; iRelationship == RelationProcessorDie ); + // FIXME account for GroupCount + auto mask = ptr->Processor.GroupMask[0].Mask; + int core = 0; + while( mask != 0 ) + { + if( mask & 1 ) cpuData[core].die = idx; + core++; + mask >>= 1; + } + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); + idx++; + } + idx = 0; ptr = coreInfo; while( (char*)ptr < ((char*)coreInfo) + csz ) @@ -3676,6 +3980,7 @@ void Profiler::ReportTopology() TracyLfqPrepare( QueueType::CpuTopology ); MemWrite( &item->cpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( &item->cpuTopology.thread, data.thread ); @@ -3711,12 +4016,26 @@ void Profiler::ReportTopology() fclose( f ); cpuData[i].package = uint32_t( atoi( buf ) ); cpuData[i].thread = i; + sprintf( path, "%s%i/topology/core_id", basePath, i ); f = fopen( path, "rb" ); - read = fread( buf, 1, 1024, f ); - buf[read] = '\0'; - fclose( f ); - cpuData[i].core = uint32_t( atoi( buf ) ); + if( f ) + { + read = fread( buf, 1, 1024, f ); + buf[read] = '\0'; + fclose( f ); + cpuData[i].core = uint32_t( atoi( buf ) ); + } + + sprintf( path, "%s%i/topology/die_id", basePath, i ); + f = fopen( path, "rb" ); + if( f ) + { + read = fread( buf, 1, 1024, f ); + buf[read] = '\0'; + fclose( f ); + cpuData[i].die = uint32_t( atoi( buf ) ); + } } for( int i=0; icpuTopology.package, data.package ); + MemWrite( &item->cpuTopology.die, data.die ); MemWrite( &item->cpuTopology.core, data.core ); MemWrite( 
&item->cpuTopology.thread, data.thread ); @@ -3740,7 +4060,7 @@ void Profiler::ReportTopology() #endif } -void Profiler::SendCallstack( int depth, const char* skipBefore ) +void Profiler::SendCallstack( int32_t depth, const char* skipBefore ) { #ifdef TRACY_HAS_CALLSTACK auto ptr = Callstack( depth ); @@ -3815,44 +4135,41 @@ void Profiler::HandleSymbolCodeQuery( uint64_t symbol, uint32_t size ) } else { -#ifdef __ANDROID__ - // On Android it's common for code to be in mappings that are only executable - // but not readable. - if( !EnsureReadable( symbol ) ) - { - AckSymbolCodeNotAvailable(); - return; - } -#endif - SendLongString( symbol, (const char*)symbol, size, QueueType::SymbolCode ); + auto&& lambda = [ this, symbol ]( const char* buf, size_t size ) { + SendLongString( symbol, buf, size, QueueType::SymbolCode ); + }; + + // 'symbol' may have come from a module that has since unloaded, perform a safe copy before sending + if( !WithSafeCopy( (const char*)symbol, size, lambda ) ) AckSymbolCodeNotAvailable(); } } void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) { bool ok = false; - struct stat st; - if( stat( data, &st ) == 0 && (uint64_t)st.st_mtime < m_exectime ) + FILE* f = fopen( data, "rb" ); + if( f ) { - if( st.st_size < ( TargetFrameSize - 16 ) ) + struct stat st; + if( fstat( fileno( f ), &st ) == 0 && (uint64_t)st.st_mtime < m_exectime && st.st_size < ( TargetFrameSize - 16 ) ) { - FILE* f = fopen( data, "rb" ); - if( f ) + auto ptr = (char*)tracy_malloc_fast( st.st_size ); + auto rd = fread( ptr, 1, st.st_size, f ); + if( rd == (size_t)st.st_size ) { - auto ptr = (char*)tracy_malloc_fast( st.st_size ); - auto rd = fread( ptr, 1, st.st_size, f ); - fclose( f ); - if( rd == (size_t)st.st_size ) - { - TracyLfqPrepare( QueueType::SourceCodeMetadata ); - MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); - MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); - MemWrite( &item->sourceCodeMetadata.id, id ); - 
TracyLfqCommit; - ok = true; - } + TracyLfqPrepare( QueueType::SourceCodeMetadata ); + MemWrite( &item->sourceCodeMetadata.ptr, (uint64_t)ptr ); + MemWrite( &item->sourceCodeMetadata.size, (uint32_t)rd ); + MemWrite( &item->sourceCodeMetadata.id, id ); + TracyLfqCommit; + ok = true; + } + else + { + tracy_free_fast( ptr ); } } + fclose( f ); } #ifdef TRACY_DEBUGINFOD @@ -3882,6 +4199,10 @@ void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) TracyLfqCommit; ok = true; } + else + { + tracy_free_fast( ptr ); + } } close( d ); } @@ -3908,6 +4229,10 @@ void Profiler::HandleSourceCodeQuery( char* data, char* image, uint32_t id ) TracyLfqCommit; ok = true; } + else + { + tracy_free_fast( ptr ); + } } } @@ -3937,7 +4262,7 @@ int64_t Profiler::GetTimeQpc() extern "C" { #endif -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -3965,7 +4290,7 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_l return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -3984,17 +4309,21 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___trac TracyQueueCommitC( zoneValidationThread ); } #endif - tracy::GetProfiler().SendCallstack( depth ); + auto zoneQueue = tracy::QueueType::ZoneBegin; + if( depth > 0 && tracy::has_callstack() ) { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( 
&item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginCallstack; } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); + TracyQueueCommitC( zoneBeginThread ); + return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4026,7 +4355,7 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int act return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4049,13 +4378,17 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo TracyQueueCommitC( zoneValidationThread ); } #endif - tracy::GetProfiler().SendCallstack( depth ); + auto zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && tracy::has_callstack() ) { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLocCallstack; } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommitC( zoneBeginThread ); + return ctx; } @@ -4153,34 +4486,86 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx 
ctx, uint64_t value ) } } -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } +TRACY_API void 
___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemFree( ptr, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ) { tracy::Profiler::MemDiscard( name, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemDiscardCallstack( name, secure != 0, depth ); + } + else + { + tracy::Profiler::MemDiscard( name, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); + } +} +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); + } +} TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } 
TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); } +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip != 0 ); } TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color ); } -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); } -TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); } -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step != 0, fill != 0, color ); } +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ) { tracy::Profiler::Message( txt, size, callstack_depth 
); } +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ) { tracy::Profiler::Message( txt, callstack_depth ); } +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, size, color, callstack_depth ); } +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, color, callstack_depth ); } TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, color ); } -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { - return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); } TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data data ) @@ -4258,6 +4643,11 @@ TRACY_API void ___tracy_emit_gpu_new_context( ___tracy_gpu_new_context_data data tracy::MemWrite( &item->gpuNewContext.context, data.context ); tracy::MemWrite( 
&item->gpuNewContext.flags, data.flags ); tracy::MemWrite( &item->gpuNewContext.type, data.type ); + +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + TracyLfqCommitC; } @@ -4270,6 +4660,11 @@ TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context tracy::MemWrite( &item->gpuContextNameFat.context, data.context ); tracy::MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); tracy::MemWrite( &item->gpuContextNameFat.size, data.len ); + +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + TracyLfqCommitC; } @@ -4283,6 +4678,15 @@ TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibrat TracyLfqCommitC; } +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data data ) +{ + TracyLfqPrepareC( tracy::QueueType::GpuTimeSync ); + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + TracyLfqCommitC; +} + TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data data ) { auto item = tracy::Profiler::QueueSerial(); @@ -4390,17 +4794,196 @@ TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_c tracy::Profiler::QueueSerialFinish(); } -TRACY_API int ___tracy_connected( void ) +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data data ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::GpuTimeSync ); + tracy::MemWrite( &item->gpuTimeSync.cpuTime, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->gpuTimeSync.gpuTime, data.gpuTime ); + tracy::MemWrite( &item->gpuTimeSync.context, data.context ); + tracy::Profiler::QueueSerialFinish(); +} + +struct __tracy_lockable_context_data +{ + uint32_t m_id; +#ifdef TRACY_ON_DEMAND + std::atomic m_lockCount; 
+ std::atomic m_active; +#endif +}; + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ) +{ + struct __tracy_lockable_context_data *lockdata = (__tracy_lockable_context_data*)tracy::tracy_malloc( sizeof( __tracy_lockable_context_data ) ); + lockdata->m_id =tracy:: GetLockCounter().fetch_add( 1, std::memory_order_relaxed ); +#ifdef TRACY_ON_DEMAND + new(&lockdata->m_lockCount) std::atomic( 0 ); + new(&lockdata->m_active) std::atomic( false ); +#endif + assert( lockdata->m_id != (std::numeric_limits::max)() ); + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockAnnounce ); + tracy::MemWrite( &item->lockAnnounce.id, lockdata->m_id ); + tracy::MemWrite( &item->lockAnnounce.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); + tracy::MemWrite( &item->lockAnnounce.type, tracy::LockType::Lockable ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + + return lockdata; +} + +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockTerminate ); + tracy::MemWrite( &item->lockTerminate.id, lockdata->m_id ); + tracy::MemWrite( &item->lockTerminate.time, tracy::Profiler::GetTime() ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); + +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.~atomic(); + lockdata->m_active.~atomic(); +#endif + tracy::tracy_free((void*)lockdata); +} + +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto 
active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return static_cast(false); +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockWait ); + tracy::MemWrite( &item->lockWait.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); + tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + return static_cast(true); +} + +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +{ +#ifdef TRACY_ON_DEMAND + lockdata->m_lockCount.fetch_sub( 1, std::memory_order_relaxed ); + if( !lockdata->m_active.load( std::memory_order_relaxed ) ) return; + if( !tracy::GetProfiler().IsConnected() ) + { + lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockRelease ); + tracy::MemWrite( &item->lockRelease.id, lockdata->m_id ); + tracy::MemWrite( &item->lockRelease.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t 
acquired ) +{ +#ifdef TRACY_ON_DEMAND + if( !acquired ) return; + + bool queue = false; + const auto locks = lockdata->m_lockCount.fetch_add( 1, std::memory_order_relaxed ); + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( locks == 0 || active ) + { + const bool connected = tracy::GetProfiler().IsConnected(); + if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); + if( connected ) queue = true; + } + if( !queue ) return; +#endif + + if( acquired ) + { + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockObtain ); + tracy::MemWrite( &item->lockObtain.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockObtain.id, lockdata->m_id ); + tracy::MemWrite( &item->lockObtain.time, tracy::Profiler::GetTime() ); + tracy::Profiler::QueueSerialFinish(); + } +} + +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ) +{ +#ifdef TRACY_ON_DEMAND + const auto active = lockdata->m_active.load( std::memory_order_relaxed ); + if( !active ) return; + const auto connected = tracy::GetProfiler().IsConnected(); + if( !connected ) + { + if( active ) lockdata->m_active.store( false, std::memory_order_relaxed ); + return; + } +#endif + + auto item = tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockMark ); + tracy::MemWrite( &item->lockMark.thread, tracy::GetThreadHandle() ); + tracy::MemWrite( &item->lockMark.id, lockdata->m_id ); + tracy::MemWrite( &item->lockMark.srcloc, (uint64_t)srcloc ); + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ) +{ + assert( nameSz < (std::numeric_limits::max)() ); + auto ptr = (char*)tracy::tracy_malloc( nameSz ); + memcpy( ptr, name, nameSz ); + auto item = 
tracy::Profiler::QueueSerial(); + tracy::MemWrite( &item->hdr.type, tracy::QueueType::LockName ); + tracy::MemWrite( &item->lockNameFat.id, lockdata->m_id ); + tracy::MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); + tracy::MemWrite( &item->lockNameFat.size, (uint16_t)nameSz ); +#ifdef TRACY_ON_DEMAND + tracy::GetProfiler().DeferItem( *item ); +#endif + tracy::Profiler::QueueSerialFinish(); +} + +TRACY_API int32_t ___tracy_connected( void ) { - return tracy::GetProfiler().IsConnected(); + return static_cast( tracy::GetProfiler().IsConnected() ); } #ifdef TRACY_FIBERS -TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber ); } +TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::EnterFiber( fiber, 0 ); } TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } #endif -# ifdef TRACY_MANUAL_LIFETIME +# if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT TRACY_API void ___tracy_startup_profiler( void ) { tracy::StartupProfiler(); @@ -4410,6 +4993,11 @@ TRACY_API void ___tracy_shutdown_profiler( void ) { tracy::ShutdownProfiler(); } + +TRACY_API int32_t ___tracy_profiler_started( void ) +{ + return static_cast( tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ) ); +} # endif #ifdef __cplusplus diff --git a/external/sources/tracy/public/client/TracyProfiler.hpp b/external/sources/tracy/public/client/TracyProfiler.hpp index e3b256dfa6..8d16905860 100644 --- a/external/sources/tracy/public/client/TracyProfiler.hpp +++ b/external/sources/tracy/public/client/TracyProfiler.hpp @@ -10,6 +10,7 @@ #include "tracy_concurrentqueue.h" #include "tracy_SPSCQueue.h" #include "TracyCallstack.hpp" +#include "TracyKCore.hpp" #include "TracySysPower.hpp" #include "TracySysTime.hpp" #include "TracyFastVector.hpp" @@ -27,7 +28,7 @@ # include #endif -#if ( defined _WIN32 || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && 
TARGET_OS_IOS == 1 ) ) +#if ( (defined _WIN32 && !(defined _M_ARM64 || defined _M_ARM)) || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) ) # define TRACY_HW_TIMER #endif @@ -51,6 +52,10 @@ namespace tracy #if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME) TRACY_API void StartupProfiler(); TRACY_API void ShutdownProfiler(); +TRACY_API bool IsProfilerStarted(); +# define TracyIsStarted tracy::IsProfilerStarted() +#else +# define TracyIsStarted true #endif class GpuCtx; @@ -109,11 +114,11 @@ struct LuaZoneState #define TracyLfqPrepare( _type ) \ - moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ - auto __token = GetToken(); \ + tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ + auto __token = tracy::GetToken(); \ auto& __tail = __token->get_tail_index(); \ auto item = __token->enqueue_begin( __magic ); \ - MemWrite( &item->hdr.type, _type ); + tracy::MemWrite( &item->hdr.type, _type ); #define TracyLfqCommit \ __tail.store( __magic + 1, std::memory_order_release ); @@ -131,11 +136,11 @@ struct LuaZoneState #ifdef TRACY_FIBERS # define TracyQueuePrepare( _type ) \ - auto item = Profiler::QueueSerial(); \ - MemWrite( &item->hdr.type, _type ); + auto item = tracy::Profiler::QueueSerial(); \ + tracy::MemWrite( &item->hdr.type, _type ); # define TracyQueueCommit( _name ) \ - MemWrite( &item->_name.thread, GetThreadHandle() ); \ - Profiler::QueueSerialFinish(); + tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \ + tracy::Profiler::QueueSerialFinish(); # define TracyQueuePrepareC( _type ) \ auto item = tracy::Profiler::QueueSerial(); \ tracy::MemWrite( &item->hdr.type, _type ); @@ -382,58 +387,58 @@ class Profiler TracyLfqCommit; } - static tracy_force_inline void Message( const char* txt, size_t size, int callstack ) + static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth ) { assert( size < 
(std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack ); MemWrite( &item->messageFat.time, GetTime() ); MemWrite( &item->messageFat.text, (uint64_t)ptr ); MemWrite( &item->messageFat.size, (uint16_t)size ); TracyQueueCommit( messageFatThread ); } - static tracy_force_inline void Message( const char* txt, int callstack ) + static tracy_force_inline void Message( const char* txt, int32_t callstack_depth ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } - TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? 
QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); MemWrite( &item->messageLiteral.time, GetTime() ); MemWrite( &item->messageLiteral.text, (uint64_t)txt ); TracyQueueCommit( messageLiteralThread ); } - static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ) + static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); MemWrite( &item->messageColorFat.time, GetTime() ); MemWrite( &item->messageColorFat.text, (uint64_t)ptr ); MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) ); @@ -443,17 +448,17 @@ class Profiler TracyQueueCommit( messageColorFatThread ); } - static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack ) + static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } - TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? 
QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); MemWrite( &item->messageColorLiteral.time, GetTime() ); MemWrite( &item->messageColorLiteral.text, (uint64_t)txt ); MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) ); @@ -505,29 +510,31 @@ class Profiler GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ) + static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemAlloc( ptr, size, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); + profiler.m_serialLock.unlock(); + } + else + { + MemAlloc( ptr, size, secure ); + } } - static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure ) + static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure ) { if( secure && !ProfilerAvailable() ) return; if( !ProfilerAllocatorAvailable() ) @@ -535,23 +542,25 @@ class Profiler MemFree( ptr, secure ); return; } -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( 
!profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemFree( ptr, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); + profiler.m_serialLock.unlock(); + } + else + { + MemFree( ptr, secure ); + } } static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ) @@ -582,64 +591,101 @@ class Profiler GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ) + static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - static_cast(name); // unused - MemAlloc( ptr, size, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemName( name ); + 
SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); + profiler.m_serialLock.unlock(); + } + else + { + MemAllocNamed( ptr, size, secure, name ); + } } - static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ) + static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - static_cast(name); // unused - MemFree( ptr, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemName( name ); + SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); + profiler.m_serialLock.unlock(); + } + else + { + MemFreeNamed( ptr, secure, name ); + } } - static tracy_force_inline void SendCallstack( int depth ) + static tracy_force_inline void MemDiscard( const char* name, bool secure ) { -#ifdef TRACY_HAS_CALLSTACK - auto ptr = Callstack( depth ); - TracyQueuePrepare( QueueType::Callstack ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - TracyQueueCommit( callstackFatThread ); -#else - static_cast(depth); // unused + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; #endif + const auto thread = GetThreadHandle(); + + GetProfiler().m_serialLock.lock(); + 
SendMemDiscard( QueueType::MemDiscard, thread, name ); + GetProfiler().m_serialLock.unlock(); + } + + static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth ) + { + if( secure && !ProfilerAvailable() ) return; + if( depth > 0 && has_callstack() ) + { +# ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +# endif + const auto thread = GetThreadHandle(); + + auto callstack = Callstack( depth ); + + GetProfiler().m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemDiscard( QueueType::MemDiscard, thread, name ); + GetProfiler().m_serialLock.unlock(); + } + else + { + MemDiscard( name, secure ); + } + } + + static tracy_force_inline void SendCallstack( int32_t depth ) + { + if( depth > 0 && has_callstack() ) + { + auto ptr = Callstack( depth ); + TracyQueuePrepare( QueueType::Callstack ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + TracyQueueCommit( callstackFatThread ); + } } static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data ) @@ -672,11 +718,12 @@ class Profiler } #ifdef TRACY_FIBERS - static tracy_force_inline void EnterFiber( const char* fiber ) + static tracy_force_inline void EnterFiber( const char* fiber, int32_t groupHint ) { TracyQueuePrepare( QueueType::FiberEnter ); MemWrite( &item->fiberEnter.time, GetTime() ); MemWrite( &item->fiberEnter.fiber, (uint64_t)fiber ); + MemWrite( &item->fiberEnter.groupHint, groupHint ); TracyQueueCommit( fiberEnter ); } @@ -688,7 +735,7 @@ class Profiler } #endif - void SendCallstack( int depth, const char* skipBefore ); + void SendCallstack( int32_t depth, const char* skipBefore ); static void CutCallstack( void* callstack, const char* skipBefore ); static bool ShouldExit(); @@ -741,29 +788,29 @@ class Profiler // 1b null terminator // nsz zone name (optional) - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function ) + static tracy_force_inline uint64_t 
AllocSourceLocation( uint32_t line, const char* source, const char* function, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, function, nullptr, 0 ); + return AllocSourceLocation( line, source, function, nullptr, 0, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz ); + return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color = 0 ) { - return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 ); + return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0, color ); } - static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color = 0 ) { const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); assert( sz32 <= (std::numeric_limits::max)() ); const auto sz = uint16_t( sz32 ); auto ptr = (char*)tracy_malloc( sz ); memcpy( ptr, &sz, 2 ); - memset( ptr + 2, 0, 4 ); + memcpy( ptr + 2, &color, 4 ); memcpy( ptr + 6, &line, 4 
); memcpy( ptr + 10, function, functionSz ); ptr[10 + functionSz] = '\0'; @@ -794,6 +841,9 @@ class Profiler void HandleSymbolQueueItem( const SymbolQueueItem& si ); #endif + void InstallCrashHandler(); + void RemoveCrashHandler(); + void ClearQueues( tracy::moodycamel::ConsumerToken& token ); void ClearSerial(); DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); @@ -826,6 +876,21 @@ class Profiler m_bufferOffset += int( len ); } + char* SafeCopyProlog( const char* p, size_t size ); + void SafeCopyEpilog( char* buf ); + + template // must be void( const char* buf, size_t size ) + bool WithSafeCopy( const char* p, size_t size, Callable&& callable ) + { + if( char* buf = SafeCopyProlog( p, size ) ) + { + callable( buf, size ); + SafeCopyEpilog( buf ); + return true; + } + return false; + } + bool SendData( const char* data, size_t len ); void SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type ); void SendSourceLocation( uint64_t ptr ); @@ -855,14 +920,13 @@ class Profiler static tracy_force_inline void SendCallstackSerial( void* ptr ) { -#ifdef TRACY_HAS_CALLSTACK - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, QueueType::CallstackSerial ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - GetProfiler().m_serialQueue.commit_next(); -#else - static_cast(ptr); // unused -#endif + if( has_callstack() ) + { + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, QueueType::CallstackSerial ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + GetProfiler().m_serialQueue.commit_next(); + } } static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size ) @@ -900,6 +964,18 @@ class Profiler GetProfiler().m_serialQueue.commit_next(); } + static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name ) + { + assert( type == QueueType::MemDiscard || type == 
QueueType::MemDiscardCallstack ); + + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, type ); + MemWrite( &item->memDiscard.time, GetTime() ); + MemWrite( &item->memDiscard.thread, thread ); + MemWrite( &item->memDiscard.name, (uint64_t)name ); + GetProfiler().m_serialQueue.commit_next(); + } + static tracy_force_inline void SendMemName( const char* name ) { assert( name ); @@ -983,13 +1059,24 @@ class Profiler char* m_queryData; char* m_queryDataPtr; +#ifndef NDEBUG + // m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds. + std::atomic_bool m_inUse{ false }; +#endif + char* m_safeSendBuffer; + #if defined _WIN32 - void* m_exceptionHandler; + void* m_prevHandler; +#else + int m_pipe[2]; + int m_pipeBufSize; #endif + #ifdef __linux__ struct { struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt; } m_prevSignal; + KCore* m_kcore; #endif bool m_crashHandlerInstalled; diff --git a/external/sources/tracy/public/client/TracyScoped.hpp b/external/sources/tracy/public/client/TracyScoped.hpp index d2274e40b0..7f9256d8c3 100644 --- a/external/sources/tracy/public/client/TracyScoped.hpp +++ b/external/sources/tracy/public/client/TracyScoped.hpp @@ -2,6 +2,7 @@ #define __TRACYSCOPED_HPP__ #include +#include #include #include @@ -9,6 +10,7 @@ #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "TracyProfiler.hpp" +#include "TracyCallstack.hpp" namespace tracy { @@ -21,7 +23,7 @@ class ScopedZone ScopedZone& operator=( const ScopedZone& ) = delete; ScopedZone& operator=( ScopedZone&& ) = delete; - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true ) + tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -32,13 +34,19 @@ class ScopedZone #ifdef TRACY_ON_DEMAND m_connectionId = 
GetProfiler().ConnectionId(); #endif - TracyQueuePrepare( QueueType::ZoneBegin ); + auto zoneQueue = QueueType::ZoneBegin; + if( depth > 0 && has_callstack() ) + { + GetProfiler().SendCallstack( depth ); + zoneQueue = QueueType::ZoneBeginCallstack; + } + TracyQueuePrepare( zoneQueue ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true ) + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -49,51 +57,21 @@ class ScopedZone #ifdef TRACY_ON_DEMAND m_connectionId = GetProfiler().ConnectionId(); #endif - GetProfiler().SendCallstack( depth ); - - TracyQueuePrepare( QueueType::ZoneBeginCallstack ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommit( zoneBeginThread ); - } - - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && has_callstack() ) + { + GetProfiler().SendCallstack( depth ); + zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack; + } + 
TracyQueuePrepare( zoneQueue ); + const auto srcloc = + Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - GetProfiler().SendCallstack( depth ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - } + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {} tracy_force_inline ~ScopedZone() { @@ -121,6 +99,30 @@ class ScopedZone TracyQueueCommit( zoneTextFatThread ); } + void TextFmt( const char* fmt, ... 
) + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + if( GetProfiler().ConnectionId() != m_connectionId ) return; +#endif + va_list args; + va_start( args, fmt ); + auto size = vsnprintf( nullptr, 0, fmt, args ); + va_end( args ); + if( size < 0 ) return; + assert( size < (std::numeric_limits::max)() ); + + char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); + va_start( args, fmt ); + vsnprintf( ptr, size_t( size ) + 1, fmt, args ); + va_end( args ); + + TracyQueuePrepare( QueueType::ZoneText ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + } + tracy_force_inline void Name( const char* txt, size_t size ) { assert( size < (std::numeric_limits::max)() ); @@ -136,6 +138,30 @@ class ScopedZone TracyQueueCommit( zoneTextFatThread ); } + void NameFmt( const char* fmt, ... ) + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + if( GetProfiler().ConnectionId() != m_connectionId ) return; +#endif + va_list args; + va_start( args, fmt ); + auto size = vsnprintf( nullptr, 0, fmt, args ); + va_end( args ); + if( size < 0 ) return; + assert( size < (std::numeric_limits::max)() ); + + char* ptr = (char*)tracy_malloc( size_t( size ) + 1 ); + va_start( args, fmt ); + vsnprintf( ptr, size_t( size ) + 1, fmt, args ); + va_end( args ); + + TracyQueuePrepare( QueueType::ZoneName ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + } + tracy_force_inline void Color( uint32_t color ) { if( !m_active ) return; diff --git a/external/sources/tracy/public/client/TracySysPower.cpp b/external/sources/tracy/public/client/TracySysPower.cpp index bd5939da2b..6ad1d64783 100644 --- a/external/sources/tracy/public/client/TracySysPower.cpp +++ b/external/sources/tracy/public/client/TracySysPower.cpp @@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent ) FILE* f 
= fopen( tmp, "r" ); if( f ) { - fscanf( f, "%" PRIu64, &maxRange ); + (void)fscanf( f, "%" PRIu64, &maxRange ); fclose( f ); } } diff --git a/external/sources/tracy/public/client/TracySysTrace.cpp b/external/sources/tracy/public/client/TracySysTrace.cpp index af0641fef1..8e7f6139b6 100644 --- a/external/sources/tracy/public/client/TracySysTrace.cpp +++ b/external/sources/tracy/public/client/TracySysTrace.cpp @@ -16,16 +16,25 @@ namespace tracy { -static constexpr int GetSamplingFrequency() +static int GetSamplingFrequency() { + int samplingHz = TRACY_SAMPLING_HZ; + + auto env = GetEnvVar( "TRACY_SAMPLING_HZ" ); + if( env ) + { + int val = atoi( env ); + if( val > 0 ) samplingHz = val; + } + #if defined _WIN32 - return TRACY_SAMPLING_HZ > 8000 ? 8000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); + return samplingHz > 8000 ? 8000 : ( samplingHz < 1 ? 1 : samplingHz ); #else - return TRACY_SAMPLING_HZ > 1000000 ? 1000000 : ( TRACY_SAMPLING_HZ < 1 ? 1 : TRACY_SAMPLING_HZ ); + return samplingHz > 1000000 ? 1000000 : ( samplingHz < 1 ? 
1 : samplingHz ); #endif } -static constexpr int GetSamplingPeriod() +static int GetSamplingPeriod() { return 1000000000 / GetSamplingFrequency(); } @@ -164,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record ) MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId ); MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId ); MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber ); - MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason ); - MemWrite( &item->contextSwitch.state, cswitch->oldThreadState ); + MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason ); + MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState ); + MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority ); + MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority ); + MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 50 ) @@ -174,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record ) TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart ); + MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber ); MemWrite( &item->threadWakeup.thread, rt->threadId ); + MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason ); + MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 ) @@ -321,7 +336,7 @@ static void SetupVsync() #endif } -static constexpr int GetSamplingInterval() +static int GetSamplingInterval() { return GetSamplingPeriod() / 100; } @@ -489,11 +504,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch if( _GetThreadDescription ) { PWSTR tmp; - _GetThreadDescription( hnd, &tmp ); - char buf[256]; - if( tmp ) + if ( SUCCEEDED( 
_GetThreadDescription( hnd, &tmp ) ) ) { + char buf[256]; auto ret = wcstombs( buf, tmp, 256 ); + LocalFree(tmp); if( ret != 0 ) { threadName = CopyString( buf, ret ); @@ -669,7 +684,7 @@ enum TraceEventId EventBranchMiss, EventVsync, EventContextSwitch, - EventWakeup, + EventWaking, }; static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid ) @@ -758,16 +773,16 @@ bool SysTraceStart( int64_t& samplingPeriod ) TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel ); #endif - int switchId = -1, wakeupId = -1, vsyncId = -1; + int switchId = -1, wakingId = -1, vsyncId = -1; const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" ); if( switchIdStr ) switchId = atoi( switchIdStr ); - const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" ); - if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr ); + const auto wakingIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_waking/id" ); + if( wakingIdStr ) wakingId = atoi( wakingIdStr ); const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" ); if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr ); TracyDebug( "sched_switch id: %i\n", switchId ); - TracyDebug( "sched_wakeup id: %i\n", wakeupId ); + TracyDebug( "sched_waking id: %i\n", wakingId ); TracyDebug( "drm_vblank_event id: %i\n", vsyncId ); #ifdef TRACY_NO_SAMPLING @@ -822,7 +837,7 @@ bool SysTraceStart( int64_t& samplingPeriod ) 2 + // CPU cycles + instructions retired 2 + // cache reference + miss 2 + // branch retired + miss - 2 + // context switches + wakeups + 2 + // context switches + waking ups 1 // vsync ); s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers ); @@ -1067,18 +1082,31 @@ bool SysTraceStart( int64_t& samplingPeriod ) } } - if( wakeupId != -1 ) + if( wakingId != -1 ) { - pe.config = wakeupId; - pe.config &= ~PERF_SAMPLE_CALLCHAIN; + pe = {}; + pe.type = 
PERF_TYPE_TRACEPOINT; + pe.size = sizeof( perf_event_attr ); + pe.sample_period = 1; + pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW; + // Coult ask for callstack here + //pe.sample_type |= PERF_SAMPLE_CALLCHAIN; + pe.disabled = 1; + pe.inherit = 1; + pe.config = wakingId; + pe.read_format = 0; +#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) + pe.use_clockid = 1; + pe.clockid = CLOCK_MONOTONIC_RAW; +#endif - TracyDebug( "Setup wakeup capture\n" ); + TracyDebug( "Setup waking up capture\n" ); for( int i=0; i 0 ) { + // Find the earliest event from the active buffers int sel = -1; int selPos; int64_t t0 = std::numeric_limits::max(); @@ -1360,6 +1389,7 @@ void SysTraceWorker( void* ptr ) } } } + // Found any event if( sel >= 0 ) { auto& ring = ringArray[ctxBufferIdx + sel]; @@ -1375,10 +1405,10 @@ void SysTraceWorker( void* ptr ) const auto rid = ring.GetId(); if( rid == EventContextSwitch ) { - // Layout: - // u64 time - // u64 cnt - // u64 ip[cnt] + // Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format + // u64 time // PERF_SAMPLE_TIME + // u64 cnt // PERF_SAMPLE_CALLCHAIN + // u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN // u32 size // u8 data[size] // Data (not ABI stable, but has not changed since it was added, in 2009): @@ -1399,35 +1429,43 @@ void SysTraceWorker( void* ptr ) const auto traceOffset = offset; offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16; - uint32_t prev_pid, next_pid; + uint32_t prev_pid, prev_prio; + uint32_t next_pid, next_prio; long prev_state; ring.Read( &prev_pid, offset, sizeof( uint32_t ) ); - offset += sizeof( uint32_t ) + sizeof( uint32_t ); + offset += sizeof( uint32_t ); + ring.Read( &prev_prio, offset, sizeof( uint32_t ) ); + offset += sizeof( uint32_t ); ring.Read( &prev_state, offset, sizeof( long ) ); offset += sizeof( long ) + 16; ring.Read( &next_pid, offset, sizeof( uint32_t ) ); - - uint8_t reason = 100; - uint8_t state; - - if( 
prev_state & 0x0001 ) state = 104; - else if( prev_state & 0x0002 ) state = 101; - else if( prev_state & 0x0004 ) state = 105; - else if( prev_state & 0x0008 ) state = 106; - else if( prev_state & 0x0010 ) state = 108; - else if( prev_state & 0x0020 ) state = 109; - else if( prev_state & 0x0040 ) state = 110; - else if( prev_state & 0x0080 ) state = 102; - else state = 103; + offset += sizeof( uint32_t ); + ring.Read( &next_prio, offset, sizeof( uint32_t ) ); + + uint8_t oldThreadWaitReason = 100; + uint8_t oldThreadState; + + if( prev_state & 0x0001 ) oldThreadState = 104; + else if( prev_state & 0x0002 ) oldThreadState = 101; + else if( prev_state & 0x0004 ) oldThreadState = 105; + else if( prev_state & 0x0008 ) oldThreadState = 106; + else if( prev_state & 0x0010 ) oldThreadState = 108; + else if( prev_state & 0x0020 ) oldThreadState = 109; + else if( prev_state & 0x0040 ) oldThreadState = 110; + else if( prev_state & 0x0080 ) oldThreadState = 102; + else oldThreadState = 103; TracyLfqPrepare( QueueType::ContextSwitch ); MemWrite( &item->contextSwitch.time, t0 ); MemWrite( &item->contextSwitch.oldThread, prev_pid ); MemWrite( &item->contextSwitch.newThread, next_pid ); MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) ); - MemWrite( &item->contextSwitch.reason, reason ); - MemWrite( &item->contextSwitch.state, state ); + MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason ); + MemWrite( &item->contextSwitch.oldThreadState, oldThreadState ); + MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) ); + MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) ); + MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) ); TracyLfqCommit; if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) ) @@ -1441,27 +1479,33 @@ void SysTraceWorker( void* ptr ) TracyLfqCommit; } } - else if( rid == EventWakeup ) + else if( rid == EventWaking) { + // See 
/sys/kernel/debug/tracing/events/sched/sched_waking/format // Layout: - // u64 time + // u64 time // PERF_SAMPLE_TIME // u32 size // u8 data[size] // Data: // u8 hdr[8] // u8 comm[16] // u32 pid - // u32 prio - // u64 target_cpu - - offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16; - + // i32 prio + // i32 target_cpu + const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ); + offset += dataOffset + 8 + 16; uint32_t pid; ring.Read( &pid, offset, sizeof( uint32_t ) ); - + TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, t0 ); MemWrite( &item->threadWakeup.thread, pid ); + MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() ); + + int8_t adjustReason = -1; // Does not exist on Linux + int8_t adjustIncrement = 0; // Should perhaps store the new prio? + MemWrite( &item->threadWakeup.adjustReason, adjustReason ); + MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement ); TracyLfqCommit; } else diff --git a/external/sources/tracy/public/client/tracy_rpmalloc.cpp b/external/sources/tracy/public/client/tracy_rpmalloc.cpp index 711505d21a..315a40f96e 100644 --- a/external/sources/tracy/public/client/tracy_rpmalloc.cpp +++ b/external/sources/tracy/public/client/tracy_rpmalloc.cpp @@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap; # define _Thread_local __declspec(thread) # define TLS_MODEL # else -# ifndef __HAIKU__ +# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26 +# define TLS_MODEL __attribute__((tls_model("local-dynamic"))) +# elif !defined(__HAIKU__) # define TLS_MODEL __attribute__((tls_model("initial-exec"))) # else # define TLS_MODEL @@ -781,7 +783,7 @@ rpmalloc_set_main_thread(void) { static void _rpmalloc_spin(void) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && !(defined(_M_ARM) || defined(_M_ARM64)) _mm_pause(); #elif defined(__x86_64__) || defined(__i386__) __asm__ 
volatile("pause" ::: "memory"); @@ -793,8 +795,7 @@ _rpmalloc_spin(void) { #elif defined(__sparc__) __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); #else - struct timespec ts = {0}; - nanosleep(&ts, 0); + std::this_thread::yield(); #endif } diff --git a/external/sources/tracy/public/common/TracyProtocol.hpp b/external/sources/tracy/public/common/TracyProtocol.hpp index 5eb1639db3..40cf5e6730 100644 --- a/external/sources/tracy/public/common/TracyProtocol.hpp +++ b/external/sources/tracy/public/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 64 }; +enum : uint32_t { ProtocolVersion = 74 }; enum : uint16_t { BroadcastVersion = 3 }; using lz4sz_t = uint32_t; @@ -47,10 +47,10 @@ enum ServerQuery : uint8_t ServerQueryFrameName, ServerQueryParameter, ServerQueryFiberName, + ServerQueryExternalName, // Items above are high priority. Split order must be preserved. See IsQueryPrio(). 
ServerQueryDisconnect, ServerQueryCallstackFrame, - ServerQueryExternalName, ServerQuerySymbol, ServerQuerySymbolCode, ServerQuerySourceCode, diff --git a/external/sources/tracy/public/common/TracyQueue.hpp b/external/sources/tracy/public/common/TracyQueue.hpp index 051d412abf..daef3ec1b0 100644 --- a/external/sources/tracy/public/common/TracyQueue.hpp +++ b/external/sources/tracy/public/common/TracyQueue.hpp @@ -42,6 +42,8 @@ enum class QueueType : uint8_t MemAllocCallstackNamed, MemFreeCallstack, MemFreeCallstackNamed, + MemDiscard, + MemDiscardCallstack, GpuZoneBegin, GpuZoneBeginCallstack, GpuZoneBeginAllocSrcLoc, @@ -70,6 +72,7 @@ enum class QueueType : uint8_t KeepAlive, ThreadContext, GpuCalibration, + GpuTimeSync, Crash, CrashReport, ZoneValidation, @@ -107,6 +110,7 @@ enum class QueueType : uint8_t SingleStringData, SecondStringData, MemNamePayload, + ThreadGroupHint, StringData, ThreadName, PlotName, @@ -258,6 +262,7 @@ struct QueueFiberEnter int64_t time; uint64_t fiber; // ptr uint32_t thread; + int32_t groupHint; }; struct QueueFiberLeave @@ -398,7 +403,10 @@ enum class GpuContextType : uint8_t Vulkan, OpenCL, Direct3D12, - Direct3D11 + Direct3D11, + Metal, + Custom, + CUDA }; enum GpuContextFlags : uint8_t @@ -453,6 +461,13 @@ struct QueueGpuCalibration uint8_t context; }; +struct QueueGpuTimeSync +{ + int64_t gpuTime; + int64_t cpuTime; + uint8_t context; +}; + struct QueueGpuContextName { uint8_t context; @@ -469,6 +484,12 @@ struct QueueMemNamePayload uint64_t name; }; +struct QueueThreadGroupHint +{ + uint32_t thread; + int32_t groupHint; +}; + struct QueueMemAlloc { int64_t time; @@ -484,6 +505,13 @@ struct QueueMemFree uint64_t ptr; }; +struct QueueMemDiscard +{ + int64_t time; + uint32_t thread; + uint64_t name; +}; + struct QueueCallstackFat { uint64_t ptr; @@ -577,14 +605,20 @@ struct QueueContextSwitch uint32_t oldThread; uint32_t newThread; uint8_t cpu; - uint8_t reason; - uint8_t state; + uint8_t oldThreadWaitReason; + uint8_t 
oldThreadState; + uint8_t previousCState; + int8_t newThreadPriority; + int8_t oldThreadPriority; }; struct QueueThreadWakeup { int64_t time; uint32_t thread; + uint8_t cpu; + int8_t adjustReason; + int8_t adjustIncrement; }; struct QueueTidToPid @@ -631,6 +665,7 @@ struct QueueSourceCodeNotAvailable struct QueueCpuTopology { uint32_t package; + uint32_t die; uint32_t core; uint32_t thread; }; @@ -718,11 +753,14 @@ struct QueueItem QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; QueueGpuCalibration gpuCalibration; + QueueGpuTimeSync gpuTimeSync; QueueGpuContextName gpuContextName; QueueGpuContextNameFat gpuContextNameFat; QueueMemAlloc memAlloc; QueueMemFree memFree; + QueueMemDiscard memDiscard; QueueMemNamePayload memName; + QueueThreadGroupHint threadGroupHint; QueueCallstackFat callstackFat; QueueCallstackFatThread callstackFatThread; QueueCallstackAllocFat callstackAllocFat; @@ -792,6 +830,8 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named + sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), + sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location @@ -821,6 +861,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), + sizeof( QueueHeader ) + sizeof( QueueGpuTimeSync ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), @@ -858,6 +899,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // single string data sizeof( 
QueueHeader ), // second string data sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), + sizeof( QueueHeader ) + sizeof( QueueThreadGroupHint ), // keep all QueueStringTransfer below sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name diff --git a/external/sources/tracy/public/common/TracySocket.cpp b/external/sources/tracy/public/common/TracySocket.cpp index 259678989e..bdba361965 100644 --- a/external/sources/tracy/public/common/TracySocket.cpp +++ b/external/sources/tracy/public/common/TracySocket.cpp @@ -21,6 +21,9 @@ # pragma warning(disable:4267) # endif # define poll WSAPoll +# ifdef _MSC_VER +# pragma comment(lib, "ws2_32.lib") +# endif #else # include # include diff --git a/external/sources/tracy/public/common/TracySystem.cpp b/external/sources/tracy/public/common/TracySystem.cpp index 9a477aa310..a92a34578a 100644 --- a/external/sources/tracy/public/common/TracySystem.cpp +++ b/external/sources/tracy/public/common/TracySystem.cpp @@ -26,8 +26,13 @@ # include #elif defined __FreeBSD__ # include -#elif defined __NetBSD__ || defined __DragonFly__ +#elif defined __NetBSD__ +# include +#elif defined __DragonFly__ # include +#elif defined __QNX__ +# include +# include #endif #ifdef __MINGW32__ @@ -78,6 +83,8 @@ TRACY_API uint32_t GetThreadHandleImpl() return lwp_gettid(); #elif defined __OpenBSD__ return getthrid(); +#elif defined __QNX__ + return (uint32_t) gettid(); #elif defined __EMSCRIPTEN__ // Not supported, but let it compile. 
return 0; @@ -96,16 +103,10 @@ TRACY_API uint32_t GetThreadHandleImpl() } #ifdef TRACY_ENABLE -struct ThreadNameData -{ - uint32_t id; - const char* name; - ThreadNameData* next; -}; std::atomic& GetThreadNameData(); #endif -#ifdef _MSC_VER +#if defined _MSC_VER && !defined __clang__ # pragma pack( push, 8 ) struct THREADNAME_INFO { @@ -129,6 +130,11 @@ void ThreadNameMsvcMagic( const THREADNAME_INFO& info ) #endif TRACY_API void SetThreadName( const char* name ) +{ + SetThreadNameWithHint( name, 0 ); +} + +TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ) { #if defined _WIN32 # ifdef TRACY_UWP @@ -144,7 +150,7 @@ TRACY_API void SetThreadName( const char* name ) } else { -# if defined _MSC_VER +# if defined _MSC_VER && !defined __clang__ THREADNAME_INFO info; info.dwType = 0x1000; info.szName = name; @@ -176,6 +182,21 @@ TRACY_API void SetThreadName( const char* name ) #endif } } +#elif defined __QNX__ + { + const auto sz = strlen( name ); + if( sz <= _NTO_THREAD_NAME_MAX ) + { + pthread_setname_np( pthread_self(), name ); + } + else + { + char buf[_NTO_THREAD_NAME_MAX + 1]; + memcpy( buf, name, _NTO_THREAD_NAME_MAX ); + buf[_NTO_THREAD_NAME_MAX] = '\0'; + pthread_setname_np( pthread_self(), buf ); + } + }; #endif #ifdef TRACY_ENABLE { @@ -185,6 +206,7 @@ TRACY_API void SetThreadName( const char* name ) buf[sz] = '\0'; auto data = (ThreadNameData*)tracy_malloc_fast( sizeof( ThreadNameData ) ); data->id = detail::GetThreadHandleImpl(); + data->groupHint = groupHint; data->name = buf; data->next = GetThreadNameData().load( std::memory_order_relaxed ); while( !GetThreadNameData().compare_exchange_weak( data->next, data, std::memory_order_release, std::memory_order_relaxed ) ) {} @@ -192,6 +214,22 @@ TRACY_API void SetThreadName( const char* name ) #endif } +#ifdef TRACY_ENABLE +ThreadNameData* GetThreadNameData( uint32_t id ) +{ + auto ptr = GetThreadNameData().load( std::memory_order_relaxed ); + while( ptr ) + { + if( ptr->id == id ) + { + 
return ptr; + } + ptr = ptr->next; + } + return nullptr; +} +#endif + TRACY_API const char* GetThreadName( uint32_t id ) { static char buf[256]; @@ -255,6 +293,11 @@ TRACY_API const char* GetThreadName( uint32_t id ) pthread_setcancelstate( cs, 0 ); # endif return buf; +#elif defined __QNX__ + static char qnxNameBuf[_NTO_THREAD_NAME_MAX + 1] = {0}; + if (pthread_getname_np(static_cast(id), qnxNameBuf, _NTO_THREAD_NAME_MAX) == 0) { + return qnxNameBuf; + }; #endif sprintf( buf, "%" PRIu32, id ); diff --git a/external/sources/tracy/public/common/TracySystem.hpp b/external/sources/tracy/public/common/TracySystem.hpp index e0040e95c6..2f565e9a26 100644 --- a/external/sources/tracy/public/common/TracySystem.hpp +++ b/external/sources/tracy/public/common/TracySystem.hpp @@ -14,6 +14,16 @@ TRACY_API uint32_t GetThreadHandleImpl(); } #ifdef TRACY_ENABLE +struct ThreadNameData +{ + uint32_t id; + int32_t groupHint; + const char* name; + ThreadNameData* next; +}; + +ThreadNameData* GetThreadNameData( uint32_t id ); + TRACY_API uint32_t GetThreadHandle(); #else static inline uint32_t GetThreadHandle() @@ -23,9 +33,10 @@ static inline uint32_t GetThreadHandle() #endif TRACY_API void SetThreadName( const char* name ); +TRACY_API void SetThreadNameWithHint( const char* name, int32_t groupHint ); TRACY_API const char* GetThreadName( uint32_t id ); -TRACY_API const char* GetEnvVar(const char* name); +TRACY_API const char* GetEnvVar( const char* name ); } diff --git a/external/sources/tracy/public/common/TracyVersion.hpp b/external/sources/tracy/public/common/TracyVersion.hpp index 2355279f72..93b6737eda 100644 --- a/external/sources/tracy/public/common/TracyVersion.hpp +++ b/external/sources/tracy/public/common/TracyVersion.hpp @@ -6,8 +6,8 @@ namespace tracy namespace Version { enum { Major = 0 }; -enum { Minor = 10 }; -enum { Patch = 0 }; +enum { Minor = 12 }; +enum { Patch = 2 }; } } diff --git a/external/sources/tracy/public/common/tracy_lz4.cpp 
b/external/sources/tracy/public/common/tracy_lz4.cpp index 6c26639c57..15d0990f82 100644 --- a/external/sources/tracy/public/common/tracy_lz4.cpp +++ b/external/sources/tracy/public/common/tracy_lz4.cpp @@ -128,11 +128,11 @@ #endif /* _MSC_VER */ #ifndef LZ4_FORCE_INLINE -# ifdef _MSC_VER /* Visual Studio */ +# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ # define LZ4_FORCE_INLINE static __forceinline # else # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# ifdef __GNUC__ +# if defined (__GNUC__) || defined (__clang__) # define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) # else # define LZ4_FORCE_INLINE static inline diff --git a/external/sources/tracy/public/libbacktrace/dwarf.cpp b/external/sources/tracy/public/libbacktrace/dwarf.cpp index f3899cbce1..52fa8a8d22 100644 --- a/external/sources/tracy/public/libbacktrace/dwarf.cpp +++ b/external/sources/tracy/public/libbacktrace/dwarf.cpp @@ -725,8 +725,8 @@ struct dwarf_data struct dwarf_data *next; /* The data for .gnu_debugaltlink. */ struct dwarf_data *altlink; - /* The base address for this file. */ - uintptr_t base_address; +/* The base address mapping for this file. */ + struct libbacktrace_base_address base_address; /* A sorted list of address ranges. */ struct unit_addrs *addrs; /* Number of address ranges in list. 
*/ @@ -1947,8 +1947,9 @@ update_pcrange (const struct attr* attr, const struct attr_val* val, static int add_low_high_range (struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, - struct unit *u, const struct pcrange *pcrange, + struct libbacktrace_base_address base_address, + int is_bigendian, struct unit *u, + const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, uintptr_t lowpc, uintptr_t highpc, @@ -1983,8 +1984,8 @@ add_low_high_range (struct backtrace_state *state, /* Add in the base address of the module when recording PC values, so that we can look up the PC directly. */ - lowpc += base_address; - highpc += base_address; + lowpc = libbacktrace_add_base (lowpc, base_address); + highpc = libbacktrace_add_base (highpc, base_address); return add_range (state, rdata, lowpc, highpc, error_callback, data, vec); } @@ -1996,7 +1997,7 @@ static int add_ranges_from_ranges ( struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, + struct libbacktrace_base_address base_address, int is_bigendian, struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, @@ -2042,10 +2043,11 @@ add_ranges_from_ranges ( base = (uintptr_t) high; else { - if (!add_range (state, rdata, - (uintptr_t) low + base + base_address, - (uintptr_t) high + base + base_address, - error_callback, data, vec)) + uintptr_t rl, rh; + + rl = libbacktrace_add_base ((uintptr_t) low + base, base_address); + rh = libbacktrace_add_base ((uintptr_t) high + base, base_address); + if (!add_range (state, rdata, rl, rh, error_callback, data, vec)) return 0; } } @@ -2063,7 +2065,7 @@ static int add_ranges_from_rnglists ( struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, + struct libbacktrace_base_address base_address, 
int is_bigendian, struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, @@ -2146,9 +2148,10 @@ add_ranges_from_rnglists ( u->addrsize, is_bigendian, index, error_callback, data, &high)) return 0; - if (!add_range (state, rdata, low + base_address, - high + base_address, error_callback, data, - vec)) + if (!add_range (state, rdata, + libbacktrace_add_base (low, base_address), + libbacktrace_add_base (high, base_address), + error_callback, data, vec)) return 0; } break; @@ -2165,7 +2168,7 @@ add_ranges_from_rnglists ( error_callback, data, &low)) return 0; length = read_uleb128 (&rnglists_buf); - low += base_address; + low = libbacktrace_add_base (low, base_address); if (!add_range (state, rdata, low, low + length, error_callback, data, vec)) return 0; @@ -2179,8 +2182,9 @@ add_ranges_from_rnglists ( low = read_uleb128 (&rnglists_buf); high = read_uleb128 (&rnglists_buf); - if (!add_range (state, rdata, low + base + base_address, - high + base + base_address, + if (!add_range (state, rdata, + libbacktrace_add_base (low + base, base_address), + libbacktrace_add_base (high + base, base_address), error_callback, data, vec)) return 0; } @@ -2197,9 +2201,10 @@ add_ranges_from_rnglists ( low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); high = (uintptr_t) read_address (&rnglists_buf, u->addrsize); - if (!add_range (state, rdata, low + base_address, - high + base_address, error_callback, data, - vec)) + if (!add_range (state, rdata, + libbacktrace_add_base (low, base_address), + libbacktrace_add_base (high, base_address), + error_callback, data, vec)) return 0; } break; @@ -2211,7 +2216,7 @@ add_ranges_from_rnglists ( low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); length = (uintptr_t) read_uleb128 (&rnglists_buf); - low += base_address; + low = libbacktrace_add_base (low, base_address); if (!add_range (state, rdata, low, low + length, error_callback, data, vec)) return 0; @@ 
-2239,7 +2244,7 @@ add_ranges_from_rnglists ( static int add_ranges (struct backtrace_state *state, const struct dwarf_sections *dwarf_sections, - uintptr_t base_address, int is_bigendian, + struct libbacktrace_base_address base_address, int is_bigendian, struct unit *u, uintptr_t base, const struct pcrange *pcrange, int (*add_range) (struct backtrace_state *state, void *rdata, uintptr_t lowpc, uintptr_t highpc, @@ -2275,7 +2280,8 @@ add_ranges (struct backtrace_state *state, read, 0 if there is some error. */ static int -find_address_ranges (struct backtrace_state *state, uintptr_t base_address, +find_address_ranges (struct backtrace_state *state, + struct libbacktrace_base_address base_address, struct dwarf_buf *unit_buf, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *altlink, @@ -2430,7 +2436,8 @@ find_address_ranges (struct backtrace_state *state, uintptr_t base_address, on success, 0 on failure. */ static int -build_address_map (struct backtrace_state *state, uintptr_t base_address, +build_address_map (struct backtrace_state *state, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *altlink, backtrace_error_callback error_callback, void *data, @@ -2649,7 +2656,7 @@ add_line (struct backtrace_state *state, struct dwarf_data *ddata, /* Add in the base address here, so that we can look up the PC directly. 
*/ - ln->pc = pc + ddata->base_address; + ln->pc = libbacktrace_add_base (pc, ddata->base_address); ln->filename = filename; ln->lineno = lineno; @@ -4251,6 +4258,19 @@ dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata, } } +bool dwarf_fileline_dwarf_lookup_pc_in_all_entries(struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, backtrace_error_callback error_callback, void *data, + int& found, int ret) +{ + for (struct dwarf_data* ddata = (struct dwarf_data *)state->fileline_data; + ddata != NULL; + ddata = ddata->next) + { + ret = dwarf_lookup_pc(state, ddata, pc, callback, error_callback, data, &found); + if (ret != 0 || found) return true; + } + return false; +} /* Return the file/line information for a PC using the DWARF mapping we built earlier. */ @@ -4262,20 +4282,30 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc, { struct dwarf_data *ddata; int found; - int ret; + int ret = 0; if (!state->threaded) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) { - for (ddata = (struct dwarf_data *) state->fileline_data; - ddata != NULL; - ddata = ddata->next) - { - ret = dwarf_lookup_pc (state, ddata, pc, callback, error_callback, - data, &found); - if (ret != 0 || found) - return ret; - } + return ret; } + + // if we failed to obtain an entry in range, it can mean that the address map has been changed and new entries + // have been loaded in the meantime. Request a refresh and try again. 
+ if (state->request_known_address_ranges_refresh_fn) + { + int new_range_count = state->request_known_address_ranges_refresh_fn(state, pc); + if (new_range_count > 0) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) + { + return ret; + } + } + } + + } else { struct dwarf_data **pp; @@ -4306,7 +4336,7 @@ dwarf_fileline (struct backtrace_state *state, uintptr_t pc, static struct dwarf_data * build_dwarf_data (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *altlink, @@ -4364,7 +4394,7 @@ build_dwarf_data (struct backtrace_state *state, int backtrace_dwarf_add (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *fileline_altlink, diff --git a/external/sources/tracy/public/libbacktrace/elf.cpp b/external/sources/tracy/public/libbacktrace/elf.cpp index c65bc4e768..ffe8d7024b 100644 --- a/external/sources/tracy/public/libbacktrace/elf.cpp +++ b/external/sources/tracy/public/libbacktrace/elf.cpp @@ -38,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include +#include #ifdef HAVE_DL_ITERATE_PHDR #include @@ -642,7 +643,7 @@ elf_symbol_search (const void *vkey, const void *ventry) static int elf_initialize_syminfo (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const unsigned char *symtab_data, size_t symtab_size, const unsigned char *strtab, size_t strtab_size, backtrace_error_callback error_callback, @@ -708,7 +709,8 @@ elf_initialize_syminfo (struct backtrace_state *state, = *(const b_elf_addr *) (opd->data + (sym->st_value - opd->addr)); else elf_symbols[j].address = sym->st_value; - elf_symbols[j].address += base_address; + elf_symbols[j].address = + libbacktrace_add_base (elf_symbols[j].address, base_address); elf_symbols[j].size = sym->st_size; ++j; } @@ -1199,14 +1201,7 @@ elf_fetch_bits_backward (const unsigned char **ppin, val = *pval; if (unlikely (pin <= pinend)) - { - if (bits == 0) - { - elf_uncompress_failed (); - return 0; - } - return 1; - } + return 1; pin -= 4; @@ -5093,7 +5088,7 @@ elf_uncompress_chdr (struct backtrace_state *state, backtrace_error_callback error_callback, void *data, unsigned char **uncompressed, size_t *uncompressed_size) { - const b_elf_chdr *chdr; + b_elf_chdr chdr; char *alc; size_t alc_len; unsigned char *po; @@ -5105,27 +5100,30 @@ elf_uncompress_chdr (struct backtrace_state *state, if (compressed_size < sizeof (b_elf_chdr)) return 1; - chdr = (const b_elf_chdr *) compressed; + /* The lld linker can misalign a compressed section, so we can't safely read + the fields directly as we can for other ELF sections. See + https://github.com/ianlancetaylor/libbacktrace/pull/120. 
*/ + memcpy (&chdr, compressed, sizeof (b_elf_chdr)); alc = NULL; alc_len = 0; - if (*uncompressed != NULL && *uncompressed_size >= chdr->ch_size) + if (*uncompressed != NULL && *uncompressed_size >= chdr.ch_size) po = *uncompressed; else { - alc_len = chdr->ch_size; + alc_len = chdr.ch_size; alc = (char*)backtrace_alloc (state, alc_len, error_callback, data); if (alc == NULL) return 0; po = (unsigned char *) alc; } - switch (chdr->ch_type) + switch (chdr.ch_type) { case ELFCOMPRESS_ZLIB: if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), compressed_size - sizeof (b_elf_chdr), - zdebug_table, po, chdr->ch_size)) + zdebug_table, po, chdr.ch_size)) goto skip; break; @@ -5133,7 +5131,7 @@ elf_uncompress_chdr (struct backtrace_state *state, if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), compressed_size - sizeof (b_elf_chdr), (unsigned char *)zdebug_table, po, - chdr->ch_size)) + chdr.ch_size)) goto skip; break; @@ -5143,7 +5141,7 @@ elf_uncompress_chdr (struct backtrace_state *state, } *uncompressed = po; - *uncompressed_size = chdr->ch_size; + *uncompressed_size = chdr.ch_size; return 1; @@ -5585,6 +5583,7 @@ elf_uncompress_lzma_block (const unsigned char *compressed, uint64_t header_compressed_size; uint64_t header_uncompressed_size; unsigned char lzma2_properties; + size_t crc_offset; uint32_t computed_crc; uint32_t stream_crc; size_t uncompressed_offset; @@ -5688,28 +5687,29 @@ elf_uncompress_lzma_block (const unsigned char *compressed, /* The properties describe the dictionary size, but we don't care what that is. */ - /* Block header padding. */ - if (unlikely (off + 4 > compressed_size)) + /* Skip to just before CRC, verifying zero bytes in between. 
*/ + crc_offset = block_header_offset + block_header_size - 4; + if (unlikely (crc_offset + 4 > compressed_size)) { elf_uncompress_failed (); return 0; } - - off = (off + 3) &~ (size_t) 3; - - if (unlikely (off + 4 > compressed_size)) + for (; off < crc_offset; off++) { - elf_uncompress_failed (); - return 0; + if (compressed[off] != 0) + { + elf_uncompress_failed (); + return 0; + } } /* Block header CRC. */ computed_crc = elf_crc32 (0, compressed + block_header_offset, block_header_size - 4); - stream_crc = (compressed[off] - | (compressed[off + 1] << 8) - | (compressed[off + 2] << 16) - | (compressed[off + 3] << 24)); + stream_crc = ((uint32_t)compressed[off] + | ((uint32_t)compressed[off + 1] << 8) + | ((uint32_t)compressed[off + 2] << 16) + | ((uint32_t)compressed[off + 3] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6216,10 +6216,10 @@ elf_uncompress_lzma_block (const unsigned char *compressed, return 0; } computed_crc = elf_crc32 (0, uncompressed, uncompressed_offset); - stream_crc = (compressed[off] - | (compressed[off + 1] << 8) - | (compressed[off + 2] << 16) - | (compressed[off + 3] << 24)); + stream_crc = ((uint32_t)compressed[off] + | ((uint32_t)compressed[off + 1] << 8) + | ((uint32_t)compressed[off + 2] << 16) + | ((uint32_t)compressed[off + 3] << 24)); if (computed_crc != stream_crc) { elf_uncompress_failed (); @@ -6319,10 +6319,10 @@ elf_uncompress_lzma (struct backtrace_state *state, /* Next comes a CRC of the stream flags. */ computed_crc = elf_crc32 (0, compressed + 6, 2); - stream_crc = (compressed[8] - | (compressed[9] << 8) - | (compressed[10] << 16) - | (compressed[11] << 24)); + stream_crc = ((uint32_t)compressed[8] + | ((uint32_t)compressed[9] << 8) + | ((uint32_t)compressed[10] << 16) + | ((uint32_t)compressed[11] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6363,10 +6363,10 @@ elf_uncompress_lzma (struct backtrace_state *state, /* Before that is a footer CRC. 
*/ computed_crc = elf_crc32 (0, compressed + offset, 6); - stream_crc = (compressed[offset - 4] - | (compressed[offset - 3] << 8) - | (compressed[offset - 2] << 16) - | (compressed[offset - 1] << 24)); + stream_crc = ((uint32_t)compressed[offset - 4] + | ((uint32_t)compressed[offset - 3] << 8) + | ((uint32_t)compressed[offset - 2] << 16) + | ((uint32_t)compressed[offset - 1] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6422,10 +6422,10 @@ elf_uncompress_lzma (struct backtrace_state *state, /* Next is a CRC of the index. */ computed_crc = elf_crc32 (0, compressed + index_offset, offset - index_offset); - stream_crc = (compressed[offset] - | (compressed[offset + 1] << 8) - | (compressed[offset + 2] << 16) - | (compressed[offset + 3] << 24)); + stream_crc = ((uint32_t)compressed[offset] + | ((uint32_t)compressed[offset + 1] << 8) + | ((uint32_t)compressed[offset + 2] << 16) + | ((uint32_t)compressed[offset + 3] << 24)); if (unlikely (computed_crc != stream_crc)) { elf_uncompress_failed (); @@ -6518,8 +6518,10 @@ backtrace_uncompress_lzma (struct backtrace_state *state, static int elf_add (struct backtrace_state *state, const char *filename, int descriptor, const unsigned char *memory, size_t memory_size, - uintptr_t base_address, backtrace_error_callback error_callback, - void *data, fileline *fileline_fn, int *found_sym, int *found_dwarf, + struct libbacktrace_base_address base_address, + struct elf_ppc64_opd_data *caller_opd, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym, int *found_dwarf, struct dwarf_data **fileline_entry, int exe, int debuginfo, const char *with_buildid_data, uint32_t with_buildid_size) { @@ -6574,6 +6576,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, struct elf_view split_debug_view[DEBUG_MAX]; unsigned char split_debug_view_valid[DEBUG_MAX]; struct elf_ppc64_opd_data opd_data, *opd; + int opd_view_valid; struct 
dwarf_sections dwarf_sections; struct dwarf_data *fileline_altlink = NULL; @@ -6602,6 +6605,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, debug_view_valid = 0; memset (&split_debug_view_valid[0], 0, sizeof split_debug_view_valid); opd = NULL; + opd_view_valid = 0; if (!elf_get_view (state, descriptor, memory, memory_size, 0, sizeof ehdr, error_callback, data, &ehdr_view)) @@ -6858,7 +6862,8 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } - if (!gnu_debugdata_view_valid + if (!debuginfo + && !gnu_debugdata_view_valid && strcmp (name, ".gnu_debugdata") == 0) { if (!elf_get_view (state, descriptor, memory, memory_size, @@ -6885,12 +6890,18 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, opd->addr = shdr->sh_addr; opd->data = (const char *) opd_data.view.view.data; opd->size = shdr->sh_size; + opd_view_valid = 1; } } + /* A debuginfo file may not have a useful .opd section, but we can use the + one from the original executable. 
*/ + if (opd == NULL) + opd = caller_opd; + if (symtab_shndx == 0) symtab_shndx = dynsym_shndx; - if (symtab_shndx != 0 && !debuginfo) + if (symtab_shndx != 0) { const b_elf_shdr *symtab_shdr; unsigned int strtab_shndx; @@ -6966,9 +6977,9 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, elf_release_view (state, &debuglink_view, error_callback, data); if (debugaltlink_view_valid) elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, error_callback, - data, fileline_fn, found_sym, found_dwarf, NULL, 0, - 1, NULL, 0); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); if (ret < 0) backtrace_close (d, error_callback, data); else if (descriptor >= 0) @@ -6983,12 +6994,6 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, buildid_view_valid = 0; } - if (opd) - { - elf_release_view (state, &opd->view, error_callback, data); - opd = NULL; - } - if (debuglink_name != NULL) { int d; @@ -7003,9 +7008,9 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, elf_release_view (state, &debuglink_view, error_callback, data); if (debugaltlink_view_valid) elf_release_view (state, &debugaltlink_view, error_callback, data); - ret = elf_add (state, "", d, NULL, 0, base_address, error_callback, - data, fileline_fn, found_sym, found_dwarf, NULL, 0, - 1, NULL, 0); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); if (ret < 0) backtrace_close (d, error_callback, data); else if (descriptor >= 0) @@ -7030,7 +7035,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, { int ret; - ret = elf_add (state, filename, d, NULL, 0, base_address, + ret = elf_add (state, filename, d, NULL, 0, base_address, opd, error_callback, data, 
fileline_fn, found_sym, found_dwarf, &fileline_altlink, 0, 1, debugaltlink_buildid_data, debugaltlink_buildid_size); @@ -7067,7 +7072,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (ret) { ret = elf_add (state, filename, -1, gnu_debugdata_uncompressed, - gnu_debugdata_uncompressed_size, base_address, + gnu_debugdata_uncompressed_size, base_address, opd, error_callback, data, fileline_fn, found_sym, found_dwarf, NULL, 0, 0, NULL, 0); if (ret >= 0 && descriptor >= 0) @@ -7076,6 +7081,13 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, } } + if (opd_view_valid) + { + elf_release_view (state, &opd->view, error_callback, data); + opd_view_valid = 0; + opd = NULL; + } + /* Read all the debug sections in a single view, since they are probably adjacent in the file. If any of sections are uncompressed, we never release this view. */ @@ -7322,7 +7334,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor, if (split_debug_view_valid[i]) elf_release_view (state, &split_debug_view[i], error_callback, data); } - if (opd) + if (opd_view_valid) elf_release_view (state, &opd->view, error_callback, data); if (descriptor >= 0) backtrace_close (descriptor, error_callback, data); @@ -7350,13 +7362,37 @@ struct PhdrIterate { char* dlpi_name; ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; }; FastVector s_phdrData(16); +struct ElfAddrRange +{ + ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; +}; +FastVector s_sortedKnownElfRanges(16); + +static int address_in_known_elf_ranges(uintptr_t pc) +{ + auto it = std::lower_bound( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), pc, + []( const ElfAddrRange& lhs, const uintptr_t rhs ) { return uintptr_t(lhs.dlpi_addr) > rhs; } ); + if( it != s_sortedKnownElfRanges.end() && pc <= it->dlpi_end_addr ) + { + return true; + } + return false; +} + static int phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, void 
*pdata) { + if( address_in_known_elf_ranges(info->dlpi_addr) ) + { + return 0; + } + auto ptr = s_phdrData.push_next(); if (info->dlpi_name) { @@ -7366,6 +7402,12 @@ phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, } else ptr->dlpi_name = nullptr; ptr->dlpi_addr = info->dlpi_addr; + + // calculate the end address as well, so we can quickly determine if a PC is within the range of this image + ptr->dlpi_end_addr = uintptr_t(info->dlpi_addr) + (info->dlpi_phnum ? uintptr_t( + info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz) : 0); + return 0; } @@ -7379,6 +7421,7 @@ phdr_callback (struct PhdrIterate *info, void *pdata) const char *filename; int descriptor; int does_not_exist; + struct libbacktrace_base_address base_address; fileline elf_fileline_fn; int found_dwarf; @@ -7408,7 +7451,8 @@ phdr_callback (struct PhdrIterate *info, void *pdata) return 0; } - if (elf_add (pd->state, filename, descriptor, NULL, 0, info->dlpi_addr, + base_address.m = info->dlpi_addr; + if (elf_add (pd->state, filename, descriptor, NULL, 0, base_address, NULL, pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym, &found_dwarf, NULL, 0, 0, NULL, 0)) { @@ -7422,6 +7466,66 @@ phdr_callback (struct PhdrIterate *info, void *pdata) return 0; } +static int elf_iterate_phdr_and_add_new_files(phdr_data *pd) +{ + assert(s_phdrData.empty()); + // dl_iterate_phdr, will only add entries for elf files loaded in a previously unseen range + dl_iterate_phdr(phdr_callback_mock, nullptr); + + if(s_phdrData.size() == 0) + { + return 0; + } + + uint32_t headersAdded = 0; + for (auto &v : s_phdrData) + { + phdr_callback(&v, (void *)pd); + + auto newEntry = s_sortedKnownElfRanges.push_next(); + newEntry->dlpi_addr = v.dlpi_addr; + newEntry->dlpi_end_addr = v.dlpi_end_addr; + + tracy_free(v.dlpi_name); + + headersAdded++; + } + + s_phdrData.clear(); + + std::sort( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), + []( 
const ElfAddrRange& lhs, const ElfAddrRange& rhs ) { return lhs.dlpi_addr > rhs.dlpi_addr; } ); + + return headersAdded; +} + +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT +/* Request an elf entry update if the pc passed in is not in any of the known elf ranges. +This could mean that new images were dlopened and we need to add those new elf entries */ +static int elf_refresh_address_ranges_if_needed(struct backtrace_state *state, uintptr_t pc) +{ + if ( address_in_known_elf_ranges(pc) ) + { + return 0; + } + + struct phdr_data pd; + int found_sym = 0; + int found_dwarf = 0; + fileline fileline_fn = nullptr; + pd.state = state; + pd.error_callback = nullptr; + pd.data = nullptr; + pd.fileline_fn = &fileline_fn; + pd.found_sym = &found_sym; + pd.found_dwarf = &found_dwarf; + pd.exe_filename = nullptr; + pd.exe_descriptor = -1; + + return elf_iterate_phdr_and_add_new_files(&pd); +} +#endif //#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + /* Initialize the backtrace data we need from an ELF executable. At the ELF level, all we need to do is find the debug info sections. */ @@ -7437,11 +7541,21 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, fileline elf_fileline_fn = elf_nodebug; struct phdr_data pd; - ret = elf_add (state, filename, descriptor, NULL, 0, 0, error_callback, data, - &elf_fileline_fn, &found_sym, &found_dwarf, NULL, 1, 0, NULL, - 0); - if (!ret) - return 0; + + /* When using fdpic we must use dl_iterate_phdr for all modules, including + the main executable, so that we can get the right base address + mapping. 
*/ + if (!libbacktrace_using_fdpic ()) + { + struct libbacktrace_base_address zero_base_address; + + memset (&zero_base_address, 0, sizeof zero_base_address); + ret = elf_add (state, filename, descriptor, NULL, 0, zero_base_address, + NULL, error_callback, data, &elf_fileline_fn, &found_sym, + &found_dwarf, NULL, 1, 0, NULL, 0); + if (!ret) + return 0; + } pd.state = state; pd.error_callback = error_callback; @@ -7452,14 +7566,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, pd.exe_filename = filename; pd.exe_descriptor = ret < 0 ? descriptor : -1; - assert (s_phdrData.empty()); - dl_iterate_phdr (phdr_callback_mock, nullptr); - for (auto& v : s_phdrData) - { - phdr_callback (&v, (void *) &pd); - tracy_free (v.dlpi_name); - } - s_phdrData.clear(); + elf_iterate_phdr_and_add_new_files(&pd); if (!state->threaded) { @@ -7485,6 +7592,13 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, if (*fileline_fn == NULL || *fileline_fn == elf_nodebug) *fileline_fn = elf_fileline_fn; + // install an address range refresh callback so we can cope with dynamically loaded elf files +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + state->request_known_address_ranges_refresh_fn = elf_refresh_address_ranges_if_needed; +#else + state->request_known_address_ranges_refresh_fn = NULL; +#endif + return 1; } diff --git a/external/sources/tracy/public/libbacktrace/fileline.cpp b/external/sources/tracy/public/libbacktrace/fileline.cpp index 8645d754af..5a37ff0c7d 100644 --- a/external/sources/tracy/public/libbacktrace/fileline.cpp +++ b/external/sources/tracy/public/libbacktrace/fileline.cpp @@ -47,6 +47,18 @@ POSSIBILITY OF SUCH DAMAGE. 
*/ #include #endif +#ifdef HAVE_WINDOWS_H +#ifndef WIN32_MEAN_AND_LEAN +#define WIN32_MEAN_AND_LEAN +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include +#endif + #include "backtrace.hpp" #include "internal.hpp" @@ -158,6 +170,47 @@ macho_get_executable_path (struct backtrace_state *state, #endif /* !defined (HAVE_MACH_O_DYLD_H) */ +#if HAVE_DECL__PGMPTR + +#define windows_executable_filename() _pgmptr + +#else /* !HAVE_DECL__PGMPTR */ + +#define windows_executable_filename() NULL + +#endif /* !HAVE_DECL__PGMPTR */ + +#ifdef HAVE_WINDOWS_H + +#define FILENAME_BUF_SIZE (MAX_PATH) + +static char * +windows_get_executable_path (char *buf, backtrace_error_callback error_callback, + void *data) +{ + size_t got; + int error; + + got = GetModuleFileNameA (NULL, buf, FILENAME_BUF_SIZE - 1); + error = GetLastError (); + if (got == 0 + || (got == FILENAME_BUF_SIZE - 1 && error == ERROR_INSUFFICIENT_BUFFER)) + { + error_callback (data, + "could not get the filename of the current executable", + error); + return NULL; + } + return buf; +} + +#else /* !defined (HAVE_WINDOWS_H) */ + +#define windows_get_executable_path(buf, error_callback, data) NULL +#define FILENAME_BUF_SIZE 64 + +#endif /* !defined (HAVE_WINDOWS_H) */ + /* Initialize the fileline information from the executable. Returns 1 on success, 0 on failure. 
*/ @@ -171,7 +224,7 @@ fileline_initialize (struct backtrace_state *state, int called_error_callback; int descriptor; const char *filename; - char buf[64]; + char buf[FILENAME_BUF_SIZE]; if (!state->threaded) failed = state->fileline_initialization_failed; @@ -195,7 +248,7 @@ fileline_initialize (struct backtrace_state *state, descriptor = -1; called_error_callback = 0; - for (pass = 0; pass < 8; ++pass) + for (pass = 0; pass < 10; ++pass) { int does_not_exist; @@ -208,25 +261,33 @@ fileline_initialize (struct backtrace_state *state, filename = getexecname (); break; case 2: - filename = "/proc/self/exe"; + /* Test this before /proc/self/exe, as the latter exists but points + to the wine binary (and thus doesn't work). */ + filename = windows_executable_filename (); break; case 3: - filename = "/proc/curproc/file"; + filename = "/proc/self/exe"; break; case 4: + filename = "/proc/curproc/file"; + break; + case 5: snprintf (buf, sizeof (buf), "/proc/%ld/object/a.out", (long) getpid ()); filename = buf; break; - case 5: + case 6: filename = sysctl_exec_name1 (state, error_callback, data); break; - case 6: + case 7: filename = sysctl_exec_name2 (state, error_callback, data); break; - case 7: + case 8: filename = macho_get_executable_path (state, error_callback, data); break; + case 9: + filename = windows_get_executable_path (buf, error_callback, data); + break; default: abort (); } diff --git a/external/sources/tracy/public/libbacktrace/internal.hpp b/external/sources/tracy/public/libbacktrace/internal.hpp index f871844b62..213959759b 100644 --- a/external/sources/tracy/public/libbacktrace/internal.hpp +++ b/external/sources/tracy/public/libbacktrace/internal.hpp @@ -133,6 +133,11 @@ typedef void (*syminfo) (struct backtrace_state *state, uintptr_t pc, backtrace_syminfo_callback callback, backtrace_error_callback error_callback, void *data); +/* The type of the function that will trigger a known address range refresh + (if pc passed in is for an address which lies
outside of known ranges) */ +typedef int (*request_known_address_ranges_refresh)(struct backtrace_state *state, + uintptr_t pc); + /* What the backtrace state pointer points to. */ struct backtrace_state @@ -159,6 +164,8 @@ struct backtrace_state int lock_alloc; /* The freelist when using mmap. */ struct backtrace_freelist_struct *freelist; + /* Trigger a known address range refresh */ + request_known_address_ranges_refresh request_known_address_ranges_refresh_fn; }; /* Open a file for reading. Returns -1 on error. If DOES_NOT_EXIST @@ -326,10 +333,44 @@ struct dwarf_sections struct dwarf_data; +/* The load address mapping. */ + +#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H)) + +#ifdef HAVE_LINK_H + #include +#endif +#ifdef HAVE_SYS_LINK_H + #include +#endif + +#define libbacktrace_using_fdpic() (1) + +struct libbacktrace_base_address +{ + struct elf32_fdpic_loadaddr m; +}; + +#define libbacktrace_add_base(pc, base) \ + ((uintptr_t) (__RELOC_POINTER ((pc), (base).m))) + +#else /* not __FDPIC__ */ + +#define libbacktrace_using_fdpic() (0) + +struct libbacktrace_base_address +{ + uintptr_t m; +}; + +#define libbacktrace_add_base(pc, base) ((pc) + (base).m) + +#endif /* not __FDPIC__ */ + /* Add file/line information for a DWARF module. */ extern int backtrace_dwarf_add (struct backtrace_state *state, - uintptr_t base_address, + struct libbacktrace_base_address base_address, const struct dwarf_sections *dwarf_sections, int is_bigendian, struct dwarf_data *fileline_altlink, diff --git a/external/sources/tracy/public/libbacktrace/macho.cpp b/external/sources/tracy/public/libbacktrace/macho.cpp index 6cccdabaa0..b9f0845658 100644 --- a/external/sources/tracy/public/libbacktrace/macho.cpp +++ b/external/sources/tracy/public/libbacktrace/macho.cpp @@ -274,12 +274,14 @@ struct macho_nlist_64 /* Value found in nlist n_type field.
*/ -#define MACH_O_N_EXT 0x01 /* Extern symbol */ +#define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */ +#define MACH_O_N_TYPE 0x0e /* Mask for type bits */ + +/* Values found after masking with MACH_O_N_TYPE. */ +#define MACH_O_N_UNDF 0x00 /* Undefined symbol */ #define MACH_O_N_ABS 0x02 /* Absolute symbol */ -#define MACH_O_N_SECT 0x0e /* Defined in section */ +#define MACH_O_N_SECT 0x0e /* Defined in section from n_sect field */ -#define MACH_O_N_TYPE 0x0e /* Mask for type bits */ -#define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */ /* Information we keep for a Mach-O symbol. */ @@ -316,8 +318,9 @@ static const char * const dwarf_section_names[DEBUG_MAX] = /* Forward declaration. */ static int macho_add (struct backtrace_state *, const char *, int, off_t, - const unsigned char *, uintptr_t, int, - backtrace_error_callback, void *, fileline *, int *); + const unsigned char *, struct libbacktrace_base_address, + int, backtrace_error_callback, void *, fileline *, + int *); /* A dummy callback function used when we can't find any debug info. 
*/ @@ -495,10 +498,10 @@ macho_defined_symbol (uint8_t type) { if ((type & MACH_O_N_STAB) != 0) return 0; - if ((type & MACH_O_N_EXT) != 0) - return 0; switch (type & MACH_O_N_TYPE) { + case MACH_O_N_UNDF: + return 0; case MACH_O_N_ABS: return 1; case MACH_O_N_SECT: @@ -512,7 +515,7 @@ macho_defined_symbol (uint8_t type) static int macho_add_symtab (struct backtrace_state *state, int descriptor, - uintptr_t base_address, int is_64, + struct libbacktrace_base_address base_address, int is_64, off_t symoff, unsigned int nsyms, off_t stroff, unsigned int strsize, backtrace_error_callback error_callback, void *data) @@ -627,7 +630,7 @@ macho_add_symtab (struct backtrace_state *state, int descriptor, if (name[0] == '_') ++name; macho_symbols[j].name = name; - macho_symbols[j].address = value + base_address; + macho_symbols[j].address = libbacktrace_add_base (value, base_address); ++j; } @@ -760,7 +763,8 @@ macho_syminfo (struct backtrace_state *state, uintptr_t addr, static int macho_add_fat (struct backtrace_state *state, const char *filename, int descriptor, int swapped, off_t offset, - const unsigned char *match_uuid, uintptr_t base_address, + const unsigned char *match_uuid, + struct libbacktrace_base_address base_address, int skip_symtab, uint32_t nfat_arch, int is_64, backtrace_error_callback error_callback, void *data, fileline *fileline_fn, int *found_sym) @@ -862,7 +866,8 @@ macho_add_fat (struct backtrace_state *state, const char *filename, static int macho_add_dsym (struct backtrace_state *state, const char *filename, - uintptr_t base_address, const unsigned char *uuid, + struct libbacktrace_base_address base_address, + const unsigned char *uuid, backtrace_error_callback error_callback, void *data, fileline* fileline_fn) { @@ -980,7 +985,7 @@ macho_add_dsym (struct backtrace_state *state, const char *filename, static int macho_add (struct backtrace_state *state, const char *filename, int descriptor, off_t offset, const unsigned char *match_uuid, - uintptr_t 
base_address, int skip_symtab, + struct libbacktrace_base_address base_address, int skip_symtab, backtrace_error_callback error_callback, void *data, fileline *fileline_fn, int *found_sym) { @@ -1242,7 +1247,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, c = _dyld_image_count (); for (i = 0; i < c; ++i) { - uintptr_t base_address; + struct libbacktrace_base_address base_address; const char *name; int d; fileline mff; @@ -1266,7 +1271,7 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, continue; } - base_address = _dyld_get_image_vmaddr_slide (i); + base_address.m = _dyld_get_image_vmaddr_slide (i); mff = macho_nodebug; if (!macho_add (state, name, d, 0, NULL, base_address, 0, @@ -1321,10 +1326,12 @@ backtrace_initialize (struct backtrace_state *state, const char *filename, void *data, fileline *fileline_fn) { fileline macho_fileline_fn; + struct libbacktrace_base_address zero_base_address; int found_sym; macho_fileline_fn = macho_nodebug; - if (!macho_add (state, filename, descriptor, 0, NULL, 0, 0, + memset (&zero_base_address, 0, sizeof zero_base_address); + if (!macho_add (state, filename, descriptor, 0, NULL, zero_base_address, 0, error_callback, data, &macho_fileline_fn, &found_sym)) return 0; diff --git a/external/sources/tracy/public/tracy/Tracy.hpp b/external/sources/tracy/public/tracy/Tracy.hpp index 978eb5ef15..605d149fa8 100644 --- a/external/sources/tracy/public/tracy/Tracy.hpp +++ b/external/sources/tracy/public/tracy/Tracy.hpp @@ -13,11 +13,13 @@ #endif #ifndef TracyLine -# define TracyLine __LINE__ +# define TracyLine TracyConcat(__LINE__,U) // MSVC Edit and continue __LINE__ is non-constant. 
See https://developercommunity.visualstudio.com/t/-line-cannot-be-used-as-an-argument-for-constexpr/195665 #endif #ifndef TRACY_ENABLE +#define TracyNoop + #define ZoneNamed(x,y) #define ZoneNamedN(x,y,z) #define ZoneNamedC(x,y,z) @@ -33,8 +35,12 @@ #define ZoneText(x,y) #define ZoneTextV(x,y,z) +#define ZoneTextF(x,...) +#define ZoneTextVF(x,y,...) #define ZoneName(x,y) #define ZoneNameV(x,y,z) +#define ZoneNameF(x,...) +#define ZoneNameVF(x,y,...) #define ZoneColor(x) #define ZoneColorV(x,y) #define ZoneValue(x) @@ -69,8 +75,10 @@ #define TracyAlloc(x,y) #define TracyFree(x) +#define TracyMemoryDiscard(x) #define TracySecureAlloc(x,y) #define TracySecureFree(x) +#define TracySecureMemoryDiscard(x) #define TracyAllocN(x,y,z) #define TracyFreeN(x,y) @@ -92,8 +100,10 @@ #define TracyAllocS(x,y,z) #define TracyFreeS(x,y) +#define TracyMemoryDiscardS(x,y) #define TracySecureAllocS(x,y,z) #define TracySecureFreeS(x,y) +#define TracySecureMemoryDiscardS(x,y) #define TracyAllocNS(x,y,z,w) #define TracyFreeNS(x,y,z) @@ -109,9 +119,11 @@ #define TracyParameterRegister(x,y) #define TracyParameterSetup(x,y,z,w) #define TracyIsConnected false +#define TracyIsStarted false #define TracySetProgramName(x) #define TracyFiberEnter(x) +#define TracyFiberEnterHint(x,y) #define TracyFiberLeave #else @@ -122,23 +134,20 @@ #include "../client/TracyProfiler.hpp" #include "../client/TracyScoped.hpp" -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( 
&TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 +#endif -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) -#else -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), 
active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +#define TracyNoop tracy::ProfilerAvailable() -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ) -#endif +#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) + +#define ZoneTransient( varname, active ) tracy::ScopedZone varname( 
TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) +#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) +#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) #define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) #define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true ) @@ -147,8 +156,12 @@ #define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) #define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) +#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) #define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) #define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) +#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) +#define ZoneNameVF( varname, fmt, ... 
) varname.NameFmt( fmt, ##__VA_ARGS__ ) #define ZoneColor( color ) ___tracy_scoped_zone.Color( color ) #define ZoneColorV( varname, color ) varname.Color( color ) #define ZoneValue( value ) ___tracy_scoped_zone.Value( value ) @@ -169,7 +182,7 @@ #define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } #define LockableBase( type ) tracy::Lockable #define SharedLockableBase( type ) tracy::SharedLockable -#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname ) +#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##__LINE__ { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##__LINE__ ) #define LockableName( varname, txt, size ) varname.CustomName( txt, size ) #define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ) @@ -177,95 +190,52 @@ #define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size ) -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) -# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) -# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) -# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) - -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) -# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) -# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, 
size, TRACY_CALLSTACK, true ) -# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) - -# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) -# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) -# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) -# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) -#else -# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 ) -# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 ) -# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 ) -# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 ) - -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false ) -# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false ) -# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true ) -# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true ) - -# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name ) -# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name ) -# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name ) -# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name ) -#endif - -#ifdef TRACY_HAS_CALLSTACK -# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedNS( varname, name, depth, 
active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) - -# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) -# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) - -# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) -# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) -# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) -# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) - -# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) -# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) -# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) -# define TracySecureFreeS( ptr, depth ) 
tracy::Profiler::MemFreeCallstack( ptr, depth, true ) - -# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) -# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) -# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) -# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) - -# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) -# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) -# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) -# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) -#else -# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active ) -# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active ) -# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active ) - -# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active ) -# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active ) - -# define ZoneScopedS( depth ) ZoneScoped -# define ZoneScopedNS( name, depth ) ZoneScopedN( name ) -# define ZoneScopedCS( color, depth ) ZoneScopedC( color ) -# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color ) - -# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size ) -# define TracyFreeS( ptr, depth ) TracyFree( ptr ) -# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size ) -# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr ) - -# define TracyAllocNS( ptr, size, depth, 
name ) TracyAllocN( ptr, size, name ) -# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name ) -# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name ) -# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name ) - -# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size ) -# define TracyMessageLS( txt, depth ) TracyMessageL( txt ) -# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color ) -# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color ) -#endif +#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) +#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) +#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) + +#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) +#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) +#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ) +#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) + +#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) +#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) +#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK ) +#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) +#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) +#define TracySecureMemoryDiscard( name ) 
tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK ) + +#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) + +#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) +#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) + +#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) +#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) +#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) +#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( 
___tracy_scoped_zone, name, color, depth, true ) + +#define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) +#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) +#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) +#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) + +#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) +#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) +#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth ) +#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) +#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) +#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth ) + +#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) +#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) +#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) +#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) #define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) #define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) @@ -274,7 +244,8 @@ #define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); #ifdef TRACY_FIBERS -# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber ) +# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) +# define TracyFiberEnterHint( fiber, 
groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) # define TracyFiberLeave tracy::Profiler::LeaveFiber() #endif diff --git a/external/sources/tracy/public/tracy/TracyC.h b/external/sources/tracy/public/tracy/TracyC.h index 996889c40f..1b1373e0d0 100644 --- a/external/sources/tracy/public/tracy/TracyC.h +++ b/external/sources/tracy/public/tracy/TracyC.h @@ -4,7 +4,6 @@ #include #include -#include "../client/TracyCallstack.h" #include "../common/TracyApi.h" #ifdef __cplusplus @@ -39,6 +38,8 @@ TRACY_API void ___tracy_set_thread_name( const char* name ); typedef const void* TracyCZoneCtx; +typedef const void* TracyCLockCtx; + #define TracyCZone(c,x) #define TracyCZoneN(c,x,y) #define TracyCZoneC(c,x,y) @@ -51,8 +52,10 @@ typedef const void* TracyCZoneCtx; #define TracyCAlloc(x,y) #define TracyCFree(x) +#define TracyCMemoryDiscard(x) #define TracyCSecureAlloc(x,y) #define TracyCSecureFree(x) +#define TracyCSecureMemoryDiscard(x) #define TracyCAllocN(x,y,z) #define TracyCFreeN(x,y) @@ -83,8 +86,10 @@ typedef const void* TracyCZoneCtx; #define TracyCAllocS(x,y,z) #define TracyCFreeS(x,y) +#define TracyCMemoryDiscardS(x,y) #define TracyCSecureAllocS(x,y,z) #define TracyCSecureFreeS(x,y) +#define TracyCSecureMemoryDiscardS(x,y) #define TracyCAllocNS(x,y,z,w) #define TracyCFreeNS(x,y,z) @@ -96,7 +101,18 @@ typedef const void* TracyCZoneCtx; #define TracyCMessageCS(x,y,z,w) #define TracyCMessageLCS(x,y,z) +#define TracyCLockCtx(l) +#define TracyCLockAnnounce(l) +#define TracyCLockTerminate(l) +#define TracyCLockBeforeLock(l) +#define TracyCLockAfterLock(l) +#define TracyCLockAfterUnlock(l) +#define TracyCLockAfterTryLock(l,x) +#define TracyCLockMark(l) +#define TracyCLockCustomName(l,x,y) + #define TracyCIsConnected 0 +#define TracyCIsStarted 0 #ifdef TRACY_FIBERS # define TracyCFiberEnter(fiber) @@ -124,7 +140,7 @@ struct ___tracy_source_location_data struct ___tracy_c_zone_context { uint32_t id; - int active; + int32_t active; }; struct ___tracy_gpu_time_data @@ 
-142,7 +158,7 @@ struct ___tracy_gpu_zone_begin_data { struct ___tracy_gpu_zone_begin_callstack_data { uint64_t srcloc; - int depth; + int32_t depth; uint16_t queryId; uint8_t context; }; @@ -172,23 +188,36 @@ struct ___tracy_gpu_calibration_data { uint8_t context; }; +struct ___tracy_gpu_time_sync_data { + int64_t gpuTime; + uint8_t context; +}; + +struct __tracy_lockable_context_data; + // Some containers don't support storing const types. // This struct, as visible to user, is immutable, so treat it as if const was declared here. typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; +typedef struct __tracy_lockable_context_data* TracyCLockCtx; #ifdef TRACY_MANUAL_LIFETIME TRACY_API void ___tracy_startup_profiler(void); TRACY_API void ___tracy_shutdown_profiler(void); +TRACY_API int32_t ___tracy_profiler_started(void); + +# define TracyCIsStarted ___tracy_profiler_started() +#else +# define TracyCIsStarted 1 #endif -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ); -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ); -TRACY_API TracyCZoneCtx 
___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ); TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ); TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ); TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ); @@ -204,6 +233,7 @@ TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data ); TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data ); TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); @@ -214,21 +244,19 @@ TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_dat TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data ); TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data ); TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); -TRACY_API int 
___tracy_connected(void); +TRACY_API int32_t ___tracy_connected(void); -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -#else -# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = 
___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 #endif +#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = 
___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); + #define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx ); #define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size ); @@ -237,57 +265,44 @@ TRACY_API int ___tracy_connected(void); #define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ); -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ); -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ); -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ); -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ); - -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ); -TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ); -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ); -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ); - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) -# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) -# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) -# define TracyCSecureFree( ptr ) 
___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) - -# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) -# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) -# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) -# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) - -# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); -# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); -# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); -# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); -#else -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 ); -# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 ); -# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 ); -# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 ); - -# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name ); -# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name ); -# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name ); -# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name ); - -# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 ); -# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 ); -# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 ); -# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 ); -#endif +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t 
size, int32_t secure ); +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ); +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ); +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ); +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ); +TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ); + +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ); + +#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) +#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) +#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK ); +#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) +#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) +#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, 
TRACY_CALLSTACK ); + +#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) +#define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) +#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) +#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) + +#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); +#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); +#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); +#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); TRACY_API void ___tracy_emit_frame_mark( const char* name ); TRACY_API void ___tracy_emit_frame_mark_start( const char* name ); TRACY_API void ___tracy_emit_frame_mark_end( const char* name ); -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ); +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ); #define TracyCFrameMark ___tracy_emit_frame_mark( 0 ); #define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name ); @@ -299,7 +314,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_ TRACY_API void ___tracy_emit_plot( const char* name, double val ); TRACY_API void ___tracy_emit_plot_float( const char* name, float val ); TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ); -TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ); +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ); TRACY_API void 
___tracy_emit_message_appinfo( const char* txt, size_t size ); #define TracyCPlot( name, val ) ___tracy_emit_plot( name, val ); @@ -309,47 +324,46 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); #define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size ); -#ifdef TRACY_HAS_CALLSTACK -# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); - -# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) -# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) -# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) -# define TracyCSecureFreeS( ptr, depth ) 
___tracy_emit_memory_free_callstack( ptr, depth, 1 ) - -# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) -# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) -# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) -# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) - -# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); -# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); -# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); -# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); -#else -# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active ) -# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active ) -# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active ) -# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active ) - -# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size ) -# define TracyCFreeS( ptr, depth ) TracyCFree( ptr ) -# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size ) -# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr ) - -# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name ) -# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name ) -# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name ) -# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name ) - -# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size ) -# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt ) -# define TracyCMessageCS( 
txt, size, color, depth ) TracyCMessageC( txt, size, color ) -# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color ) -#endif +#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); + +#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) +#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) +#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth ) +#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) +#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) +#define TracyCSecureMemoryDiscardS( name, 
depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth ) + +#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) +#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) +#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) +#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) + +#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); +#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); +#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); +#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); + + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ); +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ); + +#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, 
TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock ); +#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock ); +#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock ); +#define TracyCLockAfterUnlock( lock ) ___tracy_after_unlock_lockable_ctx( lock ); +#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired ); +#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz ); #define TracyCIsConnected ___tracy_connected() diff --git a/external/sources/tracy/public/tracy/TracyCUDA.hpp b/external/sources/tracy/public/tracy/TracyCUDA.hpp new file mode 100644 index 0000000000..40ff55dc2b --- /dev/null +++ b/external/sources/tracy/public/tracy/TracyCUDA.hpp @@ -0,0 +1,1325 @@ +#ifndef __TRACYCUDA_HPP__ +#define __TRACYCUDA_HPP__ + +#ifndef TRACY_ENABLE + +#define TracyCUDAContext() nullptr +#define TracyCUDAContextDestroy(ctx) +#define TracyCUDAContextName(ctx, name, size) + +#define TracyCUDAStartProfiling(ctx) +#define TracyCUDAStopProfiling(ctx) + +#define TracyCUDACollect(ctx) + +#else +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _MSC_VER +#include +#endif + +#include + +#ifndef UNREFERENCED +#define UNREFERENCED(x) (void)x +#endif//UNREFERENCED + +#ifndef TRACY_CUDA_CALIBRATED_CONTEXT +#define TRACY_CUDA_CALIBRATED_CONTEXT (1) +#endif//TRACY_CUDA_CALIBRATED_CONTEXT + +#ifndef TRACY_CUDA_ENABLE_COLLECTOR_THREAD +#define 
TRACY_CUDA_ENABLE_COLLECTOR_THREAD (1) +#endif//TRACY_CUDA_ENABLE_COLLECTOR_THREAD + +#ifndef TRACY_CUDA_ENABLE_CUDA_CALL_STATS +#define TRACY_CUDA_ENABLE_CUDA_CALL_STATS (0) +#endif//TRACY_CUDA_ENABLE_CUDA_CALL_STATS + +namespace { + +// TODO(marcos): wrap these in structs for better type safety +using CUptiTimestamp = uint64_t; +using TracyTimestamp = int64_t; + +struct IncrementalRegression { /* Single-pass linear fit of y against x: keeps only running means and deviation sums, never the samples themselves. */ + using float_t = double; + struct Parameters { /* The fitted line, y = slope * x + intercept. */ + float_t slope, intercept; + }; + + int n = 0; /* number of samples folded in so far */ + float_t x_mean = 0; + float_t y_mean = 0; + float_t x_svar = 0; /* running sum of squared deviations of x from its mean (never divided by n) */ + float_t y_svar = 0; /* same accumulator for y */ + float_t xy_scov = 0; /* running sum of x-deviation times y-deviation (never divided by n) */ + + auto parameters() const { /* Least-squares line: slope is xy_scov over x_svar. NOTE(review): x_svar is still 0 after zero or one sample, so the division is by zero until more data arrives; presumably callers wait for that - confirm. */ + float_t slope = xy_scov / x_svar; + float_t intercept = y_mean - slope * x_mean; + return Parameters{ slope, intercept }; + } + + auto orthogonal() const { /* Alternative fit that also uses y_svar; divides by 2 * xy_scov, so it needs a non-zero xy_scov. */ + // NOTE(marcos): orthogonal regression is Deming regression with delta = 1 + float_t delta = float_t(1); // delta = 1 -> orthogonal regression + float_t k = y_svar - delta * x_svar; + float_t slope = (k + sqrt(k * k + 4 * delta * xy_scov * xy_scov)) / (2 * xy_scov); + float_t intercept = y_mean - slope * x_mean; + return Parameters{ slope, intercept }; + } + + void addSample(float_t x, float_t y) { /* Folds one (x, y) sample into the running statistics; each deviation sum multiplies the offset from the previous mean by the offset from the updated mean. */ + ++n; + float_t x_mean_prev = x_mean; + float_t y_mean_prev = y_mean; + x_mean += (x - x_mean) / n; + y_mean += (y - y_mean) / n; + x_svar += (x - x_mean_prev) * (x - x_mean); + y_svar += (y - y_mean_prev) * (y - y_mean); + xy_scov += (x - x_mean_prev) * (y - y_mean); + } +}; + +tracy_force_inline TracyTimestamp tracyGetTimestamp() { /* Current time as returned by tracy::Profiler::GetTime(). */ + return tracy::Profiler::GetTime(); +} + +auto& getCachedRegressionParameters() { /* Returns a mutable reference to one function-local static slope/intercept pair; it has static storage, so it reads as 0/0 until something stores a fit into it. */ + // WARN(marcos): in theory, these linear regression parameters would be loaded/stored atomically; + // in practice, however, it should not matter so long as the loads/stores are not "sliced" + static IncrementalRegression::Parameters cached; + return cached; +} + +TracyTimestamp tracyFromCUpti(CUptiTimestamp cuptiTime) { /* Converts a CUptiTimestamp into a TracyTimestamp. */ + // NOTE(marcos): linear regression estimate + // y_hat
= slope * x + intercept | X: CUptiTimestamp, Y: TracyTimestamp + auto [slope, intercept] = getCachedRegressionParameters(); + double y_hat = slope * cuptiTime + intercept; + TracyTimestamp tracyTime = TracyTimestamp(y_hat); + assert(tracyTime >= 0); + return tracyTime; +} + +template +tracy_force_inline void tracyMemWrite(T& where,U what) { + static_assert(std::is_same_v, "tracy::MemWrite: type mismatch."); + tracy::MemWrite(&where, what); +} + +void* tracyMalloc(size_t bytes) { + return tracy::tracy_malloc(bytes); +} + +void tracyFree(void* ptr) { + tracy::tracy_free(ptr); +} + +void tracyZoneBegin(TracyTimestamp time, tracy::SourceLocationData* srcLoc) { + using namespace tracy; + TracyQueuePrepare(QueueType::ZoneBegin); + tracyMemWrite(item->zoneBegin.time, time); + tracyMemWrite(item->zoneBegin.srcloc, (uint64_t)srcLoc); + TracyQueueCommit(zoneBeginThread); +} + +void tracyZoneEnd(TracyTimestamp time) { + using namespace tracy; + TracyQueuePrepare(QueueType::ZoneEnd); + tracyMemWrite(item->zoneEnd.time, time); + TracyQueueCommit(zoneEndThread); +} + +void tracyPlot(const char* name, float value, TracyTimestamp time) { + using namespace tracy; + TracyLfqPrepare(QueueType::PlotDataFloat); + tracyMemWrite(item->plotDataFloat.name, (uint64_t)name); + tracyMemWrite(item->plotDataFloat.time, time); + tracyMemWrite(item->plotDataFloat.val, value); + TracyLfqCommit; +} + +void tracyPlot(const char* name, float value, CUptiTimestamp time) { + tracyPlot(name, value, tracyFromCUpti(time)); +} + +void tracyPlotActivity(const char* name, TracyTimestamp start, TracyTimestamp end, float value = 1.0f, float baseline = 0.0f) { + tracyPlot(name, baseline, start); + tracyPlot(name, value, start + 3); + tracyPlot(name, value, end - 3); + tracyPlot(name, baseline, end); +} + +void tracyPlotActivity(const char* name, CUptiTimestamp start, CUptiTimestamp end, float value = 1.0f, float baseline = 0.0f) { + tracyPlotActivity(name, tracyFromCUpti(start), tracyFromCUpti(end), value, 
baseline); +} + +void tracyPlotBlip(const char* name, TracyTimestamp time, float value = 1.0f, float baseline = 0.0f) { + tracyPlot(name, baseline, time - 3); + tracyPlot(name, value, time); + tracyPlot(name, baseline, time + 3); +} + +void tracyPlotBlip(const char* name, CUptiTimestamp time, float value = 1.0f, float baseline = 0.0f) { + tracyPlotBlip(name, tracyFromCUpti(time), value, baseline); +} + +void tracyEmitMemAlloc(const char* name, const void* ptr, size_t size, TracyTimestamp time) { + using namespace tracy; + const auto thread = GetThreadHandle(); + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemNamePayload); + tracyMemWrite(item->memName.name, (uint64_t)name); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemAllocNamed); + tracyMemWrite(item->memAlloc.time, time); + tracyMemWrite(item->memAlloc.thread, thread); + tracyMemWrite(item->memAlloc.ptr, (uint64_t)ptr); + + if (compile_time_condition::value) + { + memcpy(&item->memAlloc.size, &size, 4); + memset(&item->memAlloc.size + 4, 0, 2); + } + else + { + assert(sizeof(size) == 8); + memcpy(&item->memAlloc.size, &size, 4); + memcpy(((char *)&item->memAlloc.size) + 4, ((char *)&size) + 4, 2); + } + Profiler::QueueSerialFinish(); +} + +void tracyEmitMemFree(const char* name, const void* ptr, TracyTimestamp time) { + using namespace tracy; + const auto thread = GetThreadHandle(); + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemNamePayload); + tracyMemWrite(item->memName.name, (uint64_t)name); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemFreeNamed); + tracyMemWrite(item->memFree.time, time); + tracyMemWrite(item->memFree.thread, thread); + tracyMemWrite(item->memFree.ptr, (uint64_t)ptr); + Profiler::QueueSerialFinish(); +} + +void tracyEmitMemAlloc(const char* name, const void* ptr, size_t size, 
CUptiTimestamp cuptiTime) { + tracyEmitMemAlloc(name, ptr, size, tracyFromCUpti(cuptiTime)); +} + +void tracyEmitMemFree(const char* name, const void* ptr, CUptiTimestamp cuptiTime) { + tracyEmitMemFree(name, ptr, tracyFromCUpti(cuptiTime)); +} + +void tracyAnnounceGpuTimestamp(TracyTimestamp apiStart, TracyTimestamp apiEnd, + uint16_t queryId, uint8_t gpuContextId, + const tracy::SourceLocationData* sourceLocation, uint32_t threadId) { + using namespace tracy; + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuZoneBeginSerial); + tracyMemWrite(item->gpuZoneBegin.cpuTime, apiStart); + tracyMemWrite(item->gpuZoneBegin.srcloc, (uint64_t)sourceLocation); + tracyMemWrite(item->gpuZoneBegin.thread, threadId); + tracyMemWrite(item->gpuZoneBegin.queryId, uint16_t(queryId+0)); + tracyMemWrite(item->gpuZoneBegin.context, gpuContextId); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuZoneEndSerial); + tracyMemWrite(item->gpuZoneEnd.cpuTime, apiEnd); + tracyMemWrite(item->gpuZoneEnd.thread, threadId); + tracyMemWrite(item->gpuZoneEnd.queryId, uint16_t(queryId+1)); + tracyMemWrite(item->gpuZoneEnd.context, gpuContextId); + Profiler::QueueSerialFinish(); +} + +void tracySubmitGpuTimestamp(CUptiTimestamp gpuStart, CUptiTimestamp gpuEnd, + uint16_t queryId, uint8_t gpuContextId) { + using namespace tracy; + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuTime); + tracyMemWrite(item->gpuTime.gpuTime, (int64_t)gpuStart); + tracyMemWrite(item->gpuTime.queryId, uint16_t(queryId+0)); + tracyMemWrite(item->gpuTime.context, gpuContextId); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuTime); + tracyMemWrite(item->gpuTime.gpuTime, (int64_t)gpuEnd); + tracyMemWrite(item->gpuTime.queryId, uint16_t(queryId+1)); + tracyMemWrite(item->gpuTime.context, gpuContextId); + 
Profiler::QueueSerialFinish(); +} + +#define CUPTI_API_CALL(call) CUptiCallChecked(call, #call, __FILE__, __LINE__) + +#define DRIVER_API_CALL(call) cudaDriverCallChecked(call, #call, __FILE__, __LINE__) + +CUptiResult CUptiCallChecked(CUptiResult result, const char* call, const char* file, int line) noexcept { + if (result == CUPTI_SUCCESS) + return result; + const char* resultMsg = ""; + CUPTI_API_CALL(cuptiGetResultString(result, &resultMsg)); // maybe not a good idea to recurse here... + fprintf(stderr, "ERROR:\t%s:%d:\n\tfunction '%s' failed with error '%s'.\n", file, line, call, resultMsg); + //assert(result == CUPTI_SUCCESS); + return result; +} + +CUresult cudaDriverCallChecked(CUresult result, const char* call, const char* file, int line) noexcept { + if (result == CUDA_SUCCESS) + return result; + const char* resultMsg = ""; + DRIVER_API_CALL(cuGetErrorString(result, &resultMsg)); // maybe not a good idea to recurse here... + fprintf(stderr, "ERROR:\t%s:%d:\n\tfunction '%s' failed with error '%s'.\n", file, line, call, resultMsg); + //assert(result == CUDA_SUCCESS); + return result; +} + +template +struct ConcurrentHashMap { + static constexpr bool instrument = false; + auto acquire_read_lock() { + if (m.try_lock_shared()) + return std::shared_lock(m, std::adopt_lock); + ZoneNamedC(rwlock, tracy::Color::Tomato, instrument); + return std::shared_lock(m); + } + auto acquire_write_lock() { + if (m.try_lock()) + return std::unique_lock(m, std::adopt_lock); + ZoneNamedC(wxlock, tracy::Color::Tomato, instrument); + return std::unique_lock(m); + } + std::unordered_map mapping; + std::shared_mutex m; + auto& operator[](TKey key) { + { + auto lock = acquire_read_lock(); + auto it = mapping.find(key); + if (it != mapping.end()) { + return it->second; + } + } + return emplace(key, TValue{}).first->second; + } + auto find(TKey key) { + ZoneNamed(find, instrument); + auto lock = acquire_read_lock(); + return mapping.find(key); + } + auto fetch(TKey key, TValue& value) 
{ + ZoneNamed(fetch, instrument); + auto it = mapping.find(key); + if (it != mapping.end()) { + value = it->second; + return true; + } + return false; + } + auto end() { + ZoneNamed(end, instrument); + auto lock = acquire_read_lock(); + return mapping.end(); + } + template + auto emplace(TKey key, Args&&... args) { + ZoneNamed(emplace, instrument); + auto lock = acquire_write_lock(); + return mapping.emplace(std::forward(key), std::forward(args)...); + } + auto erase(TKey key) { + ZoneNamed(erase, instrument); + auto lock = acquire_write_lock(); + return mapping.erase(key); + } +}; + +#if TRACY_CUDA_ENABLE_CUDA_CALL_STATS +struct ProfilerStats { + static constexpr bool instrument = false; + + ConcurrentHashMap> apiCallCount; + + void update(CUpti_CallbackDomain domain, CUpti_CallbackId cbid) { + ZoneNamed(update, instrument); + uint32_t key = (domain << 24) | (cbid & 0x00'FFFFFF); + auto it = apiCallCount.find(key); + if (it == apiCallCount.end()) { + it = apiCallCount.emplace(key, 0).first; + } + it->second.fetch_add(1, std::memory_order::memory_order_relaxed); + } +}; +#endif + +// StringTable: string memoization/interning +struct StringTable { + static constexpr bool instrument = false; + + // TODO(marcos): this could be just a "ConcurrentHashSet" + ConcurrentHashMap table; + + ~StringTable() { /* TODO(marcos): free string copy */ } + + std::string_view operator[](std::string_view str) { + ZoneNamedN(lookup, "StringTable::lookup", instrument); + std::string_view memoized; + if (!table.fetch(str, memoized)) { + ZoneNamedN(lookup, "StringTable::insert", instrument); + char* copy = (char*)tracyMalloc(str.size() + 1); + strncpy(copy, str.data(), str.size()); + copy[str.size()] = '\0'; + std::string_view value (copy, str.size()); + auto [it, inserted] = table.emplace(value, value); + if (!inserted) { + // another thread inserted it while we were trying to: cleanup + tracyFree(copy); + } + memoized = it->second; + } + assert(str == memoized); + return memoized; + } 
+}; + +struct SourceLocationMap { + static constexpr bool instrument = false; + + // NOTE(marcos): the address of an unordered_map value may become invalid + // later on (e.g., during a rehash), so mapping to a pointer is necessary + ConcurrentHashMap locations; + + ~SourceLocationMap() { /* TODO(marcos): free SourceLocationData* entries */ } + + tracy::SourceLocationData* retrieve(std::string_view function) { + ZoneNamed(retrieve, instrument); + tracy::SourceLocationData* pSrcLoc = nullptr; + locations.fetch(function, pSrcLoc); + return pSrcLoc; + } + + tracy::SourceLocationData* add(std::string_view function, std::string_view file, int line, uint32_t color=0) { + ZoneNamed(emplace, instrument); + assert(*function.end() == '\0'); + assert(*file.end() == '\0'); + void* bytes = tracyMalloc(sizeof(tracy::SourceLocationData)); + auto pSrcLoc = new(bytes)tracy::SourceLocationData{ function.data(), TracyFunction, file.data(), (uint32_t)line, color }; + auto [it, inserted] = locations.emplace(function, pSrcLoc); + if (!inserted) { + // another thread inserted it while we were trying to: cleanup + tracyFree(pSrcLoc); // POD: no destructor to call + } + assert(it->second != nullptr); + return it->second; + } +}; + +struct SourceLocationLUT { + static constexpr bool instrument = false; + + ~SourceLocationLUT() { /* no action needed: no dynamic allocation */ } + + tracy::SourceLocationData runtime [CUpti_runtime_api_trace_cbid::CUPTI_RUNTIME_TRACE_CBID_SIZE] = {}; + tracy::SourceLocationData driver [CUpti_driver_api_trace_cbid::CUPTI_DRIVER_TRACE_CBID_SIZE] = {}; + + tracy::SourceLocationData* retrieve(CUpti_CallbackDomain domain, CUpti_CallbackId cbid, CUpti_CallbackData* apiInfo) { + ZoneNamed(retrieve, instrument); + tracy::SourceLocationData* pSrcLoc = nullptr; + switch (domain) { + case CUPTI_CB_DOMAIN_RUNTIME_API : + if ((cbid > 0) && (cbid < CUPTI_RUNTIME_TRACE_CBID_SIZE)) { + pSrcLoc = &runtime[cbid]; + } + break; + case CUPTI_CB_DOMAIN_DRIVER_API : + if ((cbid > 0) 
&& (cbid < CUPTI_DRIVER_TRACE_CBID_SIZE)) { + pSrcLoc = &driver[cbid]; + } + break; + default: + break; + } + if (pSrcLoc->name == nullptr) { + const char* function = apiInfo->functionName ? apiInfo->functionName : "cuda???"; + // cuptiGetCallbackName includes the "version suffix" of the function/cbid + //CUPTI_API_CALL(cuptiGetCallbackName(domain, cbid, &function)); + *pSrcLoc = tracy::SourceLocationData{ function, TracyFunction, TracyFile, TracyLine, 0 }; + } + return pSrcLoc; + } +}; + +uint32_t tracyTimelineId(uint32_t contextId, uint32_t streamId) { + // 0xA7C5 = 42,949 => 42,949 * 100,000 = 4,294,900,000 + // 4,294,900,000 + 65,535 = 4,294,965,535 < 4,294,967,295 (max uint32) + assert(contextId <= 0xA7C5); + assert((streamId == CUPTI_INVALID_STREAM_ID) || (streamId < 0xFFFF)); + uint32_t packed = (contextId * 100'000) + (streamId & 0x0000'FFFF); + return packed; +} + +} // unnamed/anonymous namespace + +namespace tracy +{ + class CUDACtx + { + public: + static CUDACtx* Create() { + auto& s = Singleton::Get(); + std::unique_lock lock (s.m); + if (s.ref_count == 0) { + assert(s.ctx == nullptr); + s.ctx = new CUDACtx(s.ctx_id); + s.ref_count += 1; + s.ctx_id = s.ctx->m_tracyGpuContext; + } + return s.ctx; + } + + static void Destroy(CUDACtx* ctx) { + auto& s = Singleton::Get(); + std::unique_lock lock(s.m); + assert(ctx == s.ctx); + s.ref_count -= 1; + if (s.ref_count == 0) { + delete s.ctx; + s.ctx = nullptr; + } + } + + void Collect() + { + ZoneScoped; + CUPTI::FlushActivity(); + } + + void printStats() + { + #if TRACY_CUDA_ENABLE_CUDA_CALL_STATS + fprintf(stdout, "\nCUDA API stats:\n"); + { + struct Stats { CUpti_CallbackDomain domain; CUpti_CallbackId cbid; int count; }; + std::vector sorted; + for (auto&& api : stats.apiCallCount.mapping) { + auto domain = CUpti_CallbackDomain(api.first >> 24); + auto cbid = CUpti_CallbackId(api.first & 0x00'FFFFFF); + int count = api.second; + sorted.emplace_back(Stats{ domain, cbid, count }); + } + 
std::sort(sorted.begin(), sorted.end(), [](const Stats& x, const Stats& y) { return x.count > y.count; }); + for (auto&& api : sorted) { + const char* function = ""; + CUPTI_API_CALL(cuptiGetCallbackName(api.domain, api.cbid, &function)); + printf("- %s : %d\n", function, api.count); + } + } + #endif + } + + void StartProfiling() + { + ZoneScoped; + CUPTI::BeginInstrumentation(this); + } + + void StopProfiling() + { + ZoneScoped; + CUPTI::EndInstrumentation(); + printStats(); + } + + void Name(const char *name, uint16_t len) + { + auto ptr = (char*)tracyMalloc(len); + memcpy(ptr, name, len); + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuContextName); + tracyMemWrite(item->gpuContextNameFat.context, m_tracyGpuContext); + tracyMemWrite(item->gpuContextNameFat.ptr, (uint64_t)ptr); + tracyMemWrite(item->gpuContextNameFat.size, len); + SubmitQueueItem(item); + } + + tracy_force_inline void SubmitQueueItem(tracy::QueueItem *item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + static void QueryTimestamps(TracyTimestamp& tTracy, CUptiTimestamp& tCUpti) { + TracyTimestamp tTracy1 = tracyGetTimestamp(); + CUPTI_API_CALL(cuptiGetTimestamp(&tCUpti)); + TracyTimestamp tTracy2 = tracyGetTimestamp(); + // NOTE(marcos): giving more weight to 'tTracy2' + tTracy = (3*tTracy1 + 5*tTracy2) / 8; + } + + // NOTE(marcos): recalibration is 'static' since Tracy and CUPTI timestamps + // are "global" across all contexts; that said, each Tracy GPU context needs + // its own GpuCalibration message, but for now there's just a singleton context. 
+ void Recalibrate() { + ZoneScoped; + // NOTE(marcos): only one thread should do the calibration, but there's + // no good reason to block threads that also trying to do the same + static std::mutex m; + if (!m.try_lock()) + return; + std::unique_lock lock (m, std::adopt_lock); + ZoneNamedNC(zone, "tracy::CUDACtx::Recalibrate[effective]", tracy::Color::Goldenrod, true); + TracyTimestamp tTracy; + CUptiTimestamp tCUpti; + QueryTimestamps(tTracy, tCUpti); + #if TRACY_CUDA_CALIBRATED_CONTEXT + static CUptiTimestamp prevCUptiTime = tCUpti; + int64_t deltaTicksCUpti = tCUpti - prevCUptiTime; + if (deltaTicksCUpti > 0) { + prevCUptiTime = tCUpti; + auto* item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuCalibration); + tracyMemWrite(item->gpuCalibration.gpuTime, (int64_t)tCUpti); + tracyMemWrite(item->gpuCalibration.cpuTime, tTracy); + tracyMemWrite(item->gpuCalibration.cpuDelta, deltaTicksCUpti); + tracyMemWrite(item->gpuCalibration.context, m_tracyGpuContext); + Profiler::QueueSerialFinish(); + } + #endif + // NOTE(marcos): update linear regression incrementally, which will refine + // the estimation of Tracy timestamps (Y) from CUpti timestamps (X) + static IncrementalRegression model; + model.addSample(double(tCUpti), double(tTracy)); + // NOTE(marcos): using orthogonal regression because the independet variable + // (X: CUpti timestamps) measurements are also imprecise + getCachedRegressionParameters() = model.orthogonal(); + } + + protected: + void EmitGpuZone(TracyTimestamp apiStart, TracyTimestamp apiEnd, + CUptiTimestamp gpuStart, CUptiTimestamp gpuEnd, + const tracy::SourceLocationData* pSrcLoc, + uint32_t cudaContextId, uint32_t cudaStreamId) { + //uint32_t timelineId = tracy::GetThreadHandle(); + uint32_t timelineId = tracyTimelineId(cudaContextId, cudaStreamId); + uint16_t queryId = m_queryIdGen.fetch_add(2); + tracyAnnounceGpuTimestamp(apiStart, apiEnd, queryId, m_tracyGpuContext, pSrcLoc, timelineId); + 
tracySubmitGpuTimestamp(gpuStart, gpuEnd, queryId, m_tracyGpuContext); + } + + void OnEventsProcessed() { + Recalibrate(); + } + + struct CUPTI { + static void CUPTIAPI OnBufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) + { + ZoneScoped; + // TODO(marcos): avoid malloc and instead suballocate from a large circular buffer; + // according to the CUPTI documentation: "To minimize profiling overhead the client + // should return as quickly as possible from these callbacks." + *size = 1 * 1024*1024; // 1MB + *buffer = (uint8_t*)tracyMalloc(*size); + assert(*buffer != nullptr); + FlushActivityAsync(); + } + + static void CUPTIAPI OnBufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t* buffer, size_t size, size_t validSize) + { + // CUDA 6.0 onwards: all buffers from this callback are "global" buffers + // (i.e. there is no context/stream specific buffer; ctx is always NULL) + ZoneScoped; + tracy::SetThreadName("NVIDIA CUPTI Worker"); + CUptiResult status; + CUpti_Activity* record = nullptr; + while ((status = cuptiActivityGetNextRecord(buffer, validSize, &record)) == CUPTI_SUCCESS) { + DoProcessDeviceEvent(record); + } + if (status != CUPTI_ERROR_MAX_LIMIT_REACHED) { + CUptiCallChecked(status, "cuptiActivityGetNextRecord", TracyFile, TracyLine); + } + size_t dropped = 0; + CUPTI_API_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + assert(dropped == 0); + tracyFree(buffer); + PersistentState::Get().profilerHost->OnEventsProcessed(); + } + + // correlationID -> [CPU start time, CPU end time, CUPTI start time] + using CorrelationID = uint32_t; + struct APICallInfo { TracyTimestamp start = 0, end = 0; CUptiTimestamp cupti = CUPTI_TIMESTAMP_UNKNOWN; CUDACtx* host = nullptr; }; + + static void CUPTIAPI OnCallbackAPI( + void* userdata, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, + const void* cbdata) + { + static constexpr bool instrument = false; + + TracyTimestamp apiCallStartTime = tracyGetTimestamp(); + CUDACtx* 
profilerHost = (CUDACtx*)userdata; + + switch (domain) { + case CUPTI_CB_DOMAIN_RUNTIME_API: + case CUPTI_CB_DOMAIN_DRIVER_API: + break; + case CUPTI_CB_DOMAIN_RESOURCE: { + // match 'callbackId' with CUpti_CallbackIdResource + // interpret 'cbdata' as CUpti_ResourceData, + // or as CUpti_ModuleResourceData, + // or as CUpti_GraphData, + // or as CUpti_StreamAttrData, + // or as ... (what else?) + return; + } + case CUPTI_CB_DOMAIN_SYNCHRONIZE: { + // match 'callbackId' with CUpti_CallbackIdSync + // interpret 'cbdata' as CUpti_SynchronizeData + return; + } + case CUPTI_CB_DOMAIN_STATE: { + // match 'callbackId' with CUpti_CallbackIdState + // interpret 'cbdata' as CUpti_StateData + return; + } + case CUPTI_CB_DOMAIN_NVTX: { + // match 'callbackId' with CUpti_nvtx_api_trace_cbid + // interpret 'cbdata' as CUpti_NvtxData + return; + } + case CUPTI_CB_DOMAIN_FORCE_INT: + // NOTE(marcos): the "FORCE_INT" values in CUPTI enums exist only to + // force the enum to have a specific representation (signed 32bits) + case CUPTI_CB_DOMAIN_INVALID: + default: + // TODO(marcos): unexpected error! + return; + } + + // if we reached this point, then we are in the (runtime or driver) API domain + CUpti_CallbackData* apiInfo = (CUpti_CallbackData*)cbdata; + + // Emit the Tracy 'ZoneBegin' message upon entering the API call + // TODO(marcos): a RAII object could be useful here... 
+ if (apiInfo->callbackSite == CUPTI_API_ENTER) { + #if TRACY_CUDA_ENABLE_CUDA_CALL_STATS + ctx->stats.update(domain, cbid); + #endif + + auto& cudaCallSourceLocation = PersistentState::Get().cudaCallSourceLocation; + auto pSrcLoc = cudaCallSourceLocation.retrieve(domain, cbid, apiInfo); + + // HACK(marcos): the SourceLocationLUT::retrieve zone (above) should + // not be emitted before its enclosing zone (below) actually begins, + // so we delay the beginning of the enclosing zone to "unstack" them + if (SourceLocationLUT::instrument) + apiCallStartTime = tracyGetTimestamp(); + tracyZoneBegin(apiCallStartTime, pSrcLoc); + } + + if (apiInfo->callbackSite == CUPTI_API_ENTER) { + ZoneNamedN(enter, "tracy::CUDACtx::OnCUptiCallback[enter]", instrument); + // Track API calls that generate device activity: + bool trackDeviceActivity = false; + CUstream hStream = nullptr; + if (domain == CUPTI_CB_DOMAIN_RUNTIME_API) { + #define GET_STREAM_FUNC(Params, field) [](CUpti_CallbackData* api) { return ((Params*)api->functionParams)->field; } + #define NON_STREAM_FUNC() [](CUpti_CallbackData*) { return cudaStream_t(nullptr); } + static std::unordered_map cbidRuntimeTrackers = { + // Runtime: Kernel + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000, GET_STREAM_FUNC(cudaLaunchKernel_v7000_params, stream) }, + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000, GET_STREAM_FUNC(cudaLaunchKernel_ptsz_v7000_params, stream) }, + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060, GET_STREAM_FUNC(cudaLaunchKernelExC_v11060_params, config->stream) }, + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060, GET_STREAM_FUNC(cudaLaunchKernelExC_ptsz_v11060_params, config->stream) }, + // Runtime: Memory + { CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020, NON_STREAM_FUNC() }, + // Runtime: Memcpy + { CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020, 
GET_STREAM_FUNC(cudaMemcpyAsync_v3020_params, stream) }, + // Runtime: Memset + { CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020, GET_STREAM_FUNC(cudaMemsetAsync_v3020_params, stream) }, + // Runtime: Synchronization + { CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020, NON_STREAM_FUNC() }, + }; + #undef NON_STREAM_FUNC + #undef GET_STREAM_FUNC + auto it = cbidRuntimeTrackers.find(CUpti_runtime_api_trace_cbid(cbid)); + if (it != cbidRuntimeTrackers.end()) { + trackDeviceActivity = true; + hStream = (CUstream)it->second(apiInfo); + } + } + if (domain == CUPTI_CB_DOMAIN_DRIVER_API) { + #define GET_STREAM_FUNC(Params, field) [](CUpti_CallbackData* api) { return ((Params*)api->functionParams)->field; } + #define NON_STREAM_FUNC() [](CUpti_CallbackData*) { return CUstream(nullptr); } + static std::unordered_map cbidDriverTrackers = { + // Driver: Kernel + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel, GET_STREAM_FUNC(cuLaunchKernel_params, hStream) }, + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel_ptsz, GET_STREAM_FUNC(cuLaunchKernel_ptsz_params, hStream)} , + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernelEx, GET_STREAM_FUNC(cuLaunchKernelEx_params, config->hStream) }, + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernelEx_ptsz, GET_STREAM_FUNC(cuLaunchKernelEx_params, config->hStream) }, + }; + #undef NON_STREAM_FUNC + #undef GET_STREAM_FUNC + auto it = cbidDriverTrackers.find(CUpti_driver_api_trace_cbid(cbid)); + if (it != cbidDriverTrackers.end()) { + trackDeviceActivity = true; + hStream = it->second(apiInfo); + } + } + if (trackDeviceActivity) { + // NOTE(marcos): we should NOT track if the stream is being captured + 
CUstreamCaptureStatus status = {}; + DRIVER_API_CALL(cuStreamIsCapturing(hStream, &status)); + trackDeviceActivity = !(status == CU_STREAM_CAPTURE_STATUS_ACTIVE); + } + if (trackDeviceActivity) { + CUptiTimestamp tgpu; + // TODO(marcos): do a "reverse-estimate" to obtain CUpti time from Tracy time instead? + CUPTI_API_CALL(cuptiGetTimestamp(&tgpu)); + auto& cudaCallSiteInfo = PersistentState::Get().cudaCallSiteInfo; + cudaCallSiteInfo.emplace(apiInfo->correlationId, APICallInfo{ apiCallStartTime, apiCallStartTime, tgpu, profilerHost }); + } + auto& entryFlags = *apiInfo->correlationData; + assert(entryFlags == 0); + entryFlags |= trackDeviceActivity ? 0x8000 : 0; + } + + if (apiInfo->callbackSite == CUPTI_API_EXIT) { + APICallInfo* pApiInterval = [](CUpti_CallbackData* apiInfo) { + ZoneNamedN(exit, "tracy::CUDACtx::OnCUptiCallback[exit]", instrument); + auto entryFlags = *apiInfo->correlationData; + bool trackDeviceActivity = (entryFlags & 0x8000) != 0; + if (trackDeviceActivity) { + auto& cudaCallSiteInfo = PersistentState::Get().cudaCallSiteInfo; + auto it = cudaCallSiteInfo.find(apiInfo->correlationId); + if (it != cudaCallSiteInfo.end()) { + // WARN(marcos): leaking the address of a hash-map value could spell trouble + return &it->second; + } + } + // NOTE(marcos): this can happen if the GPU activity completes + // before the CUDA function that enqueued it returns (e.g., sync) + static APICallInfo sentinel; + return &sentinel; + }(apiInfo); + pApiInterval->end = tracyGetTimestamp(); + tracyZoneEnd(pApiInterval->end); + } + } + + static bool matchActivityToAPICall(uint32_t correlationId, APICallInfo& apiCallInfo) { + static constexpr bool instrument = false; + ZoneNamed(match, instrument); + auto& cudaCallSiteInfo = PersistentState::Get().cudaCallSiteInfo; + if (!cudaCallSiteInfo.fetch(correlationId, apiCallInfo)) { + return false; + } + cudaCallSiteInfo.erase(correlationId); + assert(apiCallInfo.host != nullptr); + return true; + } + + static void 
// Extracts one length-prefixed identifier ("<decimal length><characters>", as
// found in mangled names) from the null-terminated string at `*name`.
//
// name : in/out cursor; on success it is advanced past the extracted
//        identifier, otherwise it is left untouched.
//
// Returns the identifier, or an empty string when `*name` does not start with
// a digit. An identifier starting with "_GLOBAL__N__" is reported as
// "(anonymous_namespace)".
//
// The declared length is clamped to the characters actually remaining in the
// string, so a malformed or truncated name can neither be read out of bounds
// nor move the cursor past the terminator.
static std::string extractActualName(char** name) {
    char* cursor = *name;
    // isdigit() requires a value representable as unsigned char
    if (!isdigit(static_cast<unsigned char>(*cursor))) {
        return std::string();
    }

    // parse the decimal length prefix, saturating instead of overflowing
    const size_t maxLength = static_cast<size_t>(-1);
    size_t declared = 0;
    while (*cursor >= '0' && *cursor <= '9') {
        const size_t digit = static_cast<size_t>(*cursor - '0');
        declared = (declared > (maxLength - digit) / 10) ? maxLength : declared * 10 + digit;
        ++cursor;
    }

    // never take more characters than the string still holds
    const size_t available = strlen(cursor);
    const size_t length = (declared < available) ? declared : available;

    std::string actualName(cursor, length);
    // _GLOBAL__N__ with an id stands for anonymous namespace
    if (actualName.rfind("_GLOBAL__N__", 0) == 0) {
        actualName = std::string("(anonymous_namespace)");
    }

    *name = cursor + length;
    return actualName;
}
std::string_view demangledName; + #ifndef _MSC_VER + // TODO(marcos): extractActualNameNested is the main bottleneck right now; + // we need a specialized StringTable mapping from "peristent" kernel names + // (const char*/uintptr_t) to memoized, lazily initialized demangled names + auto& demangledNameTable = PersistentState::Get().demangledNameTable; + std::string demangled = extractActualNameNested(kernelName); + demangledName = demangledNameTable[demangled]; + #else + demangledName = kernelName; + #endif + auto pSrcLoc = kernelSrcLoc.retrieve(demangledName); + if (pSrcLoc == nullptr) { + pSrcLoc = kernelSrcLoc.add(demangledName, TracyFile, TracyLine); + } + return pSrcLoc; + } + + static void DoProcessDeviceEvent(CUpti_Activity *record) + { + static constexpr bool instrument = false; + ZoneNamed(activity, instrument); + + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[kernel]", instrument); + CUpti_ActivityKernel9* kernel9 = (CUpti_ActivityKernel9*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(kernel9->correlationId, apiCall)) { + return matchError(kernel9->correlationId, "KERNEL"); + } + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, kernel9->start, kernel9->end, getKernelSourceLocation(kernel9->name), kernel9->contextId, kernel9->streamId); + auto latency_ms = (kernel9->start - apiCall.cupti) / 1'000'000.0; + tracyPlotBlip("Kernel Latency (ms)", kernel9->start, latency_ms); + break; + } + + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[memcpy]", instrument); + CUpti_ActivityMemcpy5* memcpy5 = (CUpti_ActivityMemcpy5*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(memcpy5->correlationId, apiCall)) { + return matchError(memcpy5->correlationId, "MEMCPY"); + } + static constexpr tracy::SourceLocationData TracyCUPTISrcLocDeviceMemcpy { "CUDA::memcpy", TracyFunction, TracyFile, 
(uint32_t)TracyLine, tracy::Color::Blue }; + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, memcpy5->start, memcpy5->end, &TracyCUPTISrcLocDeviceMemcpy, memcpy5->contextId, memcpy5->streamId); + static constexpr const char* graph_name = "CUDA Memory Copy"; + tracyEmitMemAlloc(graph_name, (void*)(uintptr_t)memcpy5->correlationId, memcpy5->bytes, memcpy5->start); + tracyEmitMemFree (graph_name, (void*)(uintptr_t)memcpy5->correlationId, memcpy5->end); + break; + } + + case CUPTI_ACTIVITY_KIND_MEMSET: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[memset]", instrument); + CUpti_ActivityMemset4* memset4 = (CUpti_ActivityMemset4*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(memset4->correlationId, apiCall)) { + return matchError(memset4->correlationId, "MEMSET"); + } + static constexpr tracy::SourceLocationData TracyCUPTISrcLocDeviceMemset { "CUDA::memset", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Blue }; + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, memset4->start, memset4->end, &TracyCUPTISrcLocDeviceMemset, memset4->contextId, memset4->streamId); + static constexpr const char* graph_name = "CUDA Memory Set"; + tracyEmitMemAlloc(graph_name, (void*)(uintptr_t)memset4->correlationId, memset4->bytes, memset4->start); + tracyEmitMemFree (graph_name, (void*)(uintptr_t)memset4->correlationId, memset4->end); + break; + } + + case CUPTI_ACTIVITY_KIND_SYNCHRONIZATION: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[sync]", instrument); + CUpti_ActivitySynchronization* synchronization = (CUpti_ActivitySynchronization*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(synchronization->correlationId, apiCall)) { + return matchError(synchronization->correlationId, "SYNCHRONIZATION"); + } + // NOTE(marcos): synchronization can happen at different levels/objects: + // a. on the entire context : cuCtxSynchronize() -> timeline(ctx,0) + // b. 
on a specific stream : cuStreamSynchronize() -> timeline(ctx,stream) + // c. on a specific event : cuEventSynchronize() -> timeline(ctx,0xffff) + static constexpr tracy::SourceLocationData TracyCUPTISrcLocContextSynchronization { "CUDA::Context::sync", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Magenta }; + auto* pSrcLoc = &TracyCUPTISrcLocContextSynchronization; + uint32_t cudaContextId = synchronization->contextId; + uint32_t cudaStreamId = 0; + if (synchronization->streamId != CUPTI_SYNCHRONIZATION_INVALID_VALUE) { + static constexpr tracy::SourceLocationData TracyCUPTISrcLocStreamSynchronization{ "CUDA::Stream::sync", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Magenta3 }; + pSrcLoc = &TracyCUPTISrcLocStreamSynchronization; + cudaStreamId = synchronization->streamId; + } + if (synchronization->cudaEventId != CUPTI_SYNCHRONIZATION_INVALID_VALUE) { + static constexpr tracy::SourceLocationData TracyCUPTISrcLocEventSynchronization{ "CUDA::Event::sync", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Magenta4 }; + pSrcLoc = &TracyCUPTISrcLocEventSynchronization; + cudaStreamId = 0xFFFFFFFF; + // TODO(marcos): CUpti_ActivitySynchronization2 introduces a new + // field 'cudaEventSyncId' which complements 'cudaEventId' + } + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, synchronization->start, synchronization->end, pSrcLoc, cudaContextId, cudaStreamId); + static constexpr const char* graph_name = "CUDA Synchronization"; + tracyEmitMemAlloc(graph_name, (void*)(uintptr_t)synchronization->correlationId, 1, synchronization->start); + tracyEmitMemFree (graph_name, (void*)(uintptr_t)synchronization->correlationId, synchronization->end); + break; + } + case CUPTI_ACTIVITY_KIND_MEMORY2: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[malloc/free]", instrument); + CUpti_ActivityMemory3* memory3 = (CUpti_ActivityMemory3*)record; + APICallInfo apiCall; + if (!matchActivityToAPICall(memory3->correlationId, 
apiCall)) { + return matchError(memory3->correlationId, "MEMORY"); + } + static constexpr const char* graph_name = "CUDA Memory Allocation"; + if (memory3->memoryOperationType == CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_ALLOCATION){ + auto& memAllocAddress = PersistentState::Get().memAllocAddress; + memAllocAddress[memory3->address] = 1; + tracyEmitMemAlloc(graph_name, (void*)memory3->address, memory3->bytes, memory3->timestamp); + } + else if (memory3->memoryOperationType == CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_RELEASE){ + auto& memAllocAddress = PersistentState::Get().memAllocAddress; + int dontCare; + if (!memAllocAddress.fetch(memory3->address, dontCare)){ + // Note(Frank): This is a hack to handle the case where the memory allocation + // corresponds to the memory release is not found. + // This can happen when the memory is allocated when profiling is not enabled. + matchError(memory3->correlationId, "MEMORY/RELEASE"); + tracyEmitMemAlloc(graph_name, (void*)memory3->address, memory3->bytes, memory3->timestamp); + } else { + memAllocAddress.erase(memory3->address); + } + tracyEmitMemFree(graph_name, (void*)memory3->address, memory3->timestamp); + } + break; + } + case CUPTI_ACTIVITY_KIND_CUDA_EVENT : + { + // NOTE(marcos): a byproduct of CUPTI_ACTIVITY_KIND_SYNCHRONIZATION + // (I think this is related to cudaEvent*() API calls) + CUpti_ActivityCudaEvent2* event = (CUpti_ActivityCudaEvent2*)record; + UNREFERENCED(event); + break; + } + default: + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "Unknown activity record (kind is %d)", record->kind); + TracyMessageC(buffer, strlen(buffer), tracy::Color::Crimson); + break; + } + } + } + + static constexpr CUpti_CallbackDomain domains[] = { + CUPTI_CB_DOMAIN_RUNTIME_API, + CUPTI_CB_DOMAIN_DRIVER_API, + //CUPTI_CB_DOMAIN_RESOURCE, + //CUPTI_CB_DOMAIN_SYNCHRONIZE, + //CUPTI_CB_DOMAIN_NVTX, + //CUPTI_CB_DOMAIN_STATE + }; + + static constexpr CUpti_ActivityKind activities[] = { + //CUPTI_ACTIVITY_KIND_KERNEL, // 
mutually exclusive with CONCURRENT_KERNEL + CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL, + CUPTI_ACTIVITY_KIND_MEMCPY, + CUPTI_ACTIVITY_KIND_MEMSET, + CUPTI_ACTIVITY_KIND_SYNCHRONIZATION, + CUPTI_ACTIVITY_KIND_MEMORY2, + //CUPTI_ACTIVITY_KIND_MEMCPY2, + //CUPTI_ACTIVITY_KIND_OVERHEAD, + //CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API, + //CUPTI_ACTIVITY_KIND_RUNTIME, + //CUPTI_ACTIVITY_KIND_DRIVER, + }; + + static void BeginInstrumentation(CUDACtx* profilerHost) { + auto& currentProfilerHost = PersistentState::Get().profilerHost; + if (currentProfilerHost != nullptr) { + return; + } + currentProfilerHost = profilerHost; + + // NOTE(frank): full-stop synchronization to ensure we only handle + // CUDA API calls and device activities that happens past this point + cudaDeviceSynchronize(); + + auto& subscriber = PersistentState::Get().subscriber; + CUPTI_API_CALL(cuptiSubscribe(&subscriber, CUPTI::OnCallbackAPI, profilerHost)); + CUPTI_API_CALL(cuptiActivityRegisterCallbacks(CUPTI::OnBufferRequested, CUPTI::OnBufferCompleted)); + for (auto domain : domains) { + CUPTI_API_CALL(cuptiEnableDomain(uint32_t(true), subscriber, domain)); + } + for (auto activity : activities) { + CUPTI_API_CALL(cuptiActivityEnable(activity)); + } + + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + auto& collector = PersistentState::Get().collector; + collector.period = 160; + collector.signal.notify_one(); + #endif + } + + static void EndInstrumentation() { + auto& currentProfilerHost = PersistentState::Get().profilerHost; + if (currentProfilerHost == nullptr) { + return; + } + + // NOTE(frank): full-stop synchronization to ensure we catch + // and drain all the activities that has been tracked up to now. 
+ cudaDeviceSynchronize(); + + FlushActivity(); + + auto& subscriber = PersistentState::Get().subscriber; + for (auto activity : activities) { + CUPTI_API_CALL(cuptiActivityDisable(activity)); + } + for (auto domain : domains) { + CUPTI_API_CALL(cuptiEnableDomain(uint32_t(false), subscriber, domain)); + } + // TODO(marcos): is here a counterpart for 'cuptiActivityRegisterCallbacks()'? + CUPTI_API_CALL(cuptiUnsubscribe(subscriber)); + + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + auto& collector = PersistentState::Get().collector; + collector.period = ~uint32_t(0); + collector.signal.notify_one(); + #endif + + currentProfilerHost = nullptr; + } + + static void FlushActivity() + { + // NOTE(marcos): only one thread should do the collection at any given time, + // but there's no reason to block threads that are also trying to do the same + static std::mutex m; + if (!m.try_lock()) + return; + std::unique_lock lock (m, std::adopt_lock); + ZoneNamedNC(zone, "cuptiActivityFlushAll", tracy::Color::Red4, true); + CUPTI_API_CALL(cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_NONE)); + } + + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + // WARN(marcos): technically, CUPTI already offers async flushing of + // activity records through cuptiActivityFlushPeriod(), but I haven't + // had much luck getting reliable, consistent delivery with it... 
+ struct Collector { + std::atomic running = true; + volatile uint32_t period = ~uint32_t(0); + std::mutex mtx; + std::condition_variable signal; + std::thread thread = std::thread( + [this]() { + tracy::SetThreadName("Tracy CUDA Collector"); + atexit([]() { + auto& collector = CUPTI::PersistentState::Get().collector; + collector.running = false; + collector.signal.notify_one(); + collector.thread.join(); + }); + while (running) { + { + std::unique_lock lock(mtx); + signal.wait_for(lock, std::chrono::milliseconds(period)); + } + FlushActivity(); + } + } + ); + }; + #endif + + static void FlushActivityAsync() + { + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + ZoneScoped; + auto& collector = PersistentState::Get().collector; + collector.signal.notify_one(); + #endif + } + + struct PersistentState { + // NOTE(marcos): these objects must remain in memory past the application + // returning from main() because the Tracy client worker thread may still + // be responding to string/source-location requests from the server + SourceLocationMap kernelSrcLoc; + StringTable demangledNameTable; + SourceLocationLUT cudaCallSourceLocation; + + // NOTE(marcos): these objects do not need to persist, but their relative + // footprint is trivial enough that we don't care if we let them leak + ConcurrentHashMap cudaCallSiteInfo; + ConcurrentHashMap memAllocAddress; + CUpti_SubscriberHandle subscriber = {}; + CUDACtx* profilerHost = nullptr; + + Collector collector; + + static PersistentState& Get() { + static PersistentState& persistent = *(new PersistentState()); + return persistent; + } + }; + + }; + + CUDACtx(uint8_t gpuContextID = 255) + { + ZoneScoped; + + if (gpuContextID != 255) { + m_tracyGpuContext = gpuContextID; + return; + } + + m_tracyGpuContext = GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed); + assert(m_tracyGpuContext != 255); + + TracyTimestamp tTracy; + CUptiTimestamp tCUpti; + QueryTimestamps(tTracy, tCUpti); + + // Announce to Tracy about a new GPU 
context/timeline: + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuNewContext); + tracyMemWrite(item->gpuNewContext.cpuTime, tTracy); + tracyMemWrite(item->gpuNewContext.gpuTime, (int64_t)tCUpti); // TODO: Be more careful about this cast + tracyMemWrite(item->gpuNewContext.thread, (uint32_t)0); + tracyMemWrite(item->gpuNewContext.period, 1.0f); + tracyMemWrite(item->gpuNewContext.type, GpuContextType::CUDA); + tracyMemWrite(item->gpuNewContext.context, m_tracyGpuContext); + #if TRACY_CUDA_CALIBRATED_CONTEXT + tracyMemWrite(item->gpuNewContext.flags, GpuContextCalibration); + #else + tracyMemWrite(item->gpuNewContext.flags, tracy::GpuContextFlags(0)); + #endif + Profiler::QueueSerialFinish(); + + constexpr const char* tracyCtxName = "CUDA GPU/Device Activity"; + this->Name(tracyCtxName, uint16_t(strlen(tracyCtxName))); + + // NOTE(marcos): a few rounds of calibation amorthized over 1 second + // in order to get a meaningful linear regression estimator + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(400)); + Recalibrate(); + } + + ~CUDACtx() + { + ZoneScoped; + } + + struct Singleton { + CUDACtx* ctx = nullptr; + std::mutex m; + int ref_count = 0; + uint8_t ctx_id = 255; + static Singleton& Get() { + static Singleton singleton; + return singleton; + } + }; + + #if TRACY_CUDA_ENABLE_CUDA_CALL_STATS + ProfilerStats stats = {}; + #endif + + uint8_t m_tracyGpuContext = 255; + static constexpr size_t cacheline = 64; + alignas(cacheline) std::atomic m_queryIdGen = 0; + }; + +} + +#define TracyCUDAContext() tracy::CUDACtx::Create() +#define TracyCUDAContextDestroy(ctx) tracy::CUDACtx::Destroy(ctx) +#define TracyCUDAContextName(ctx, name, size) ctx->Name(name, size) + +#define 
TracyCUDAStartProfiling(ctx) ctx->StartProfiling() +#define TracyCUDAStopProfiling(ctx) ctx->StopProfiling() + +#define TracyCUDACollect(ctx) ctx->Collect() + +#endif + +#endif \ No newline at end of file diff --git a/external/sources/tracy/public/tracy/TracyD3D11.hpp b/external/sources/tracy/public/tracy/TracyD3D11.hpp index 8aebdb2653..acab383169 100644 --- a/external/sources/tracy/public/tracy/TracyD3D11.hpp +++ b/external/sources/tracy/public/tracy/TracyD3D11.hpp @@ -95,6 +95,10 @@ class D3D11Ctx int64_t tcpu0 = Profiler::GetTime(); WaitForQuery(m_disjointQuery); + // NOTE: one would expect that by waiting for the enclosing disjoint query to finish, + // all timestamp queries within would also be readily available, but that does not + // seem to be the case here... See https://github.com/wolfpld/tracy/issues/947 + WaitForQuery(m_queries[0]); int64_t tcpu1 = Profiler::GetTime(); D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; @@ -109,7 +113,7 @@ class D3D11Ctx UINT64 timestamp = 0; if (m_immediateDevCtx->GetData(m_queries[0], ×tamp, sizeof(timestamp), 0) != S_OK) - continue; // this should never happen, since the enclosing disjoint query succeeded + continue; // this should never happen (we waited for the query to finish above) tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2; tgpu = timestamp * (1000000000 / disjoint.Frequency); @@ -307,13 +311,21 @@ class D3D11ZoneScope WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); } - tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active ) + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active ) : D3D11ZoneScope(ctx, active) { if( !m_active ) return; - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + if( depth > 0 && has_callstack() ) + { + auto* item = 
Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } } tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active) @@ -327,15 +339,23 @@ class D3D11ZoneScope WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); } - tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active) + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active) : D3D11ZoneScope(ctx, active) { if( !m_active ) return; const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + if ( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } } tracy_force_inline ~D3D11ZoneScope() @@ -357,7 +377,7 @@ class D3D11ZoneScope private: tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) #ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) + : m_active( active && GetProfiler().IsConnected() ) #else : m_active( active ) #endif diff 
// Copies at most 255 characters of the NUL-terminated string `src` into `dst`
// and always NUL-terminates the result; `dst` must hold at least 256 bytes.
static inline void LuaShortenSrc( char* dst, const char* src )
{
    const size_t srcLen = strlen( src );
    const size_t copyLen = srcLen < (size_t)255 ? srcLen : (size_t)255;
    memcpy( dst, src, copyLen );
    dst[copyLen] = '\0';
}
// Lua debug hook that maps Lua call/return events onto Tracy zones:
// LUA_HOOKCALL queues a zone-begin item (with a freshly allocated source
// location), LUA_HOOKRET queues the matching zone-end item. Any other event
// is ignored.
static inline void LuaHook( lua_State* L, lua_Debug* ar )
{
    if ( ar->event == LUA_HOOKCALL )
    {
#ifdef TRACY_ON_DEMAND
        // Count every call, even when not recording, so that returns stay balanced.
        const auto zoneCnt = GetLuaZoneState().counter++;
        // Inside an already-open (nested) call while inactive: stay inactive.
        if ( zoneCnt != 0 && !GetLuaZoneState().active ) return;
        // Outermost call (or currently active): re-sample the connection state.
        GetLuaZoneState().active = GetProfiler().IsConnected();
        if ( !GetLuaZoneState().active ) return;
#endif
        // Fill in source ('S'), name ('n') and current line ('l') for this call.
        lua_getinfo( L, "Snl", ar );

        // NOTE(review): this passes `short_src`, whereas the LuaZoneBegin*
        // functions pass `source` to LuaShortenSrc -- confirm this is intended.
        char src[256];
        detail::LuaShortenSrc( src, ar->short_src );

        // Zone name falls back to the short source when Lua reports no function name.
        const auto srcloc = Profiler::AllocSourceLocation( ar->currentline, src, ar->name ? ar->name : ar->short_src );
        TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc );
        MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
        MemWrite( &item->zoneBegin.srcloc, srcloc );
        TracyQueueCommit( zoneBeginThread );
    }
    else if (ar->event == LUA_HOOKRET) {
#ifdef TRACY_ON_DEMAND
        // Undo the increment done by the matching call event.
        assert( GetLuaZoneState().counter != 0 );
        GetLuaZoneState().counter--;
        if ( !GetLuaZoneState().active ) return;
        // Connection dropped while zones were open: stop emitting zone ends.
        if ( !GetProfiler().IsConnected() )
        {
            GetLuaZoneState().active = false;
            return;
        }
#endif
        TracyQueuePrepare( QueueType::ZoneEnd );
        MemWrite( &item->zoneEnd.time, Profiler::GetTime() );
        TracyQueueCommit( zoneEndThread );
    }
}
+ Metal also imposes a few restrictions that make the process of requesting and collecting + queries more complicated in Tracy: + a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes) + b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes, + and there's no way to reset them back to zero after timestamps get resolved; the only + way to clear the timestamps is by allocating a new timestamp query buffer + c) if a command encoder records no commands and its corresponding command buffer ends up + committed to the command queue, Metal will "optimize-away" the encoder along with any + timestamp queries associated with it (the timestamp will remain as zero and will never + get resolved) + Because of the limitations above, two timestamp buffers are managed internally. Once one + of the buffers fills up with requests, the second buffer can start serving new requests. + Once all requests in a buffer get resolved and collected, the entire buffer is discarded + and a new one allocated for future requests. (Proper cycling through a ring buffer would + require bookkeeping and completion handlers to collect only the known complete queries.) + In the current implementation, there is potential for a race condition when the buffer is + discarded and reallocated. In practice, the race condition will never materialize so long + as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low. + Finally, there's a timeout mechanism during timestamp collection to detect "empty" command + encoders and ensure progress. 
+*/ + +#ifndef TRACY_ENABLE + +#define TracyMetalContext(device) nullptr +#define TracyMetalDestroy(ctx) +#define TracyMetalContextName(ctx, name, size) + +#define TracyMetalZone(ctx, encoderDesc, name) +#define TracyMetalZoneC(ctx, encoderDesc, name, color) +#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active) +#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active) + +#define TracyMetalCollect(ctx) + +namespace tracy +{ +class MetalZoneScope {}; +} + +using TracyMetalCtx = void; + +#else + +#if not __has_feature(objc_arc) +#error TracyMetal requires ARC to be enabled. +#endif + +#include +#include +#include + +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyAlign.hpp" +#include "../common/TracyAlloc.hpp" + +// ok to import if in obj-c code +#import + +#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__ + +#define TracyMetalPanic(ret, msg, ...) do { \ + char buffer [1024]; \ + snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \ + TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \ + fprintf(stderr, "%s\n", buffer); \ + ret; \ + } while(false); + +#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f +#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT + +#ifndef TRACY_METAL_DEBUG_MASK +#define TRACY_METAL_DEBUG_MASK (0) +#endif//TRACY_METAL_DEBUG_MASK + +#if TRACY_METAL_DEBUG_MASK + #define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } +#else + #define TracyMetalDebugMasked(mask, ...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 1) + #define TracyMetalDebug_0b00010(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b00010(...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 4) + #define TracyMetalDebug_0b10000(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b10000(...) 
// Allocates a MetalCtx with tracy_malloc and constructs it in place for `device`.
// Returns the new context, or nullptr when construction did not get as far as
// assigning a context id (m_contextId still has its default value of 255); in
// that case the error is logged and the partially set-up context is released.
static MetalCtx* Create(id<MTLDevice> device)
{
    ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4);
    auto ctx = static_cast<MetalCtx*>(tracy_malloc(sizeof(MetalCtx)));
    new (ctx) MetalCtx(device);
    if (ctx->m_contextId == 255)
    {
        // Log only (empty 'ret' argument) so that the clean-up below still runs;
        // previously the 'return' injected through the macro made Destroy()
        // unreachable and the context leaked.
        TracyMetalPanic(, "ERROR: unable to create context.");
        assert(false);
        Destroy(ctx);
        return nullptr;
    }
    return ctx;
}
MTLFence?; + TracyMetalDebugMasked(1<<3, ZoneValue(begin)); + TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint)); + + uint32_t count = RingCount(begin, latestCheckpoint); + if (count == 0) // no pending timestamp queries + { + //uintptr_t nextCheckpoint = m_queryCounter.load(); + //if (nextCheckpoint != latestCheckpoint) + //{ + // // TODO: signal event / fence now? + //} + return true; + } + + // resolve up until the ring buffer boundary and let a subsequenty call + // to Collect handle the wrap-around + bool reallocateBuffer = false; + if (RingIndex(begin) + count >= RingSize()) + { + count = RingSize() - RingIndex(begin); + reallocateBuffer = true; + } + TracyMetalDebugMasked(1<<3, ZoneValue(count)); + + auto buffer_idx = (begin / MaxQueries) % 2; + auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx]; + + if (count >= RingSize()) + { + TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); + } + + TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); + + NSRange range = NSMakeRange(RingIndex(begin), count); + NSData* data = [counterSampleBuffer resolveCounterRange:range]; + NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp); + MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes); + if (timestamps == nil) + { + TracyMetalPanic(return false, "Collect: unable to resolve timestamps."); + } + + if (numResolvedTimestamps != count) + { + TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count); + } + + int resolved = 0; + for (auto i = 0; i < numResolvedTimestamps; i += 2) + { + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") ); + MTLTimestamp t_start = timestamps[i+0].timestamp; + MTLTimestamp t_end = timestamps[i+1].timestamp; + uint32_t k = RingIndex(begin + i); + 
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); + if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) + { + TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); + break; + } + // Metal will initialize timestamp buffer with zeroes; encountering a zero-value + // timestamp means that the timestamp has not been written and resolved yet + if ((t_start == 0) || (t_end == 0)) + { + auto checkTime = std::chrono::high_resolution_clock::now(); + auto requestTime = m_timestampRequestTime[k]; + auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; + TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); + const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f; + if (ms_in_flight < timeout_ms) + break; + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") ); + TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight); + t_start = m_mostRecentTimestamp + 5; + t_end = t_start + 5; + } + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); + { + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(t_start)); + MemWrite(&item->gpuTime.queryId, static_cast(k)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + { + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(t_end)); + MemWrite(&item->gpuTime.queryId, static_cast(k+1)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + 
m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp; + TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); + resolved += 2; + } + TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); + + m_previousCheckpoint += resolved; + + // Check whether the timestamp buffer has been fully resolved/collected: + // WARN: there's technically a race condition here: NextQuery() may reference the + // buffer that is being released instead of the new one. In practice, this should + // never happen so long as Collect is called frequently enough to prevent pending + // timestamp query requests from piling up too quickly. + if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0) + { + m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries); + } + + //RecalibrateClocks(); // to account for drift + + return true; + } + +private: + MetalCtx(id device) + : m_device(device) + { + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); + + if (m_device == nil) + { + TracyMetalPanic({assert(false);} return, "device is nil."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) + { + TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); + } + if (![m_device 
supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); + } + + m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); + m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); + + m_timestampRequestTime.resize(MaxQueries); + + MTLTimestamp cpuTimestamp = 0; + MTLTimestamp gpuTimestamp = 0; + [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; + m_mostRecentTimestamp = gpuTimestamp; + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); + + cpuTimestamp = Profiler::GetTime(); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); + + float period = 1.0f; + + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); + MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); + MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, m_contextId); + //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); + MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); + SubmitQueueItem(item); + } + + ~MetalCtx() + { + // collect the last remnants of Metal GPU activity... 
+ // TODO: add a timeout to this loop? + while (m_previousCheckpoint.load() != m_queryCounter.load()) + Collect(); + } + + tracy_force_inline void SubmitQueueItem(QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + tracy_force_inline uint32_t RingIndex(uintptr_t index) + { + index %= MaxQueries; + return static_cast(index); + } + + tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) + { + // wrap-around safe: all unsigned + uintptr_t count = end - begin; + return static_cast(count); + } + + tracy_force_inline uint32_t RingSize() const + { + return MaxQueries; + } + + struct Query { id buffer; uint32_t idx; }; + + tracy_force_inline Query NextQuery() + { + TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) ); + auto id = m_queryCounter.fetch_add(2); + TracyMetalDebug_0b00010( ZoneValue(id) ); + auto count = RingCount(m_previousCheckpoint, id); + if (count >= MaxQueries) + { + // TODO: return a proper (hidden) "sentinel" query + Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 }; + TracyMetalPanic( + return sentinel, + "NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)", + m_previousCheckpoint.load(), id, count + ); + } + uint32_t buffer_idx = (id / MaxQueries) % 2; + TracyMetalDebug_0b00010( ZoneValue(buffer_idx) ); + auto buffer = m_counterSampleBuffers[buffer_idx]; + if (buffer == nil) + TracyMetalPanic(, "NextQueryId: sample buffer is nil! 
(id=%llu)", id); + uint32_t idx = RingIndex(id); + TracyMetalDebug_0b00010( ZoneValue(idx) ); + TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") ); + m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); + return Query{ buffer, idx }; + } + + tracy_force_inline uint8_t GetContextId() const + { + return m_contextId; + } + + static id NewTimestampSampleBuffer(id device, size_t count) + { + ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer"); + + id timestampCounterSet = nil; + for (id counterSet in device.counterSets) + { + if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp]) + { + timestampCounterSet = counterSet; + break; + } + } + if (timestampCounterSet == nil) + { + TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform."); + } + + MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init]; + sampleDescriptor.counterSet = timestampCounterSet; + sampleDescriptor.sampleCount = MaxQueries; + sampleDescriptor.storageMode = MTLStorageModeShared; + sampleDescriptor.label = @"TracyMetalTimestampPool"; + + NSError* error = nil; + id counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error]; + if (error != nil) + { + //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); + TracyMetalPanic({assert(false);} return nil, + "ERROR: unable to create sample buffer for timestamp counters : %s | %s", + [error.localizedDescription cString], [error.localizedFailureReason cString]); + } + + return counterSampleBuffer; + } + + uint8_t m_contextId = 255; + + id m_device = nil; + id m_counterSampleBuffers [2] = {}; + + using atomic_counter = std::atomic; + static_assert(atomic_counter::is_always_lock_free); + atomic_counter m_queryCounter = 0; + + atomic_counter m_previousCheckpoint = 0; + MTLTimestamp m_mostRecentTimestamp = 0; + + std::vector 
m_timestampRequestTime; + + std::mutex m_collectionMutex; +}; + +class MetalZoneScope +{ +public: + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + 
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample; + desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample; + desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + /* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it + tracy_force_inline MetalZoneScope( MetalCtx* ctx, id cmdEncoder, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_ctx = ctx; + m_cmdEncoder = cmdEncoder; + + auto& query = m_query = ctx->NextQueryId(); + + [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES]; + + SubmitZoneBeginGpu(ctx, query.idx, srcloc); + } + */ + + tracy_force_inline ~MetalZoneScope() + { + if( !m_active ) return; + + SubmitZoneEndGpu(m_ctx, m_query.idx + 1); + } + + TracyMetalDebugZoneScopeWireTap; + +private: + const bool m_active; + + MetalCtx* m_ctx; + + /* TODO: declare it for "command-level" profiling + id m_cmdEncoder; + */ + + static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc) + { + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, 
"TracyMetalGpuZone")); + } + + static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) + { + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); + } + + MetalCtx::Query m_query = {}; +}; + +} + +using TracyMetalCtx = tracy::MetalCtx; + +#define TracyMetalContext(device) tracy::MetalCtx::Create(device) +#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx) +#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size) + +#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true ) +#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true ) +#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); +#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); + +#define TracyMetalCollect( ctx ) ctx->Collect(); + + + +#undef TracyMetalDebug_ZoneScopeWireTap +#undef TracyMetalDebug_0b00010 +#undef TracyMetalDebug_0b10000 +#undef 
TracyMetalDebugMasked +#undef TRACY_METAL_DEBUG_MASK +#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#undef TracyMetalPanic +#undef TRACY_METAL_VA_ARGS + +#endif + +#endif//__TRACYMETAL_HMM__ diff --git a/external/sources/tracy/public/tracy/TracyOpenCL.hpp b/external/sources/tracy/public/tracy/TracyOpenCL.hpp index 34466ccc97..ede5c4613b 100644 --- a/external/sources/tracy/public/tracy/TracyOpenCL.hpp +++ b/external/sources/tracy/public/tracy/TracyOpenCL.hpp @@ -255,7 +255,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active) + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active) #ifdef TRACY_ON_DEMAND : m_active(is_active&& GetProfiler().IsConnected()) #else @@ -304,7 +304,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active) + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active) #ifdef TRACY_ON_DEMAND : m_active(is_active && GetProfiler().IsConnected()) #else @@ -373,9 +373,9 @@ namespace tracy { using TracyCLCtx = tracy::OpenCLCtx*; -#define TracyCLContext(context, device) tracy::CreateCLContext(context, device); +#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device); #define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx); -#define TracyCLContextName(context, name, size) ctx->Name(name, size); +#define TracyCLContextName(ctx, name, size) ctx->Name(name, size); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK # define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData 
TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); # define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); diff --git a/external/sources/tracy/public/tracy/TracyOpenGL.hpp b/external/sources/tracy/public/tracy/TracyOpenGL.hpp index 3bdadccee5..30abd4fd05 100644 --- a/external/sources/tracy/public/tracy/TracyOpenGL.hpp +++ b/external/sources/tracy/public/tracy/TracyOpenGL.hpp @@ -25,7 +25,7 @@ class GpuCtxScope { public: GpuCtxScope( const SourceLocationData*, bool ) {} - GpuCtxScope( const SourceLocationData*, int, bool ) {} + GpuCtxScope( const SourceLocationData*, int32_t, bool ) {} }; } @@ -222,7 +222,7 @@ class GpuCtxScope TracyLfqCommit; } - tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active ) + tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -271,7 +271,7 @@ class GpuCtxScope TracyLfqCommit; } - tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active ) + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else diff --git a/external/sources/tracy/public/tracy/TracyVulkan.hpp 
b/external/sources/tracy/public/tracy/TracyVulkan.hpp index 2d079f7b5a..72643188f8 100644 --- a/external/sources/tracy/public/tracy/TracyVulkan.hpp +++ b/external/sources/tracy/public/tracy/TracyVulkan.hpp @@ -61,7 +61,9 @@ namespace tracy Operation(vkResetQueryPool) #define LoadVkDeviceExtensionSymbols(Operation) \ - Operation(vkGetCalibratedTimestampsEXT) \ + Operation(vkGetCalibratedTimestampsEXT) + +#define LoadVkInstanceExtensionSymbols(Operation) \ Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) #define LoadVkInstanceCoreSymbols(Operation) \ @@ -72,6 +74,7 @@ struct VkSymbolTable #define MAKE_PFN(name) PFN_##name name; LoadVkDeviceCoreSymbols(MAKE_PFN) LoadVkDeviceExtensionSymbols(MAKE_PFN) + LoadVkInstanceExtensionSymbols(MAKE_PFN) LoadVkInstanceCoreSymbols(MAKE_PFN) #undef MAKE_PFN }; @@ -215,7 +218,9 @@ class VkCtx WriteInitialItem( physdev, tcpu, tgpu ); - m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + // We need the buffer to be twice as large for availability values + size_t resSize = sizeof( int64_t ) * m_queryCount * 2; + m_res = (int64_t*)tracy_malloc( resSize ); } #endif @@ -260,7 +265,7 @@ class VkCtx } #endif assert( head > m_tail ); - + const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); unsigned int cnt; @@ -280,17 +285,22 @@ class VkCtx } - if( VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) ) - { - m_oldCnt = cnt; - return; - } + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); - MemWrite( &item->gpuTime.gpuTime, m_res[idx] ); + MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); MemWrite( &item->gpuTime.queryId, uint16_t( 
wrappedTail + idx ) ); MemWrite( &item->gpuTime.context, m_context ); Profiler::QueueSerialFinish(); @@ -320,7 +330,6 @@ class VkCtx m_tail += cnt; } -private: tracy_force_inline unsigned int NextQueryId() { const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); @@ -332,6 +341,12 @@ class VkCtx return m_context; } + tracy_force_inline VkQueryPool GetQueryPool() const + { + return m_query; + } + +private: tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) { assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); @@ -402,11 +417,11 @@ class VkCtx }; uint64_t ts[2]; uint64_t deviation[NumProbes]; - for( int i=0; i deviation[i] ) { minDeviation = deviation[i]; } @@ -457,6 +472,7 @@ class VkCtx LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL ) LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) #undef VK_GET_DEVICE_SYMBOL #undef VK_LOAD_DEVICE_SYMBOL @@ -472,7 +488,9 @@ class VkCtx VkSymbolTable m_symbols; #endif uint64_t m_deviation; +#ifdef _WIN32 int64_t m_qpcToNs; +#endif int64_t m_prevCalibration; uint8_t m_context; @@ -513,7 +531,7 @@ class VkCtxScope Profiler::QueueSerialFinish(); } - tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active ) + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -527,8 +545,17 @@ class VkCtxScope const auto queryId = ctx->NextQueryId(); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); + QueueItem *item; + if( depth > 0 && 
has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + } MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); @@ -562,7 +589,7 @@ class VkCtxScope Profiler::QueueSerialFinish(); } - tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active ) + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -577,8 +604,17 @@ class VkCtxScope CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + QueueItem *item; + if( depth > 0 && has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + } MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); diff --git 
a/external/sources/tracy/tracy.pdf b/external/sources/tracy/tracy.pdf index 860feb749b..717bf57f91 100644 Binary files a/external/sources/tracy/tracy.pdf and b/external/sources/tracy/tracy.pdf differ