diff --git a/include/hx/TelemetryTracy.h b/include/hx/TelemetryTracy.h index 178932afc..d55bc9d86 100644 --- a/include/hx/TelemetryTracy.h +++ b/include/hx/TelemetryTracy.h @@ -7,8 +7,8 @@ #define TRACY_ENABLE #include -#include "../../project/thirdparty/tracy-0.11.1/tracy/TracyC.h" -#include "../../project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp" +#include "../../project/thirdparty/tracy-0.12.0/tracy/TracyC.h" +#include "../../project/thirdparty/tracy-0.12.0/tracy/Tracy.hpp" #ifdef HXCPP_TRACY_MEMORY #ifdef HXCPP_GC_MOVING @@ -30,7 +30,7 @@ ::hx::strbuf TracyConcat(_hx_tracy_str_buffer, TracyLine); \ int TracyConcat(_hx_tracy_str_length, TracyLine); \ const char *TracyConcat(_hx_tracy_str_buffer_ptr, TracyLine) = name.utf8_str(&TracyConcat(_hx_tracy_str_buffer, TracyLine), false, &TracyConcat(_hx_tracy_str_length, TracyLine)); \ - ::tracy::ScopedZone TracyConcat(_hx_tracy_scoped_zone,TracyLine)(_hx_stackframe.lineNumber, _hx_stackframe.position->fileName, strlen(_hx_stackframe.position->fileName), _hx_stackframe.position->fullName, strlen(_hx_stackframe.position->fullName), TracyConcat(_hx_tracy_str_buffer_ptr, TracyLine), TracyConcat(_hx_tracy_str_length, TracyLine)); + ::tracy::ScopedZone TracyConcat(_hx_tracy_scoped_zone,TracyLine)(_hx_stackframe.lineNumber, _hx_stackframe.position->fileName, strlen(_hx_stackframe.position->fileName), _hx_stackframe.position->fullName, strlen(_hx_stackframe.position->fullName), TracyConcat(_hx_tracy_str_buffer_ptr, TracyLine), TracyConcat(_hx_tracy_str_length, TracyLine), -1); #endif void __hxcpp_tracy_framemark(); diff --git a/project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp b/project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp deleted file mode 100644 index e75d02ce9..000000000 --- a/project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp +++ /dev/null @@ -1,300 +0,0 @@ -#ifndef __TRACY_HPP__ -#define __TRACY_HPP__ - -#include "../common/TracyColor.hpp" -#include "../common/TracySystem.hpp" - -#ifndef TracyFunction -# define TracyFunction 
__FUNCTION__ -#endif - -#ifndef TracyFile -# define TracyFile __FILE__ -#endif - -#ifndef TracyLine -# define TracyLine __LINE__ -#endif - -#ifndef TRACY_ENABLE - -#define TracyNoop - -#define ZoneNamed(x,y) -#define ZoneNamedN(x,y,z) -#define ZoneNamedC(x,y,z) -#define ZoneNamedNC(x,y,z,w) - -#define ZoneTransient(x,y) -#define ZoneTransientN(x,y,z) - -#define ZoneScoped -#define ZoneScopedN(x) -#define ZoneScopedC(x) -#define ZoneScopedNC(x,y) - -#define ZoneText(x,y) -#define ZoneTextV(x,y,z) -#define ZoneTextF(x,...) -#define ZoneTextVF(x,y,...) -#define ZoneName(x,y) -#define ZoneNameV(x,y,z) -#define ZoneNameF(x,...) -#define ZoneNameVF(x,y,...) -#define ZoneColor(x) -#define ZoneColorV(x,y) -#define ZoneValue(x) -#define ZoneValueV(x,y) -#define ZoneIsActive false -#define ZoneIsActiveV(x) false - -#define FrameMark -#define FrameMarkNamed(x) -#define FrameMarkStart(x) -#define FrameMarkEnd(x) - -#define FrameImage(x,y,z,w,a) - -#define TracyLockable( type, varname ) type varname -#define TracyLockableN( type, varname, desc ) type varname -#define TracySharedLockable( type, varname ) type varname -#define TracySharedLockableN( type, varname, desc ) type varname -#define LockableBase( type ) type -#define SharedLockableBase( type ) type -#define LockMark(x) (void)x -#define LockableName(x,y,z) - -#define TracyPlot(x,y) -#define TracyPlotConfig(x,y,z,w,a) - -#define TracyMessage(x,y) -#define TracyMessageL(x) -#define TracyMessageC(x,y,z) -#define TracyMessageLC(x,y) -#define TracyAppInfo(x,y) - -#define TracyAlloc(x,y) -#define TracyFree(x) -#define TracySecureAlloc(x,y) -#define TracySecureFree(x) - -#define TracyAllocN(x,y,z) -#define TracyFreeN(x,y) -#define TracySecureAllocN(x,y,z) -#define TracySecureFreeN(x,y) - -#define ZoneNamedS(x,y,z) -#define ZoneNamedNS(x,y,z,w) -#define ZoneNamedCS(x,y,z,w) -#define ZoneNamedNCS(x,y,z,w,a) - -#define ZoneTransientS(x,y,z) -#define ZoneTransientNS(x,y,z,w) - -#define ZoneScopedS(x) -#define ZoneScopedNS(x,y) 
-#define ZoneScopedCS(x,y) -#define ZoneScopedNCS(x,y,z) - -#define TracyAllocS(x,y,z) -#define TracyFreeS(x,y) -#define TracySecureAllocS(x,y,z) -#define TracySecureFreeS(x,y) - -#define TracyAllocNS(x,y,z,w) -#define TracyFreeNS(x,y,z) -#define TracySecureAllocNS(x,y,z,w) -#define TracySecureFreeNS(x,y,z) - -#define TracyMessageS(x,y,z) -#define TracyMessageLS(x,y) -#define TracyMessageCS(x,y,z,w) -#define TracyMessageLCS(x,y,z) - -#define TracySourceCallbackRegister(x,y) -#define TracyParameterRegister(x,y) -#define TracyParameterSetup(x,y,z,w) -#define TracyIsConnected false -#define TracyIsStarted false -#define TracySetProgramName(x) - -#define TracyFiberEnter(x) -#define TracyFiberEnterHint(x,y) -#define TracyFiberLeave - -#else - -#include - -#include "../client/TracyLock.hpp" -#include "../client/TracyProfiler.hpp" -#include "../client/TracyScoped.hpp" - -#define TracyNoop tracy::ProfilerAvailable() - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData 
TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) - -# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) -# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) -#else -# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) -# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) - -# define 
ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active ) -# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ) -# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, active ) -#endif - -#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) -#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true ) -#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true ) -#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) - -#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) -#define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) -#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) -#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) -#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) -#define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) -#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) -#define ZoneNameVF( varname, fmt, ... 
) varname.NameFmt( fmt, ##__VA_ARGS__ ) -#define ZoneColor( color ) ___tracy_scoped_zone.Color( color ) -#define ZoneColorV( varname, color ) varname.Color( color ) -#define ZoneValue( value ) ___tracy_scoped_zone.Value( value ) -#define ZoneValueV( varname, value ) varname.Value( value ) -#define ZoneIsActive ___tracy_scoped_zone.IsActive() -#define ZoneIsActiveV( varname ) varname.IsActive() - -#define FrameMark tracy::Profiler::SendFrameMark( nullptr ) -#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name ) -#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ) -#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ) - -#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip ) - -#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } -#define LockableBase( type ) tracy::Lockable -#define SharedLockableBase( type ) tracy::SharedLockable -#define LockMark( varname ) static constexpr 
tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname ) -#define LockableName( varname, txt, size ) varname.CustomName( txt, size ) - -#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ) -#define TracyPlotConfig( name, type, step, fill, color ) tracy::Profiler::ConfigurePlot( name, type, step, fill, color ) - -#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size ) - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) -# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) -# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) -# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) - -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) -# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) -# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ) -# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) - -# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) -# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) -# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) -# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) -#else -# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 ) -# define TracyMessageL( txt ) tracy::Profiler::Message( 
txt, 0 ) -# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 ) -# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 ) - -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false ) -# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false ) -# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true ) -# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true ) - -# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name ) -# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name ) -# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name ) -# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name ) -#endif - -#ifdef TRACY_HAS_CALLSTACK -# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, 
(uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) - -# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) -# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) - -# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) -# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) -# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) -# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) - -# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) -# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) -# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) -# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) - -# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) -# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) -# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) -# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) - -# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) -# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) -# define TracyMessageCS( txt, size, color, depth ) 
tracy::Profiler::MessageColor( txt, size, color, depth ) -# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) -#else -# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active ) -# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active ) -# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active ) -# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active ) - -# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active ) -# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active ) - -# define ZoneScopedS( depth ) ZoneScoped -# define ZoneScopedNS( name, depth ) ZoneScopedN( name ) -# define ZoneScopedCS( color, depth ) ZoneScopedC( color ) -# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color ) - -# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size ) -# define TracyFreeS( ptr, depth ) TracyFree( ptr ) -# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size ) -# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr ) - -# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name ) -# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name ) -# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name ) -# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name ) - -# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size ) -# define TracyMessageLS( txt, depth ) TracyMessageL( txt ) -# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color ) -# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color ) -#endif - -#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) -#define TracyParameterRegister( cb, data ) 
tracy::Profiler::ParameterRegister( cb, data ) -#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) -#define TracyIsConnected tracy::GetProfiler().IsConnected() -#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); - -#ifdef TRACY_FIBERS -# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) -# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) -# define TracyFiberLeave tracy::Profiler::LeaveFiber() -#endif - -#endif - -#endif diff --git a/project/thirdparty/tracy-0.11.1/README.md b/project/thirdparty/tracy-0.12.0/README.md similarity index 100% rename from project/thirdparty/tracy-0.11.1/README.md rename to project/thirdparty/tracy-0.12.0/README.md diff --git a/project/thirdparty/tracy-0.11.1/TracyClient.cpp b/project/thirdparty/tracy-0.12.0/TracyClient.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/TracyClient.cpp rename to project/thirdparty/tracy-0.12.0/TracyClient.cpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyAlloc.cpp b/project/thirdparty/tracy-0.12.0/client/TracyAlloc.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyAlloc.cpp rename to project/thirdparty/tracy-0.12.0/client/TracyAlloc.cpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyArmCpuTable.hpp b/project/thirdparty/tracy-0.12.0/client/TracyArmCpuTable.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyArmCpuTable.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyArmCpuTable.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyCallstack.cpp b/project/thirdparty/tracy-0.12.0/client/TracyCallstack.cpp similarity index 98% rename from project/thirdparty/tracy-0.11.1/client/TracyCallstack.cpp rename to project/thirdparty/tracy-0.12.0/client/TracyCallstack.cpp index 946a19721..bd3290604 100644 --- 
a/project/thirdparty/tracy-0.11.1/client/TracyCallstack.cpp +++ b/project/thirdparty/tracy-0.12.0/client/TracyCallstack.cpp @@ -282,7 +282,12 @@ extern "C" t_SymFromInlineContext _SymFromInlineContext = 0; t_SymGetLineFromInlineContext _SymGetLineFromInlineContext = 0; - TRACY_API ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain = 0; + typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long ); + ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChainPtr = nullptr; + TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void** callers, unsigned long count, unsigned long flags) + { + return ___tracy_RtlWalkFrameChainPtr(callers, count, flags); + } } struct ModuleCache @@ -307,7 +312,7 @@ size_t s_krnlCacheCnt; void InitCallstackCritical() { - ___tracy_RtlWalkFrameChain = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" ); + ___tracy_RtlWalkFrameChainPtr = (___tracy_t_RtlWalkFrameChain)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "RtlWalkFrameChain" ); } void DbgHelpInit() diff --git a/project/thirdparty/tracy-0.11.1/client/TracyCallstack.h b/project/thirdparty/tracy-0.12.0/client/TracyCallstack.h similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyCallstack.h rename to project/thirdparty/tracy-0.12.0/client/TracyCallstack.h diff --git a/project/thirdparty/tracy-0.11.1/client/TracyCallstack.hpp b/project/thirdparty/tracy-0.12.0/client/TracyCallstack.hpp similarity index 86% rename from project/thirdparty/tracy-0.11.1/client/TracyCallstack.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyCallstack.hpp index fdc9345df..1d8cd654f 100644 --- a/project/thirdparty/tracy-0.11.1/client/TracyCallstack.hpp +++ b/project/thirdparty/tracy-0.12.0/client/TracyCallstack.hpp @@ -9,7 +9,8 @@ namespace tracy { -static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; } +static constexpr bool has_callstack() { return false; } 
+static tracy_force_inline void* Callstack( int32_t /*depth*/ ) { return nullptr; } } #else @@ -38,6 +39,8 @@ static tracy_force_inline void* Callstack( int /*depth*/ ) { return nullptr; } namespace tracy { +static constexpr bool has_callstack() { return true; } + struct CallstackSymbolData { const char* file; @@ -79,11 +82,10 @@ debuginfod_client* GetDebuginfodClient(); extern "C" { - typedef unsigned long (__stdcall *___tracy_t_RtlWalkFrameChain)( void**, unsigned long, unsigned long ); - TRACY_API extern ___tracy_t_RtlWalkFrameChain ___tracy_RtlWalkFrameChain; + TRACY_API unsigned long ___tracy_RtlWalkFrameChain( void**, unsigned long, unsigned long ); } -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 && depth < 63 ); auto trace = (uintptr_t*)tracy_malloc( ( 1 + depth ) * sizeof( uintptr_t ) ); @@ -112,7 +114,7 @@ static _Unwind_Reason_Code tracy_unwind_callback( struct _Unwind_Context* ctx, v return _URC_NO_REASON; } -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 && depth < 63 ); @@ -127,7 +129,7 @@ static tracy_force_inline void* Callstack( int depth ) #elif TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 -static tracy_force_inline void* Callstack( int depth ) +static tracy_force_inline void* Callstack( int32_t depth ) { assert( depth >= 1 ); diff --git a/project/thirdparty/tracy-0.11.1/client/TracyCpuid.hpp b/project/thirdparty/tracy-0.12.0/client/TracyCpuid.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyCpuid.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyCpuid.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyDebug.hpp b/project/thirdparty/tracy-0.12.0/client/TracyDebug.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyDebug.hpp rename to 
project/thirdparty/tracy-0.12.0/client/TracyDebug.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyDxt1.cpp b/project/thirdparty/tracy-0.12.0/client/TracyDxt1.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyDxt1.cpp rename to project/thirdparty/tracy-0.12.0/client/TracyDxt1.cpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyDxt1.hpp b/project/thirdparty/tracy-0.12.0/client/TracyDxt1.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyDxt1.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyDxt1.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyFastVector.hpp b/project/thirdparty/tracy-0.12.0/client/TracyFastVector.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyFastVector.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyFastVector.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyKCore.cpp b/project/thirdparty/tracy-0.12.0/client/TracyKCore.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyKCore.cpp rename to project/thirdparty/tracy-0.12.0/client/TracyKCore.cpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyKCore.hpp b/project/thirdparty/tracy-0.12.0/client/TracyKCore.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyKCore.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyKCore.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyLock.hpp b/project/thirdparty/tracy-0.12.0/client/TracyLock.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyLock.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyLock.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyOverride.cpp b/project/thirdparty/tracy-0.12.0/client/TracyOverride.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyOverride.cpp rename to project/thirdparty/tracy-0.12.0/client/TracyOverride.cpp 
diff --git a/project/thirdparty/tracy-0.11.1/client/TracyProfiler.cpp b/project/thirdparty/tracy-0.12.0/client/TracyProfiler.cpp similarity index 94% rename from project/thirdparty/tracy-0.11.1/client/TracyProfiler.cpp rename to project/thirdparty/tracy-0.12.0/client/TracyProfiler.cpp index 3b8687441..6fe786809 100644 --- a/project/thirdparty/tracy-0.11.1/client/TracyProfiler.cpp +++ b/project/thirdparty/tracy-0.12.0/client/TracyProfiler.cpp @@ -81,6 +81,10 @@ #include "TracySysTrace.hpp" #include "../tracy/TracyC.h" +#if defined TRACY_MANUAL_LIFETIME && !defined(TRACY_DELAYED_INIT) +# error "TRACY_MANUAL_LIFETIME requires enabled TRACY_DELAYED_INIT" +#endif + #ifdef TRACY_PORT # ifndef TRACY_DATA_PORT # define TRACY_DATA_PORT TRACY_PORT @@ -106,9 +110,12 @@ # include extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PROCESSOR_RELATIONSHIP, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD ); +extern "C" typedef char* (WINAPI *t_WineGetVersion)(); +extern "C" typedef char* (WINAPI *t_WineGetBuildId)(); #else # include # include +# include #endif #if defined __linux__ # include @@ -521,7 +528,16 @@ static const char* GetHostInfo() # ifdef __MINGW32__ ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber ); # else - ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + auto WineGetVersion = (t_WineGetVersion)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_version" ); + auto WineGetBuildId = (t_WineGetBuildId)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "wine_get_build_id" ); + if( WineGetVersion && WineGetBuildId ) + { + ptr += sprintf( ptr, "OS: Windows %lu.%lu.%lu (Wine %s [%s])\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber, WineGetVersion(), WineGetBuildId() ); + } + else + { + ptr += sprintf( ptr, "OS: 
Windows %lu.%lu.%lu\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber ); + } # endif } #elif defined __linux__ @@ -1378,6 +1394,8 @@ TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; } TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } TRACY_API bool ProfilerAllocatorAvailable() { return !RpThreadShutdown; } +constexpr static size_t SafeSendBufferSize = 65536; + Profiler::Profiler() : m_timeBegin( 0 ) , m_mainThread( detail::GetThreadHandleImpl() ) @@ -1451,6 +1469,21 @@ Profiler::Profiler() m_userPort = atoi( userPort ); } + m_safeSendBuffer = (char*)tracy_malloc( SafeSendBufferSize ); + +#ifndef _WIN32 + pipe(m_pipe); +# if defined __APPLE__ || defined BSD + // FreeBSD/XNU don't have F_SETPIPE_SZ, so use the default + m_pipeBufSize = 16384; +# else + m_pipeBufSize = (int)(ptrdiff_t)SafeSendBufferSize; + while( fcntl( m_pipe[0], F_SETPIPE_SZ, m_pipeBufSize ) < 0 && errno == EPERM ) m_pipeBufSize /= 2; // too big; reduce + m_pipeBufSize = fcntl( m_pipe[0], F_GETPIPE_SZ ); +# endif + fcntl( m_pipe[1], F_SETFL, O_NONBLOCK ); +#endif + #if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) SpawnWorkerThreads(); #endif @@ -1476,7 +1509,9 @@ void Profiler::InstallCrashHandler() #endif #if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER - m_exceptionHandler = AddVectoredExceptionHandler( 1, CrashFilter ); + // We cannot use Vectored Exception handling because it catches application-wide frame-based SEH blocks. We only + // want to catch unhandled exceptions. 
+ m_prevHandler = SetUnhandledExceptionFilter( CrashFilter ); #endif #ifndef TRACY_NO_CRASH_HANDLER @@ -1487,20 +1522,29 @@ void Profiler::InstallCrashHandler() void Profiler::RemoveCrashHandler() { -#if defined _WIN32 && !defined TRACY_UWP - if( m_crashHandlerInstalled ) RemoveVectoredExceptionHandler( m_exceptionHandler ); +#if defined _WIN32 && !defined TRACY_UWP && !defined TRACY_NO_CRASH_HANDLER + if( m_crashHandlerInstalled ) + { + auto prev = SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER)m_prevHandler ); + if( prev != CrashFilter ) SetUnhandledExceptionFilter( prev ); // A different exception filter was installed over ours => put it back + } #endif #if defined __linux__ && !defined TRACY_NO_CRASH_HANDLER if( m_crashHandlerInstalled ) { - sigaction( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr, nullptr ); - sigaction( SIGILL, &m_prevSignal.ill, nullptr ); - sigaction( SIGFPE, &m_prevSignal.fpe, nullptr ); - sigaction( SIGSEGV, &m_prevSignal.segv, nullptr ); - sigaction( SIGPIPE, &m_prevSignal.pipe, nullptr ); - sigaction( SIGBUS, &m_prevSignal.bus, nullptr ); - sigaction( SIGABRT, &m_prevSignal.abrt, nullptr ); + auto restore = []( int signum, struct sigaction* prev ) { + struct sigaction old; + sigaction( signum, prev, &old ); + if( old.sa_sigaction != CrashHandler ) sigaction( signum, &old, nullptr ); // A different signal handler was installed over ours => put it back + }; + restore( TRACY_CRASH_SIGNAL, &m_prevSignal.pwr ); + restore( SIGILL, &m_prevSignal.ill ); + restore( SIGFPE, &m_prevSignal.fpe ); + restore( SIGSEGV, &m_prevSignal.segv ); + restore( SIGPIPE, &m_prevSignal.pipe ); + restore( SIGBUS, &m_prevSignal.bus ); + restore( SIGABRT, &m_prevSignal.abrt ); } #endif m_crashHandlerInstalled = false; @@ -1589,6 +1633,12 @@ Profiler::~Profiler() tracy_free( m_kcore ); #endif +#ifndef _WIN32 + close( m_pipe[0] ); + close( m_pipe[1] ); +#endif + tracy_free( m_safeSendBuffer ); + tracy_free( m_lz4Buf ); tracy_free( m_buffer ); LZ4_freeStream( 
(LZ4_stream_t*)m_stream ); @@ -2816,6 +2866,15 @@ Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->memFree.time, dt ); break; } + case QueueType::MemDiscard: + case QueueType::MemDiscardCallstack: + { + int64_t t = MemRead( &item->memDiscard.time ); + int64_t dt = t - refSerial; + refSerial = t; + MemWrite( &item->memDiscard.time, dt ); + break; + } case QueueType::GpuZoneBeginSerial: case QueueType::GpuZoneBeginCallstackSerial: { @@ -3052,6 +3111,62 @@ bool Profiler::CommitData() return ret; } +char* Profiler::SafeCopyProlog( const char* data, size_t size ) +{ + bool success = true; + char* buf = m_safeSendBuffer; +#ifndef NDEBUG + assert( !m_inUse.exchange(true) ); +#endif + + if( size > SafeSendBufferSize ) buf = (char*)tracy_malloc( size ); + +#ifdef _WIN32 + __try + { + memcpy( buf, data, size ); + } + __except( 1 /*EXCEPTION_EXECUTE_HANDLER*/ ) + { + success = false; + } +#else + // Send through the pipe to ensure safe reads + for( size_t offset = 0; offset != size; /*in loop*/ ) + { + size_t sendsize = size - offset; + ssize_t result1, result2; + while( ( result1 = write( m_pipe[1], data + offset, sendsize ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result1 < 0 ) + { + success = false; + break; + } + while( ( result2 = read( m_pipe[0], buf + offset, result1 ) ) < 0 && errno == EINTR ) { /* retry */ } + if( result2 != result1 ) + { + success = false; + break; + } + offset += result1; + } +#endif + + if( success ) return buf; + + SafeCopyEpilog( buf ); + return nullptr; +} + +void Profiler::SafeCopyEpilog( char* buf ) +{ + if( buf != m_safeSendBuffer ) tracy_free( buf ); + +#ifndef NDEBUG + m_inUse.store( false ); +#endif +} + bool Profiler::SendData( const char* data, size_t len ) { const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 ); @@ -3890,17 +4005,23 @@ void Profiler::ReportTopology() sprintf( path, "%s%i/topology/core_id", basePath, i ); f = fopen( 
path, "rb" ); - read = fread( buf, 1, 1024, f ); - buf[read] = '\0'; - fclose( f ); - cpuData[i].core = uint32_t( atoi( buf ) ); + if( f ) + { + read = fread( buf, 1, 1024, f ); + buf[read] = '\0'; + fclose( f ); + cpuData[i].core = uint32_t( atoi( buf ) ); + } sprintf( path, "%s%i/topology/die_id", basePath, i ); f = fopen( path, "rb" ); - read = fread( buf, 1, 1024, f ); - buf[read] = '\0'; - fclose( f ); - cpuData[i].die = uint32_t( atoi( buf ) ); + if( f ) + { + read = fread( buf, 1, 1024, f ); + buf[read] = '\0'; + fclose( f ); + cpuData[i].die = uint32_t( atoi( buf ) ); + } } for( int i=0; i 0 && tracy::has_callstack() ) { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommitC( zoneBeginThread ); + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginCallstack; } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); + TracyQueueCommitC( zoneBeginThread ); + return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4217,7 +4341,7 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int act return ctx; } -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ) +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ) { ___tracy_c_zone_context ctx; #ifdef TRACY_ON_DEMAND @@ -4240,13 +4364,17 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo TracyQueueCommitC( zoneValidationThread ); } #endif - 
tracy::GetProfiler().SendCallstack( depth ); + auto zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && tracy::has_callstack() ) { - TracyQueuePrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); - tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); - tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommitC( zoneBeginThread ); + tracy::GetProfiler().SendCallstack( depth ); + zoneQueue = tracy::QueueType::ZoneBeginAllocSrcLocCallstack; } + TracyQueuePrepareC( zoneQueue ); + tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); + tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommitC( zoneBeginThread ); + return ctx; } @@ -4344,26 +4472,78 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) } } -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); } -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ) { tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); } -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } -TRACY_API void 
___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ) { tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int32_t secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); + } + else + { + tracy::Profiler::MemFree( ptr, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ) { tracy::Profiler::MemDiscard( name, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemDiscardCallstack( name, secure != 0, depth ); + } + else + { + tracy::Profiler::MemDiscard( name, secure != 0 ); + } +} +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ) { tracy::Profiler::MemAllocNamed( ptr, size, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemAllocNamed( ptr, 
size, secure != 0, name ); + } +} +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ) { tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); } +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ) +{ + if( depth > 0 && tracy::has_callstack() ) + { + tracy::Profiler::MemFreeCallstackNamed( ptr, depth, secure != 0, name ); + } + else + { + tracy::Profiler::MemFreeNamed( ptr, secure != 0, name ); + } +} TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); } +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip != 0 ); } TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); } TRACY_API void ___tracy_emit_plot_float( const char* name, float val ) { tracy::Profiler::PlotData( name, val ); } TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ) { tracy::Profiler::PlotData( name, val ); } -TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step, fill, color ); } -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); } -TRACY_API void 
___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); } -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ) { tracy::Profiler::ConfigurePlot( name, tracy::PlotFormatType(type), step != 0, fill != 0, color ); } +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ) { tracy::Profiler::Message( txt, size, callstack_depth ); } +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ) { tracy::Profiler::Message( txt, callstack_depth ); } +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, size, color, callstack_depth ); } +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ) { tracy::Profiler::MessageColor( txt, color, callstack_depth ); } TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ) { @@ -4661,7 +4841,7 @@ TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_ tracy::tracy_free((void*)lockdata); } -TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) { #ifdef TRACY_ON_DEMAND bool queue = false; @@ -4673,7 +4853,7 @@ TRACY_API int 
___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context if( active != connected ) lockdata->m_active.store( connected, std::memory_order_relaxed ); if( connected ) queue = true; } - if( !queue ) return false; + if( !queue ) return static_cast(false); #endif auto item = tracy::Profiler::QueueSerial(); @@ -4682,7 +4862,7 @@ TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context tracy::MemWrite( &item->lockWait.id, lockdata->m_id ); tracy::MemWrite( &item->lockWait.time, tracy::Profiler::GetTime() ); tracy::Profiler::QueueSerialFinish(); - return true; + return static_cast(true); } TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ) @@ -4714,7 +4894,7 @@ TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_conte tracy::Profiler::QueueSerialFinish(); } -TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired ) +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ) { #ifdef TRACY_ON_DEMAND if( !acquired ) return; @@ -4779,9 +4959,9 @@ TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_contex tracy::Profiler::QueueSerialFinish(); } -TRACY_API int ___tracy_connected( void ) +TRACY_API int32_t ___tracy_connected( void ) { - return tracy::GetProfiler().IsConnected(); + return static_cast( tracy::GetProfiler().IsConnected() ); } #ifdef TRACY_FIBERS @@ -4789,7 +4969,7 @@ TRACY_API void ___tracy_fiber_enter( const char* fiber ){ tracy::Profiler::Enter TRACY_API void ___tracy_fiber_leave( void ){ tracy::Profiler::LeaveFiber(); } #endif -# ifdef TRACY_MANUAL_LIFETIME +# if defined TRACY_MANUAL_LIFETIME && defined TRACY_DELAYED_INIT TRACY_API void ___tracy_startup_profiler( void ) { tracy::StartupProfiler(); @@ -4800,9 +4980,9 @@ TRACY_API void ___tracy_shutdown_profiler( void ) tracy::ShutdownProfiler(); } -TRACY_API int 
___tracy_profiler_started( void ) +TRACY_API int32_t ___tracy_profiler_started( void ) { - return tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ); + return static_cast( tracy::s_isProfilerStarted.load( std::memory_order_seq_cst ) ); } # endif diff --git a/project/thirdparty/tracy-0.11.1/client/TracyProfiler.hpp b/project/thirdparty/tracy-0.12.0/client/TracyProfiler.hpp similarity index 81% rename from project/thirdparty/tracy-0.11.1/client/TracyProfiler.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyProfiler.hpp index 46f11f3d0..8d1690586 100644 --- a/project/thirdparty/tracy-0.11.1/client/TracyProfiler.hpp +++ b/project/thirdparty/tracy-0.12.0/client/TracyProfiler.hpp @@ -114,11 +114,11 @@ struct LuaZoneState #define TracyLfqPrepare( _type ) \ - moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ - auto __token = GetToken(); \ + tracy::moodycamel::ConcurrentQueueDefaultTraits::index_t __magic; \ + auto __token = tracy::GetToken(); \ auto& __tail = __token->get_tail_index(); \ auto item = __token->enqueue_begin( __magic ); \ - MemWrite( &item->hdr.type, _type ); + tracy::MemWrite( &item->hdr.type, _type ); #define TracyLfqCommit \ __tail.store( __magic + 1, std::memory_order_release ); @@ -136,11 +136,11 @@ struct LuaZoneState #ifdef TRACY_FIBERS # define TracyQueuePrepare( _type ) \ - auto item = Profiler::QueueSerial(); \ - MemWrite( &item->hdr.type, _type ); + auto item = tracy::Profiler::QueueSerial(); \ + tracy::MemWrite( &item->hdr.type, _type ); # define TracyQueueCommit( _name ) \ - MemWrite( &item->_name.thread, GetThreadHandle() ); \ - Profiler::QueueSerialFinish(); + tracy::MemWrite( &item->_name.thread, tracy::GetThreadHandle() ); \ + tracy::Profiler::QueueSerialFinish(); # define TracyQueuePrepareC( _type ) \ auto item = tracy::Profiler::QueueSerial(); \ tracy::MemWrite( &item->hdr.type, _type ); @@ -387,58 +387,58 @@ class Profiler TracyLfqCommit; } - static tracy_force_inline void Message( const char* txt, size_t 
size, int callstack ) + static tracy_force_inline void Message( const char* txt, size_t size, int32_t callstack_depth ) { assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - TracyQueuePrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::Message : QueueType::MessageCallstack ); MemWrite( &item->messageFat.time, GetTime() ); MemWrite( &item->messageFat.text, (uint64_t)ptr ); MemWrite( &item->messageFat.size, (uint16_t)size ); TracyQueueCommit( messageFatThread ); } - static tracy_force_inline void Message( const char* txt, int callstack ) + static tracy_force_inline void Message( const char* txt, int32_t callstack_depth ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } - TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? 
QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); MemWrite( &item->messageLiteral.time, GetTime() ); MemWrite( &item->messageLiteral.text, (uint64_t)txt ); TracyQueueCommit( messageLiteralThread ); } - static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ) + static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ) { assert( size < (std::numeric_limits::max)() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - TracyQueuePrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); MemWrite( &item->messageColorFat.time, GetTime() ); MemWrite( &item->messageColorFat.text, (uint64_t)ptr ); MemWrite( &item->messageColorFat.b, uint8_t( ( color ) & 0xFF ) ); @@ -448,17 +448,17 @@ class Profiler TracyQueueCommit( messageColorFatThread ); } - static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack ) + static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int32_t callstack_depth ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - if( callstack != 0 ) + if( callstack_depth != 0 && has_callstack() ) { - tracy::GetProfiler().SendCallstack( callstack ); + tracy::GetProfiler().SendCallstack( callstack_depth ); } - TracyQueuePrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); + TracyQueuePrepare( callstack_depth == 0 ? 
QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); MemWrite( &item->messageColorLiteral.time, GetTime() ); MemWrite( &item->messageColorLiteral.text, (uint64_t)txt ); MemWrite( &item->messageColorLiteral.b, uint8_t( ( color ) & 0xFF ) ); @@ -510,29 +510,31 @@ class Profiler GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ) + static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int32_t depth, bool secure ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemAlloc( ptr, size, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); + profiler.m_serialLock.unlock(); + } + else + { + MemAlloc( ptr, size, secure ); + } } - static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure ) + static tracy_force_inline void MemFreeCallstack( const void* ptr, int32_t depth, bool secure ) { if( secure && !ProfilerAvailable() ) return; if( !ProfilerAllocatorAvailable() ) @@ -540,23 +542,25 @@ class Profiler MemFree( ptr, secure ); return; } -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( 
!profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemFree( ptr, secure ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); + profiler.m_serialLock.unlock(); + } + else + { + MemFree( ptr, secure ); + } } static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ) @@ -587,62 +591,101 @@ class Profiler GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ) + static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int32_t depth, bool secure, const char* name ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemAllocNamed( ptr, size, secure, name ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemName( name ); + SendMemAlloc( 
QueueType::MemAllocCallstackNamed, thread, ptr, size ); + profiler.m_serialLock.unlock(); + } + else + { + MemAllocNamed( ptr, size, secure, name ); + } } - static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ) + static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int32_t depth, bool secure, const char* name ) { if( secure && !ProfilerAvailable() ) return; -#ifdef TRACY_HAS_CALLSTACK - auto& profiler = GetProfiler(); + if( depth > 0 && has_callstack() ) + { + auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND - if( !profiler.IsConnected() ) return; + if( !profiler.IsConnected() ) return; # endif - const auto thread = GetThreadHandle(); + const auto thread = GetThreadHandle(); - auto callstack = Callstack( depth ); + auto callstack = Callstack( depth ); - profiler.m_serialLock.lock(); - SendCallstackSerial( callstack ); - SendMemName( name ); - SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); - profiler.m_serialLock.unlock(); -#else - static_cast(depth); // unused - MemFreeNamed( ptr, secure, name ); -#endif + profiler.m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemName( name ); + SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); + profiler.m_serialLock.unlock(); + } + else + { + MemFreeNamed( ptr, secure, name ); + } } - static tracy_force_inline void SendCallstack( int depth ) + static tracy_force_inline void MemDiscard( const char* name, bool secure ) { -#ifdef TRACY_HAS_CALLSTACK - auto ptr = Callstack( depth ); - TracyQueuePrepare( QueueType::Callstack ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - TracyQueueCommit( callstackFatThread ); -#else - static_cast(depth); // unused + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; #endif + const auto thread = GetThreadHandle(); + + GetProfiler().m_serialLock.lock(); + SendMemDiscard( QueueType::MemDiscard, thread, 
name ); + GetProfiler().m_serialLock.unlock(); + } + + static tracy_force_inline void MemDiscardCallstack( const char* name, bool secure, int32_t depth ) + { + if( secure && !ProfilerAvailable() ) return; + if( depth > 0 && has_callstack() ) + { +# ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +# endif + const auto thread = GetThreadHandle(); + + auto callstack = Callstack( depth ); + + GetProfiler().m_serialLock.lock(); + SendCallstackSerial( callstack ); + SendMemDiscard( QueueType::MemDiscard, thread, name ); + GetProfiler().m_serialLock.unlock(); + } + else + { + MemDiscard( name, secure ); + } + } + + static tracy_force_inline void SendCallstack( int32_t depth ) + { + if( depth > 0 && has_callstack() ) + { + auto ptr = Callstack( depth ); + TracyQueuePrepare( QueueType::Callstack ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + TracyQueueCommit( callstackFatThread ); + } } static tracy_force_inline void ParameterRegister( ParameterCallback cb, void* data ) @@ -692,7 +735,7 @@ class Profiler } #endif - void SendCallstack( int depth, const char* skipBefore ); + void SendCallstack( int32_t depth, const char* skipBefore ); static void CutCallstack( void* callstack, const char* skipBefore ); static bool ShouldExit(); @@ -800,7 +843,7 @@ class Profiler void InstallCrashHandler(); void RemoveCrashHandler(); - + void ClearQueues( tracy::moodycamel::ConsumerToken& token ); void ClearSerial(); DequeueStatus Dequeue( tracy::moodycamel::ConsumerToken& token ); @@ -833,6 +876,21 @@ class Profiler m_bufferOffset += int( len ); } + char* SafeCopyProlog( const char* p, size_t size ); + void SafeCopyEpilog( char* buf ); + + template // must be void( const char* buf, size_t size ) + bool WithSafeCopy( const char* p, size_t size, Callable&& callable ) + { + if( char* buf = SafeCopyProlog( p, size ) ) + { + callable( buf, size ); + SafeCopyEpilog( buf ); + return true; + } + return false; + } + bool SendData( const char* data, size_t len ); void 
SendLongString( uint64_t ptr, const char* str, size_t len, QueueType type ); void SendSourceLocation( uint64_t ptr ); @@ -862,14 +920,13 @@ class Profiler static tracy_force_inline void SendCallstackSerial( void* ptr ) { -#ifdef TRACY_HAS_CALLSTACK - auto item = GetProfiler().m_serialQueue.prepare_next(); - MemWrite( &item->hdr.type, QueueType::CallstackSerial ); - MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); - GetProfiler().m_serialQueue.commit_next(); -#else - static_cast(ptr); // unused -#endif + if( has_callstack() ) + { + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, QueueType::CallstackSerial ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); + GetProfiler().m_serialQueue.commit_next(); + } } static tracy_force_inline void SendMemAlloc( QueueType type, const uint32_t thread, const void* ptr, size_t size ) @@ -907,6 +964,18 @@ class Profiler GetProfiler().m_serialQueue.commit_next(); } + static tracy_force_inline void SendMemDiscard( QueueType type, const uint32_t thread, const char* name ) + { + assert( type == QueueType::MemDiscard || type == QueueType::MemDiscardCallstack ); + + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, type ); + MemWrite( &item->memDiscard.time, GetTime() ); + MemWrite( &item->memDiscard.thread, thread ); + MemWrite( &item->memDiscard.name, (uint64_t)name ); + GetProfiler().m_serialQueue.commit_next(); + } + static tracy_force_inline void SendMemName( const char* name ) { assert( name ); @@ -990,9 +1059,19 @@ class Profiler char* m_queryData; char* m_queryDataPtr; +#ifndef NDEBUG + // m_safeSendBuffer and m_pipe should only be used by the Tracy Profiler thread; this ensures that in debug builds. 
+ std::atomic_bool m_inUse{ false }; +#endif + char* m_safeSendBuffer; + #if defined _WIN32 - void* m_exceptionHandler; + void* m_prevHandler; +#else + int m_pipe[2]; + int m_pipeBufSize; #endif + #ifdef __linux__ struct { struct sigaction pwr, ill, fpe, segv, pipe, bus, abrt; diff --git a/project/thirdparty/tracy-0.11.1/client/TracyRingBuffer.hpp b/project/thirdparty/tracy-0.12.0/client/TracyRingBuffer.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyRingBuffer.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyRingBuffer.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyScoped.hpp b/project/thirdparty/tracy-0.12.0/client/TracyScoped.hpp similarity index 71% rename from project/thirdparty/tracy-0.11.1/client/TracyScoped.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyScoped.hpp index 8e81c998f..7f9256d8c 100644 --- a/project/thirdparty/tracy-0.11.1/client/TracyScoped.hpp +++ b/project/thirdparty/tracy-0.12.0/client/TracyScoped.hpp @@ -10,6 +10,7 @@ #include "../common/TracyAlign.hpp" #include "../common/TracyAlloc.hpp" #include "TracyProfiler.hpp" +#include "TracyCallstack.hpp" namespace tracy { @@ -22,7 +23,7 @@ class ScopedZone ScopedZone& operator=( const ScopedZone& ) = delete; ScopedZone& operator=( ScopedZone&& ) = delete; - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true ) + tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int32_t depth = -1, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -33,13 +34,19 @@ class ScopedZone #ifdef TRACY_ON_DEMAND m_connectionId = GetProfiler().ConnectionId(); #endif - TracyQueuePrepare( QueueType::ZoneBegin ); + auto zoneQueue = QueueType::ZoneBegin; + if( depth > 0 && has_callstack() ) + { + GetProfiler().SendCallstack( depth ); + zoneQueue = QueueType::ZoneBeginCallstack; + } + TracyQueuePrepare( zoneQueue ); MemWrite( &item->zoneBegin.time, 
Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( const SourceLocationData* srcloc, int depth, bool is_active = true ) + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int32_t depth = -1, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -50,55 +57,21 @@ class ScopedZone #ifdef TRACY_ON_DEMAND m_connectionId = GetProfiler().ConnectionId(); #endif - GetProfiler().SendCallstack( depth ); - - TracyQueuePrepare( QueueType::ZoneBeginCallstack ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); - TracyQueueCommit( zoneBeginThread ); - } - - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); - MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); - MemWrite( &item->zoneBegin.srcloc, srcloc ); - TracyQueueCommit( zoneBeginThread ); - } - - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, static_cast(0), is_active ) {} - - tracy_force_inline ScopedZone( uint32_t line, const 
char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color, int depth, bool is_active = true ) -#ifdef TRACY_ON_DEMAND - : m_active( is_active && GetProfiler().IsConnected() ) -#else - : m_active( is_active ) -#endif - { - if( !m_active ) return; -#ifdef TRACY_ON_DEMAND - m_connectionId = GetProfiler().ConnectionId(); -#endif - GetProfiler().SendCallstack( depth ); - - TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); - const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); + auto zoneQueue = QueueType::ZoneBeginAllocSrcLoc; + if( depth > 0 && has_callstack() ) + { + GetProfiler().SendCallstack( depth ); + zoneQueue = QueueType::ZoneBeginAllocSrcLocCallstack; + } + TracyQueuePrepare( zoneQueue ); + const auto srcloc = + Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz, color ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, srcloc ); TracyQueueCommit( zoneBeginThread ); } - tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {} + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active = true ) : ScopedZone( line, source, sourceSz, function, functionSz, name, nameSz, 0, depth, is_active ) {} tracy_force_inline ~ScopedZone() { diff --git a/project/thirdparty/tracy-0.11.1/client/TracyStringHelpers.hpp b/project/thirdparty/tracy-0.12.0/client/TracyStringHelpers.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyStringHelpers.hpp rename to 
project/thirdparty/tracy-0.12.0/client/TracyStringHelpers.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracySysPower.cpp b/project/thirdparty/tracy-0.12.0/client/TracySysPower.cpp similarity index 98% rename from project/thirdparty/tracy-0.11.1/client/TracySysPower.cpp rename to project/thirdparty/tracy-0.12.0/client/TracySysPower.cpp index bd5939da2..6ad1d6478 100644 --- a/project/thirdparty/tracy-0.11.1/client/TracySysPower.cpp +++ b/project/thirdparty/tracy-0.12.0/client/TracySysPower.cpp @@ -85,7 +85,7 @@ void SysPower::ScanDirectory( const char* path, int parent ) FILE* f = fopen( tmp, "r" ); if( f ) { - fscanf( f, "%" PRIu64, &maxRange ); + (void)fscanf( f, "%" PRIu64, &maxRange ); fclose( f ); } } diff --git a/project/thirdparty/tracy-0.11.1/client/TracySysPower.hpp b/project/thirdparty/tracy-0.12.0/client/TracySysPower.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracySysPower.hpp rename to project/thirdparty/tracy-0.12.0/client/TracySysPower.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracySysTime.cpp b/project/thirdparty/tracy-0.12.0/client/TracySysTime.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracySysTime.cpp rename to project/thirdparty/tracy-0.12.0/client/TracySysTime.cpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracySysTime.hpp b/project/thirdparty/tracy-0.12.0/client/TracySysTime.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracySysTime.hpp rename to project/thirdparty/tracy-0.12.0/client/TracySysTime.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracySysTrace.cpp b/project/thirdparty/tracy-0.12.0/client/TracySysTrace.cpp similarity index 91% rename from project/thirdparty/tracy-0.11.1/client/TracySysTrace.cpp rename to project/thirdparty/tracy-0.12.0/client/TracySysTrace.cpp index 0fd1d0ac5..8e7f6139b 100644 --- a/project/thirdparty/tracy-0.11.1/client/TracySysTrace.cpp +++ 
b/project/thirdparty/tracy-0.12.0/client/TracySysTrace.cpp @@ -173,8 +173,11 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record ) MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId ); MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId ); MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber ); - MemWrite( &item->contextSwitch.reason, cswitch->oldThreadWaitReason ); - MemWrite( &item->contextSwitch.state, cswitch->oldThreadState ); + MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason ); + MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState ); + MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority ); + MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority ); + MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 50 ) @@ -183,7 +186,10 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record ) TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart ); + MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber ); MemWrite( &item->threadWakeup.thread, rt->threadId ); + MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason ); + MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement ); TracyLfqCommit; } else if( hdr.EventDescriptor.Opcode == 1 || hdr.EventDescriptor.Opcode == 3 ) @@ -498,11 +504,11 @@ void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const ch if( _GetThreadDescription ) { PWSTR tmp; - _GetThreadDescription( hnd, &tmp ); - char buf[256]; - if( tmp ) + if ( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) ) { + char buf[256]; auto ret = wcstombs( buf, tmp, 256 ); + LocalFree(tmp); if( ret != 0 ) { threadName = CopyString( buf, ret ); @@ -678,7 +684,7 @@ enum TraceEventId EventBranchMiss, EventVsync, EventContextSwitch, - EventWakeup, + 
EventWaking, }; static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid ) @@ -767,16 +773,16 @@ bool SysTraceStart( int64_t& samplingPeriod ) TracyDebug( "perf_event_paranoid: %i\n", paranoidLevel ); #endif - int switchId = -1, wakeupId = -1, vsyncId = -1; + int switchId = -1, wakingId = -1, vsyncId = -1; const auto switchIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_switch/id" ); if( switchIdStr ) switchId = atoi( switchIdStr ); - const auto wakeupIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_wakeup/id" ); - if( wakeupIdStr ) wakeupId = atoi( wakeupIdStr ); + const auto wakingIdStr = ReadFile( "/sys/kernel/debug/tracing/events/sched/sched_waking/id" ); + if( wakingIdStr ) wakingId = atoi( wakingIdStr ); const auto vsyncIdStr = ReadFile( "/sys/kernel/debug/tracing/events/drm/drm_vblank_event/id" ); if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr ); TracyDebug( "sched_switch id: %i\n", switchId ); - TracyDebug( "sched_wakeup id: %i\n", wakeupId ); + TracyDebug( "sched_waking id: %i\n", wakingId ); TracyDebug( "drm_vblank_event id: %i\n", vsyncId ); #ifdef TRACY_NO_SAMPLING @@ -831,7 +837,7 @@ bool SysTraceStart( int64_t& samplingPeriod ) 2 + // CPU cycles + instructions retired 2 + // cache reference + miss 2 + // branch retired + miss - 2 + // context switches + wakeups + 2 + // context switches + waking ups 1 // vsync ); s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers ); @@ -1076,18 +1082,31 @@ bool SysTraceStart( int64_t& samplingPeriod ) } } - if( wakeupId != -1 ) + if( wakingId != -1 ) { - pe.config = wakeupId; - pe.config &= ~PERF_SAMPLE_CALLCHAIN; + pe = {}; + pe.type = PERF_TYPE_TRACEPOINT; + pe.size = sizeof( perf_event_attr ); + pe.sample_period = 1; + pe.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_RAW; + // Coult ask for callstack here + //pe.sample_type |= PERF_SAMPLE_CALLCHAIN; + pe.disabled = 1; + pe.inherit = 1; + pe.config = wakingId; 
+ pe.read_format = 0; +#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) + pe.use_clockid = 1; + pe.clockid = CLOCK_MONOTONIC_RAW; +#endif - TracyDebug( "Setup wakeup capture\n" ); + TracyDebug( "Setup waking up capture\n" ); for( int i=0; i 0 ) { + // Find the earliest event from the active buffers int sel = -1; int selPos; int64_t t0 = std::numeric_limits::max(); @@ -1369,6 +1389,7 @@ void SysTraceWorker( void* ptr ) } } } + // Found any event if( sel >= 0 ) { auto& ring = ringArray[ctxBufferIdx + sel]; @@ -1384,10 +1405,10 @@ void SysTraceWorker( void* ptr ) const auto rid = ring.GetId(); if( rid == EventContextSwitch ) { - // Layout: - // u64 time - // u64 cnt - // u64 ip[cnt] + // Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format + // u64 time // PERF_SAMPLE_TIME + // u64 cnt // PERF_SAMPLE_CALLCHAIN + // u64 ip[cnt] // PERF_SAMPLE_CALLCHAIN // u32 size // u8 data[size] // Data (not ABI stable, but has not changed since it was added, in 2009): @@ -1408,35 +1429,43 @@ void SysTraceWorker( void* ptr ) const auto traceOffset = offset; offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16; - uint32_t prev_pid, next_pid; + uint32_t prev_pid, prev_prio; + uint32_t next_pid, next_prio; long prev_state; ring.Read( &prev_pid, offset, sizeof( uint32_t ) ); - offset += sizeof( uint32_t ) + sizeof( uint32_t ); + offset += sizeof( uint32_t ); + ring.Read( &prev_prio, offset, sizeof( uint32_t ) ); + offset += sizeof( uint32_t ); ring.Read( &prev_state, offset, sizeof( long ) ); offset += sizeof( long ) + 16; ring.Read( &next_pid, offset, sizeof( uint32_t ) ); - - uint8_t reason = 100; - uint8_t state; - - if( prev_state & 0x0001 ) state = 104; - else if( prev_state & 0x0002 ) state = 101; - else if( prev_state & 0x0004 ) state = 105; - else if( prev_state & 0x0008 ) state = 106; - else if( prev_state & 0x0010 ) state = 108; - else if( prev_state & 0x0020 ) state = 109; - else if( 
prev_state & 0x0040 ) state = 110; - else if( prev_state & 0x0080 ) state = 102; - else state = 103; + offset += sizeof( uint32_t ); + ring.Read( &next_prio, offset, sizeof( uint32_t ) ); + + uint8_t oldThreadWaitReason = 100; + uint8_t oldThreadState; + + if( prev_state & 0x0001 ) oldThreadState = 104; + else if( prev_state & 0x0002 ) oldThreadState = 101; + else if( prev_state & 0x0004 ) oldThreadState = 105; + else if( prev_state & 0x0008 ) oldThreadState = 106; + else if( prev_state & 0x0010 ) oldThreadState = 108; + else if( prev_state & 0x0020 ) oldThreadState = 109; + else if( prev_state & 0x0040 ) oldThreadState = 110; + else if( prev_state & 0x0080 ) oldThreadState = 102; + else oldThreadState = 103; TracyLfqPrepare( QueueType::ContextSwitch ); MemWrite( &item->contextSwitch.time, t0 ); MemWrite( &item->contextSwitch.oldThread, prev_pid ); MemWrite( &item->contextSwitch.newThread, next_pid ); MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) ); - MemWrite( &item->contextSwitch.reason, reason ); - MemWrite( &item->contextSwitch.state, state ); + MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason ); + MemWrite( &item->contextSwitch.oldThreadState, oldThreadState ); + MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) ); + MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) ); + MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) ); TracyLfqCommit; if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) ) @@ -1450,27 +1479,33 @@ void SysTraceWorker( void* ptr ) TracyLfqCommit; } } - else if( rid == EventWakeup ) + else if( rid == EventWaking) { + // See /sys/kernel/debug/tracing/events/sched/sched_waking/format // Layout: - // u64 time + // u64 time // PERF_SAMPLE_TIME // u32 size // u8 data[size] // Data: // u8 hdr[8] // u8 comm[16] // u32 pid - // u32 prio - // u64 target_cpu - - offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8 + 16; - + 
// i32 prio + // i32 target_cpu + const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ); + offset += dataOffset + 8 + 16; uint32_t pid; ring.Read( &pid, offset, sizeof( uint32_t ) ); - + TracyLfqPrepare( QueueType::ThreadWakeup ); MemWrite( &item->threadWakeup.time, t0 ); MemWrite( &item->threadWakeup.thread, pid ); + MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() ); + + int8_t adjustReason = -1; // Does not exist on Linux + int8_t adjustIncrement = 0; // Should perhaps store the new prio? + MemWrite( &item->threadWakeup.adjustReason, adjustReason ); + MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement ); TracyLfqCommit; } else diff --git a/project/thirdparty/tracy-0.11.1/client/TracySysTrace.hpp b/project/thirdparty/tracy-0.12.0/client/TracySysTrace.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracySysTrace.hpp rename to project/thirdparty/tracy-0.12.0/client/TracySysTrace.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/TracyThread.hpp b/project/thirdparty/tracy-0.12.0/client/TracyThread.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/TracyThread.hpp rename to project/thirdparty/tracy-0.12.0/client/TracyThread.hpp diff --git a/project/thirdparty/tracy-0.11.1/client/tracy_SPSCQueue.h b/project/thirdparty/tracy-0.12.0/client/tracy_SPSCQueue.h similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/tracy_SPSCQueue.h rename to project/thirdparty/tracy-0.12.0/client/tracy_SPSCQueue.h diff --git a/project/thirdparty/tracy-0.11.1/client/tracy_concurrentqueue.h b/project/thirdparty/tracy-0.12.0/client/tracy_concurrentqueue.h similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/tracy_concurrentqueue.h rename to project/thirdparty/tracy-0.12.0/client/tracy_concurrentqueue.h diff --git a/project/thirdparty/tracy-0.11.1/client/tracy_rpmalloc.cpp b/project/thirdparty/tracy-0.12.0/client/tracy_rpmalloc.cpp 
similarity index 99% rename from project/thirdparty/tracy-0.11.1/client/tracy_rpmalloc.cpp rename to project/thirdparty/tracy-0.12.0/client/tracy_rpmalloc.cpp index 4a0d0b400..315a40f96 100644 --- a/project/thirdparty/tracy-0.11.1/client/tracy_rpmalloc.cpp +++ b/project/thirdparty/tracy-0.12.0/client/tracy_rpmalloc.cpp @@ -690,7 +690,9 @@ static pthread_key_t _memory_thread_heap; # define _Thread_local __declspec(thread) # define TLS_MODEL # else -# ifndef __HAIKU__ +# if defined(__ANDROID__) && __ANDROID_API__ >= 29 && defined(__NDK_MAJOR__) && __NDK_MAJOR__ >= 26 +# define TLS_MODEL __attribute__((tls_model("local-dynamic"))) +# elif !defined(__HAIKU__) # define TLS_MODEL __attribute__((tls_model("initial-exec"))) # else # define TLS_MODEL diff --git a/project/thirdparty/tracy-0.11.1/client/tracy_rpmalloc.hpp b/project/thirdparty/tracy-0.12.0/client/tracy_rpmalloc.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/client/tracy_rpmalloc.hpp rename to project/thirdparty/tracy-0.12.0/client/tracy_rpmalloc.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyAlign.hpp b/project/thirdparty/tracy-0.12.0/common/TracyAlign.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyAlign.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyAlign.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyAlloc.hpp b/project/thirdparty/tracy-0.12.0/common/TracyAlloc.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyAlloc.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyAlloc.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyApi.h b/project/thirdparty/tracy-0.12.0/common/TracyApi.h similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyApi.h rename to project/thirdparty/tracy-0.12.0/common/TracyApi.h diff --git a/project/thirdparty/tracy-0.11.1/common/TracyColor.hpp b/project/thirdparty/tracy-0.12.0/common/TracyColor.hpp similarity index 100% 
rename from project/thirdparty/tracy-0.11.1/common/TracyColor.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyColor.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyForceInline.hpp b/project/thirdparty/tracy-0.12.0/common/TracyForceInline.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyForceInline.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyForceInline.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyMutex.hpp b/project/thirdparty/tracy-0.12.0/common/TracyMutex.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyMutex.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyMutex.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyProtocol.hpp b/project/thirdparty/tracy-0.12.0/common/TracyProtocol.hpp similarity index 98% rename from project/thirdparty/tracy-0.11.1/common/TracyProtocol.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyProtocol.hpp index 54124586a..40cf5e673 100644 --- a/project/thirdparty/tracy-0.11.1/common/TracyProtocol.hpp +++ b/project/thirdparty/tracy-0.12.0/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 69 }; +enum : uint32_t { ProtocolVersion = 74 }; enum : uint16_t { BroadcastVersion = 3 }; using lz4sz_t = uint32_t; diff --git a/project/thirdparty/tracy-0.11.1/common/TracyQueue.hpp b/project/thirdparty/tracy-0.12.0/common/TracyQueue.hpp similarity index 97% rename from project/thirdparty/tracy-0.11.1/common/TracyQueue.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyQueue.hpp index affbd67ab..daef3ec1b 100644 --- a/project/thirdparty/tracy-0.11.1/common/TracyQueue.hpp +++ b/project/thirdparty/tracy-0.12.0/common/TracyQueue.hpp @@ -42,6 +42,8 @@ enum class QueueType : uint8_t MemAllocCallstackNamed, MemFreeCallstack, MemFreeCallstackNamed, + MemDiscard, + 
MemDiscardCallstack, GpuZoneBegin, GpuZoneBeginCallstack, GpuZoneBeginAllocSrcLoc, @@ -401,7 +403,10 @@ enum class GpuContextType : uint8_t Vulkan, OpenCL, Direct3D12, - Direct3D11 + Direct3D11, + Metal, + Custom, + CUDA }; enum GpuContextFlags : uint8_t @@ -500,6 +505,13 @@ struct QueueMemFree uint64_t ptr; }; +struct QueueMemDiscard +{ + int64_t time; + uint32_t thread; + uint64_t name; +}; + struct QueueCallstackFat { uint64_t ptr; @@ -593,14 +605,20 @@ struct QueueContextSwitch uint32_t oldThread; uint32_t newThread; uint8_t cpu; - uint8_t reason; - uint8_t state; + uint8_t oldThreadWaitReason; + uint8_t oldThreadState; + uint8_t previousCState; + int8_t newThreadPriority; + int8_t oldThreadPriority; }; struct QueueThreadWakeup { int64_t time; uint32_t thread; + uint8_t cpu; + int8_t adjustReason; + int8_t adjustIncrement; }; struct QueueTidToPid @@ -740,6 +758,7 @@ struct QueueItem QueueGpuContextNameFat gpuContextNameFat; QueueMemAlloc memAlloc; QueueMemFree memFree; + QueueMemDiscard memDiscard; QueueMemNamePayload memName; QueueThreadGroupHint threadGroupHint; QueueCallstackFat callstackFat; @@ -811,6 +830,8 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named + sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), + sizeof( QueueHeader ) + sizeof( QueueMemDiscard ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBeginLean ),// allocated source location diff --git a/project/thirdparty/tracy-0.11.1/common/TracySocket.cpp b/project/thirdparty/tracy-0.12.0/common/TracySocket.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracySocket.cpp rename to project/thirdparty/tracy-0.12.0/common/TracySocket.cpp diff --git 
a/project/thirdparty/tracy-0.11.1/common/TracySocket.hpp b/project/thirdparty/tracy-0.12.0/common/TracySocket.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracySocket.hpp rename to project/thirdparty/tracy-0.12.0/common/TracySocket.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyStackFrames.cpp b/project/thirdparty/tracy-0.12.0/common/TracyStackFrames.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyStackFrames.cpp rename to project/thirdparty/tracy-0.12.0/common/TracyStackFrames.cpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyStackFrames.hpp b/project/thirdparty/tracy-0.12.0/common/TracyStackFrames.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyStackFrames.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyStackFrames.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracySystem.cpp b/project/thirdparty/tracy-0.12.0/common/TracySystem.cpp similarity index 99% rename from project/thirdparty/tracy-0.11.1/common/TracySystem.cpp rename to project/thirdparty/tracy-0.12.0/common/TracySystem.cpp index d51f5d65b..a92a34578 100644 --- a/project/thirdparty/tracy-0.11.1/common/TracySystem.cpp +++ b/project/thirdparty/tracy-0.12.0/common/TracySystem.cpp @@ -26,7 +26,9 @@ # include #elif defined __FreeBSD__ # include -#elif defined __NetBSD__ || defined __DragonFly__ +#elif defined __NetBSD__ +# include +#elif defined __DragonFly__ # include #elif defined __QNX__ # include diff --git a/project/thirdparty/tracy-0.11.1/common/TracySystem.hpp b/project/thirdparty/tracy-0.12.0/common/TracySystem.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracySystem.hpp rename to project/thirdparty/tracy-0.12.0/common/TracySystem.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyUwp.hpp b/project/thirdparty/tracy-0.12.0/common/TracyUwp.hpp similarity index 100% rename from 
project/thirdparty/tracy-0.11.1/common/TracyUwp.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyUwp.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/TracyVersion.hpp b/project/thirdparty/tracy-0.12.0/common/TracyVersion.hpp similarity index 75% rename from project/thirdparty/tracy-0.11.1/common/TracyVersion.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyVersion.hpp index 12642d652..f1e3c0b2c 100644 --- a/project/thirdparty/tracy-0.11.1/common/TracyVersion.hpp +++ b/project/thirdparty/tracy-0.12.0/common/TracyVersion.hpp @@ -6,8 +6,8 @@ namespace tracy namespace Version { enum { Major = 0 }; -enum { Minor = 11 }; -enum { Patch = 2 }; +enum { Minor = 12 }; +enum { Patch = 0 }; } } diff --git a/project/thirdparty/tracy-0.11.1/common/TracyYield.hpp b/project/thirdparty/tracy-0.12.0/common/TracyYield.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/TracyYield.hpp rename to project/thirdparty/tracy-0.12.0/common/TracyYield.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/tracy_lz4.cpp b/project/thirdparty/tracy-0.12.0/common/tracy_lz4.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/tracy_lz4.cpp rename to project/thirdparty/tracy-0.12.0/common/tracy_lz4.cpp diff --git a/project/thirdparty/tracy-0.11.1/common/tracy_lz4.hpp b/project/thirdparty/tracy-0.12.0/common/tracy_lz4.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/tracy_lz4.hpp rename to project/thirdparty/tracy-0.12.0/common/tracy_lz4.hpp diff --git a/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.cpp b/project/thirdparty/tracy-0.12.0/common/tracy_lz4hc.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.cpp rename to project/thirdparty/tracy-0.12.0/common/tracy_lz4hc.cpp diff --git a/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.hpp b/project/thirdparty/tracy-0.12.0/common/tracy_lz4hc.hpp similarity index 100% rename from 
project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.hpp rename to project/thirdparty/tracy-0.12.0/common/tracy_lz4hc.hpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/LICENSE b/project/thirdparty/tracy-0.12.0/libbacktrace/LICENSE similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/LICENSE rename to project/thirdparty/tracy-0.12.0/libbacktrace/LICENSE diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/alloc.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/alloc.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/alloc.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/alloc.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/backtrace.hpp b/project/thirdparty/tracy-0.12.0/libbacktrace/backtrace.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/backtrace.hpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/backtrace.hpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/config.h b/project/thirdparty/tracy-0.12.0/libbacktrace/config.h similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/config.h rename to project/thirdparty/tracy-0.12.0/libbacktrace/config.h diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/dwarf.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/dwarf.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/dwarf.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/dwarf.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/elf.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/elf.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/elf.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/elf.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/fileline.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/fileline.cpp similarity index 100% rename from 
project/thirdparty/tracy-0.11.1/libbacktrace/fileline.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/fileline.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/filenames.hpp b/project/thirdparty/tracy-0.12.0/libbacktrace/filenames.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/filenames.hpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/filenames.hpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/internal.hpp b/project/thirdparty/tracy-0.12.0/libbacktrace/internal.hpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/internal.hpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/internal.hpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/macho.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/macho.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/macho.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/macho.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/mmapio.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/mmapio.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/mmapio.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/mmapio.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/posix.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/posix.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/posix.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/posix.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/sort.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/sort.cpp similarity index 100% rename from project/thirdparty/tracy-0.11.1/libbacktrace/sort.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/sort.cpp diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/state.cpp b/project/thirdparty/tracy-0.12.0/libbacktrace/state.cpp similarity index 100% rename from 
project/thirdparty/tracy-0.11.1/libbacktrace/state.cpp rename to project/thirdparty/tracy-0.12.0/libbacktrace/state.cpp diff --git a/project/thirdparty/tracy-0.12.0/tracy/Tracy.hpp b/project/thirdparty/tracy-0.12.0/tracy/Tracy.hpp new file mode 100644 index 000000000..605d149fa --- /dev/null +++ b/project/thirdparty/tracy-0.12.0/tracy/Tracy.hpp @@ -0,0 +1,254 @@ +#ifndef __TRACY_HPP__ +#define __TRACY_HPP__ + +#include "../common/TracyColor.hpp" +#include "../common/TracySystem.hpp" + +#ifndef TracyFunction +# define TracyFunction __FUNCTION__ +#endif + +#ifndef TracyFile +# define TracyFile __FILE__ +#endif + +#ifndef TracyLine +# define TracyLine TracyConcat(__LINE__,U) // MSVC Edit and continue __LINE__ is non-constant. See https://developercommunity.visualstudio.com/t/-line-cannot-be-used-as-an-argument-for-constexpr/195665 +#endif + +#ifndef TRACY_ENABLE + +#define TracyNoop + +#define ZoneNamed(x,y) +#define ZoneNamedN(x,y,z) +#define ZoneNamedC(x,y,z) +#define ZoneNamedNC(x,y,z,w) + +#define ZoneTransient(x,y) +#define ZoneTransientN(x,y,z) + +#define ZoneScoped +#define ZoneScopedN(x) +#define ZoneScopedC(x) +#define ZoneScopedNC(x,y) + +#define ZoneText(x,y) +#define ZoneTextV(x,y,z) +#define ZoneTextF(x,...) +#define ZoneTextVF(x,y,...) +#define ZoneName(x,y) +#define ZoneNameV(x,y,z) +#define ZoneNameF(x,...) +#define ZoneNameVF(x,y,...) 
+#define ZoneColor(x) +#define ZoneColorV(x,y) +#define ZoneValue(x) +#define ZoneValueV(x,y) +#define ZoneIsActive false +#define ZoneIsActiveV(x) false + +#define FrameMark +#define FrameMarkNamed(x) +#define FrameMarkStart(x) +#define FrameMarkEnd(x) + +#define FrameImage(x,y,z,w,a) + +#define TracyLockable( type, varname ) type varname +#define TracyLockableN( type, varname, desc ) type varname +#define TracySharedLockable( type, varname ) type varname +#define TracySharedLockableN( type, varname, desc ) type varname +#define LockableBase( type ) type +#define SharedLockableBase( type ) type +#define LockMark(x) (void)x +#define LockableName(x,y,z) + +#define TracyPlot(x,y) +#define TracyPlotConfig(x,y,z,w,a) + +#define TracyMessage(x,y) +#define TracyMessageL(x) +#define TracyMessageC(x,y,z) +#define TracyMessageLC(x,y) +#define TracyAppInfo(x,y) + +#define TracyAlloc(x,y) +#define TracyFree(x) +#define TracyMemoryDiscard(x) +#define TracySecureAlloc(x,y) +#define TracySecureFree(x) +#define TracySecureMemoryDiscard(x) + +#define TracyAllocN(x,y,z) +#define TracyFreeN(x,y) +#define TracySecureAllocN(x,y,z) +#define TracySecureFreeN(x,y) + +#define ZoneNamedS(x,y,z) +#define ZoneNamedNS(x,y,z,w) +#define ZoneNamedCS(x,y,z,w) +#define ZoneNamedNCS(x,y,z,w,a) + +#define ZoneTransientS(x,y,z) +#define ZoneTransientNS(x,y,z,w) + +#define ZoneScopedS(x) +#define ZoneScopedNS(x,y) +#define ZoneScopedCS(x,y) +#define ZoneScopedNCS(x,y,z) + +#define TracyAllocS(x,y,z) +#define TracyFreeS(x,y) +#define TracyMemoryDiscardS(x,y) +#define TracySecureAllocS(x,y,z) +#define TracySecureFreeS(x,y) +#define TracySecureMemoryDiscardS(x,y) + +#define TracyAllocNS(x,y,z,w) +#define TracyFreeNS(x,y,z) +#define TracySecureAllocNS(x,y,z,w) +#define TracySecureFreeNS(x,y,z) + +#define TracyMessageS(x,y,z) +#define TracyMessageLS(x,y) +#define TracyMessageCS(x,y,z,w) +#define TracyMessageLCS(x,y,z) + +#define TracySourceCallbackRegister(x,y) +#define TracyParameterRegister(x,y) 
+#define TracyParameterSetup(x,y,z,w) +#define TracyIsConnected false +#define TracyIsStarted false +#define TracySetProgramName(x) + +#define TracyFiberEnter(x) +#define TracyFiberEnterHint(x,y) +#define TracyFiberLeave + +#else + +#include + +#include "../client/TracyLock.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyScoped.hpp" + +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 +#endif + +#define TracyNoop tracy::ProfilerAvailable() + +#define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +#define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) + +#define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) +#define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, 
strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) +#define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) + +#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) +#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true ) +#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true ) +#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) + +#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) +#define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) +#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) +#define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) +#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) +#define ZoneNameVF( varname, fmt, ... 
) varname.NameFmt( fmt, ##__VA_ARGS__ ) +#define ZoneColor( color ) ___tracy_scoped_zone.Color( color ) +#define ZoneColorV( varname, color ) varname.Color( color ) +#define ZoneValue( value ) ___tracy_scoped_zone.Value( value ) +#define ZoneValueV( varname, value ) varname.Value( value ) +#define ZoneIsActive ___tracy_scoped_zone.IsActive() +#define ZoneIsActiveV( varname ) varname.IsActive() + +#define FrameMark tracy::Profiler::SendFrameMark( nullptr ) +#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name ) +#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ) +#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ) + +#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip ) + +#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() } +#define LockableBase( type ) tracy::Lockable +#define SharedLockableBase( type ) tracy::SharedLockable +#define LockMark( varname ) static constexpr 
tracy::SourceLocationData __tracy_lock_location_##__LINE__ { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##__LINE__ ) +#define LockableName( varname, txt, size ) varname.CustomName( txt, size ) + +#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ) +#define TracyPlotConfig( name, type, step, fill, color ) tracy::Profiler::ConfigurePlot( name, type, step, fill, color ) + +#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size ) + +#define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK ) +#define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK ) +#define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ) +#define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ) + +#define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ) +#define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ) +#define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ) +#define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ) + +#define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ) +#define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) +#define TracyMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, false, TRACY_CALLSTACK ) +#define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) +#define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) +#define TracySecureMemoryDiscard( name ) tracy::Profiler::MemDiscardCallstack( name, true, TRACY_CALLSTACK ) 
+ +#define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +#define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) + +#define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) +#define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) + +#define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) +#define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) +#define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) +#define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) + +#define TracyAllocS( ptr, 
size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) +#define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) +#define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) +#define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) + +#define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) +#define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) +#define TracyMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, false, depth ) +#define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) +#define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) +#define TracySecureMemoryDiscardS( name, depth ) tracy::Profiler::MemDiscardCallstack( name, true, depth ) + +#define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) +#define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) +#define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) +#define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) + +#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) +#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) +#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) +#define TracyIsConnected tracy::GetProfiler().IsConnected() +#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); + +#ifdef TRACY_FIBERS +# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) +# define TracyFiberEnterHint( 
fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) +# define TracyFiberLeave tracy::Profiler::LeaveFiber() +#endif + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyC.h b/project/thirdparty/tracy-0.12.0/tracy/TracyC.h similarity index 54% rename from project/thirdparty/tracy-0.11.1/tracy/TracyC.h rename to project/thirdparty/tracy-0.12.0/tracy/TracyC.h index 8b447beb5..1b1373e0d 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyC.h +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyC.h @@ -4,7 +4,6 @@ #include #include -#include "../client/TracyCallstack.h" #include "../common/TracyApi.h" #ifdef __cplusplus @@ -53,8 +52,10 @@ typedef const void* TracyCLockCtx; #define TracyCAlloc(x,y) #define TracyCFree(x) +#define TracyCMemoryDiscard(x) #define TracyCSecureAlloc(x,y) #define TracyCSecureFree(x) +#define TracyCSecureMemoryDiscard(x) #define TracyCAllocN(x,y,z) #define TracyCFreeN(x,y) @@ -85,8 +86,10 @@ typedef const void* TracyCLockCtx; #define TracyCAllocS(x,y,z) #define TracyCFreeS(x,y) +#define TracyCMemoryDiscardS(x,y) #define TracyCSecureAllocS(x,y,z) #define TracyCSecureFreeS(x,y) +#define TracyCSecureMemoryDiscardS(x,y) #define TracyCAllocNS(x,y,z,w) #define TracyCFreeNS(x,y,z) @@ -137,7 +140,7 @@ struct ___tracy_source_location_data struct ___tracy_c_zone_context { uint32_t id; - int active; + int32_t active; }; struct ___tracy_gpu_time_data @@ -155,7 +158,7 @@ struct ___tracy_gpu_zone_begin_data { struct ___tracy_gpu_zone_begin_callstack_data { uint64_t srcloc; - int depth; + int32_t depth; uint16_t queryId; uint8_t context; }; @@ -201,7 +204,7 @@ typedef struct __tracy_lockable_context_data* TracyCLockCtx; #ifdef TRACY_MANUAL_LIFETIME TRACY_API void ___tracy_startup_profiler(void); TRACY_API void ___tracy_shutdown_profiler(void); -TRACY_API int ___tracy_profiler_started(void); +TRACY_API int32_t ___tracy_profiler_started(void); # define TracyCIsStarted ___tracy_profiler_started() #else @@ -211,10 +214,10 @@ 
TRACY_API int ___tracy_profiler_started(void); TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ); TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ); -TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int32_t depth, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int32_t active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int32_t depth, int32_t active ); TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ); TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ); TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ); @@ -243,20 +246,17 @@ TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_ TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); -TRACY_API int ___tracy_connected(void); +TRACY_API int32_t ___tracy_connected(void); -#if defined TRACY_HAS_CALLSTACK && defined 
TRACY_CALLSTACK -# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); -#else -# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneC( ctx, color, active ) static 
const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); -# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 #endif +#define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); + #define TracyCZoneEnd( ctx ) 
___tracy_emit_zone_end( ctx ); #define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size ); @@ -265,57 +265,44 @@ TRACY_API int ___tracy_connected(void); #define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ); -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ); -TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ); -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ); -TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ); -TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ); - -TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ); -TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ); -TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ); -TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ); - -#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) -# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) -# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) -# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) - -# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, 
size, TRACY_CALLSTACK, 0, name ) -# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) -# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) -# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) - -# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); -# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); -# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); -# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); -#else -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 ); -# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 ); -# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 ); -# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 ); - -# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name ); -# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name ); -# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name ); -# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name ); - -# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 ); -# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 ); -# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 ); -# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 ); -#endif +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int32_t secure ); +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int32_t depth, int32_t secure ); +TRACY_API void 
___tracy_emit_memory_free( const void* ptr, int32_t secure ); +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int32_t depth, int32_t secure ); +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int32_t depth, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int32_t depth, int32_t secure, const char* name ); +TRACY_API void ___tracy_emit_memory_discard( const char* name, int32_t secure ); +TRACY_API void ___tracy_emit_memory_discard_callstack( const char* name, int32_t secure, int32_t depth ); + +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageL( const char* txt, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int32_t callstack_depth ); +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int32_t callstack_depth ); + +#define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) +#define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) +#define TracyCMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 0, TRACY_CALLSTACK ); +#define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) +#define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) +#define TracyCSecureMemoryDiscard( name ) ___tracy_emit_memory_discard_callstack( name, 1, TRACY_CALLSTACK ); + +#define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) +#define 
TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) +#define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) +#define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) + +#define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); +#define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); +#define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); +#define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); TRACY_API void ___tracy_emit_frame_mark( const char* name ); TRACY_API void ___tracy_emit_frame_mark_start( const char* name ); TRACY_API void ___tracy_emit_frame_mark_end( const char* name ); -TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ); +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int32_t flip ); #define TracyCFrameMark ___tracy_emit_frame_mark( 0 ); #define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name ); @@ -327,7 +314,7 @@ TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_ TRACY_API void ___tracy_emit_plot( const char* name, double val ); TRACY_API void ___tracy_emit_plot_float( const char* name, float val ); TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ); -TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ); +TRACY_API void ___tracy_emit_plot_config( const char* name, int32_t type, int32_t step, int32_t fill, uint32_t color ); TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); #define TracyCPlot( name, val ) ___tracy_emit_plot( name, val ); @@ -337,55 +324,35 @@ TRACY_API void 
___tracy_emit_message_appinfo( const char* txt, size_t size ); #define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size ); -#ifdef TRACY_HAS_CALLSTACK -# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, 
(uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +#define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); -# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) -# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) -# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) -# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) +#define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) +#define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) +#define TracyCMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 0, depth ) +#define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) +#define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) +#define TracyCSecureMemoryDiscardS( name, depth ) ___tracy_emit_memory_discard_callstack( name, 1, depth ) -# define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) -# define TracyCFreeNS( ptr, depth, name ) 
___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) -# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) -# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) +#define TracyCAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) +#define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) +#define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) +#define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) -# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); -# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); -# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); -# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); -#else -# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active ) -# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active ) -# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active ) -# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active ) - -# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size ) -# define TracyCFreeS( ptr, depth ) TracyCFree( ptr ) -# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size ) -# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr ) - -# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name ) -# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name ) -# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name 
) -# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name ) - -# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size ) -# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt ) -# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color ) -# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color ) -#endif +#define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); +#define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); +#define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); +#define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ); TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); -TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API int32_t ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); -TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired ); +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int32_t acquired ); TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ); TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ); diff --git a/project/thirdparty/tracy-0.12.0/tracy/TracyCUDA.hpp 
b/project/thirdparty/tracy-0.12.0/tracy/TracyCUDA.hpp new file mode 100644 index 000000000..40ff55dc2 --- /dev/null +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyCUDA.hpp @@ -0,0 +1,1325 @@ +#ifndef __TRACYCUDA_HPP__ +#define __TRACYCUDA_HPP__ + +#ifndef TRACY_ENABLE + +#define TracyCUDAContext() nullptr +#define TracyCUDAContextDestroy(ctx) +#define TracyCUDAContextName(ctx, name, size) + +#define TracyCUDAStartProfiling(ctx) +#define TracyCUDAStopProfiling(ctx) + +#define TracyCUDACollect(ctx) + +#else +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _MSC_VER +#include +#endif + +#include + +#ifndef UNREFERENCED +#define UNREFERENCED(x) (void)x +#endif//UNREFERENCED + +#ifndef TRACY_CUDA_CALIBRATED_CONTEXT +#define TRACY_CUDA_CALIBRATED_CONTEXT (1) +#endif//TRACY_CUDA_CALIBRATED_CONTEXT + +#ifndef TRACY_CUDA_ENABLE_COLLECTOR_THREAD +#define TRACY_CUDA_ENABLE_COLLECTOR_THREAD (1) +#endif//TRACY_CUDA_ENABLE_COLLECTOR_THREAD + +#ifndef TRACY_CUDA_ENABLE_CUDA_CALL_STATS +#define TRACY_CUDA_ENABLE_CUDA_CALL_STATS (0) +#endif//TRACY_CUDA_ENABLE_CUDA_CALL_STATS + +namespace { + +// TODO(marcos): wrap these in structs for better type safety +using CUptiTimestamp = uint64_t; +using TracyTimestamp = int64_t; + +struct IncrementalRegression { + using float_t = double; + struct Parameters { + float_t slope, intercept; + }; + + int n = 0; + float_t x_mean = 0; + float_t y_mean = 0; + float_t x_svar = 0; + float_t y_svar = 0; + float_t xy_scov = 0; + + auto parameters() const { + float_t slope = xy_scov / x_svar; + float_t intercept = y_mean - slope * x_mean; + return Parameters{ slope, intercept }; + } + + auto orthogonal() const { + // NOTE(marcos): orthogonal regression is Deming regression with delta = 1 + float_t delta = float_t(1); // delta = 1 -> orthogonal regression + float_t k = y_svar - delta * x_svar; + float_t slope = (k + sqrt(k * k + 4 * delta * xy_scov * xy_scov)) / (2 * 
xy_scov); + float_t intercept = y_mean - slope * x_mean; + return Parameters{ slope, intercept }; + } + + void addSample(float_t x, float_t y) { + ++n; + float_t x_mean_prev = x_mean; + float_t y_mean_prev = y_mean; + x_mean += (x - x_mean) / n; + y_mean += (y - y_mean) / n; + x_svar += (x - x_mean_prev) * (x - x_mean); + y_svar += (y - y_mean_prev) * (y - y_mean); + xy_scov += (x - x_mean_prev) * (y - y_mean); + } +}; + +tracy_force_inline TracyTimestamp tracyGetTimestamp() { + return tracy::Profiler::GetTime(); +} + +auto& getCachedRegressionParameters() { + // WARN(marcos): in theory, these linear regression parameters would be loaded/stored atomically; + // in practice, however, it should not matter so long as the loads/stores are not "sliced" + static IncrementalRegression::Parameters cached; + return cached; +} + +TracyTimestamp tracyFromCUpti(CUptiTimestamp cuptiTime) { + // NOTE(marcos): linear regression estimate + // y_hat = slope * x + intercept | X: CUptiTimestamp, Y: TracyTimestamp + auto [slope, intercept] = getCachedRegressionParameters(); + double y_hat = slope * cuptiTime + intercept; + TracyTimestamp tracyTime = TracyTimestamp(y_hat); + assert(tracyTime >= 0); + return tracyTime; +} + +template +tracy_force_inline void tracyMemWrite(T& where,U what) { + static_assert(std::is_same_v, "tracy::MemWrite: type mismatch."); + tracy::MemWrite(&where, what); +} + +void* tracyMalloc(size_t bytes) { + return tracy::tracy_malloc(bytes); +} + +void tracyFree(void* ptr) { + tracy::tracy_free(ptr); +} + +void tracyZoneBegin(TracyTimestamp time, tracy::SourceLocationData* srcLoc) { + using namespace tracy; + TracyQueuePrepare(QueueType::ZoneBegin); + tracyMemWrite(item->zoneBegin.time, time); + tracyMemWrite(item->zoneBegin.srcloc, (uint64_t)srcLoc); + TracyQueueCommit(zoneBeginThread); +} + +void tracyZoneEnd(TracyTimestamp time) { + using namespace tracy; + TracyQueuePrepare(QueueType::ZoneEnd); + tracyMemWrite(item->zoneEnd.time, time); + 
TracyQueueCommit(zoneEndThread); +} + +void tracyPlot(const char* name, float value, TracyTimestamp time) { + using namespace tracy; + TracyLfqPrepare(QueueType::PlotDataFloat); + tracyMemWrite(item->plotDataFloat.name, (uint64_t)name); + tracyMemWrite(item->plotDataFloat.time, time); + tracyMemWrite(item->plotDataFloat.val, value); + TracyLfqCommit; +} + +void tracyPlot(const char* name, float value, CUptiTimestamp time) { + tracyPlot(name, value, tracyFromCUpti(time)); +} + +void tracyPlotActivity(const char* name, TracyTimestamp start, TracyTimestamp end, float value = 1.0f, float baseline = 0.0f) { + tracyPlot(name, baseline, start); + tracyPlot(name, value, start + 3); + tracyPlot(name, value, end - 3); + tracyPlot(name, baseline, end); +} + +void tracyPlotActivity(const char* name, CUptiTimestamp start, CUptiTimestamp end, float value = 1.0f, float baseline = 0.0f) { + tracyPlotActivity(name, tracyFromCUpti(start), tracyFromCUpti(end), value, baseline); +} + +void tracyPlotBlip(const char* name, TracyTimestamp time, float value = 1.0f, float baseline = 0.0f) { + tracyPlot(name, baseline, time - 3); + tracyPlot(name, value, time); + tracyPlot(name, baseline, time + 3); +} + +void tracyPlotBlip(const char* name, CUptiTimestamp time, float value = 1.0f, float baseline = 0.0f) { + tracyPlotBlip(name, tracyFromCUpti(time), value, baseline); +} + +void tracyEmitMemAlloc(const char* name, const void* ptr, size_t size, TracyTimestamp time) { + using namespace tracy; + const auto thread = GetThreadHandle(); + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemNamePayload); + tracyMemWrite(item->memName.name, (uint64_t)name); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemAllocNamed); + tracyMemWrite(item->memAlloc.time, time); + tracyMemWrite(item->memAlloc.thread, thread); + tracyMemWrite(item->memAlloc.ptr, (uint64_t)ptr); + + if (compile_time_condition::value) + { 
+ memcpy(&item->memAlloc.size, &size, 4); + // zero the two upper bytes of the size field: the +4 offset must be applied to a byte pointer + // (same form as the 64-bit branch below), not to the field's own pointer type + memset(((char *)&item->memAlloc.size) + 4, 0, 2); + } + else + { + assert(sizeof(size) == 8); + memcpy(&item->memAlloc.size, &size, 4); + memcpy(((char *)&item->memAlloc.size) + 4, ((char *)&size) + 4, 2); + } + Profiler::QueueSerialFinish(); +} + +void tracyEmitMemFree(const char* name, const void* ptr, TracyTimestamp time) { + using namespace tracy; + const auto thread = GetThreadHandle(); + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemNamePayload); + tracyMemWrite(item->memName.name, (uint64_t)name); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::MemFreeNamed); + tracyMemWrite(item->memFree.time, time); + tracyMemWrite(item->memFree.thread, thread); + tracyMemWrite(item->memFree.ptr, (uint64_t)ptr); + Profiler::QueueSerialFinish(); +} + +void tracyEmitMemAlloc(const char* name, const void* ptr, size_t size, CUptiTimestamp cuptiTime) { + tracyEmitMemAlloc(name, ptr, size, tracyFromCUpti(cuptiTime)); +} + +void tracyEmitMemFree(const char* name, const void* ptr, CUptiTimestamp cuptiTime) { + tracyEmitMemFree(name, ptr, tracyFromCUpti(cuptiTime)); +} + +void tracyAnnounceGpuTimestamp(TracyTimestamp apiStart, TracyTimestamp apiEnd, + uint16_t queryId, uint8_t gpuContextId, + const tracy::SourceLocationData* sourceLocation, uint32_t threadId) { + using namespace tracy; + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuZoneBeginSerial); + tracyMemWrite(item->gpuZoneBegin.cpuTime, apiStart); + tracyMemWrite(item->gpuZoneBegin.srcloc, (uint64_t)sourceLocation); + tracyMemWrite(item->gpuZoneBegin.thread, threadId); + tracyMemWrite(item->gpuZoneBegin.queryId, uint16_t(queryId+0)); + tracyMemWrite(item->gpuZoneBegin.context, gpuContextId); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuZoneEndSerial); +
tracyMemWrite(item->gpuZoneEnd.cpuTime, apiEnd); + tracyMemWrite(item->gpuZoneEnd.thread, threadId); + tracyMemWrite(item->gpuZoneEnd.queryId, uint16_t(queryId+1)); + tracyMemWrite(item->gpuZoneEnd.context, gpuContextId); + Profiler::QueueSerialFinish(); +} + +void tracySubmitGpuTimestamp(CUptiTimestamp gpuStart, CUptiTimestamp gpuEnd, + uint16_t queryId, uint8_t gpuContextId) { + using namespace tracy; + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuTime); + tracyMemWrite(item->gpuTime.gpuTime, (int64_t)gpuStart); + tracyMemWrite(item->gpuTime.queryId, uint16_t(queryId+0)); + tracyMemWrite(item->gpuTime.context, gpuContextId); + Profiler::QueueSerialFinish(); + + item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuTime); + tracyMemWrite(item->gpuTime.gpuTime, (int64_t)gpuEnd); + tracyMemWrite(item->gpuTime.queryId, uint16_t(queryId+1)); + tracyMemWrite(item->gpuTime.context, gpuContextId); + Profiler::QueueSerialFinish(); +} + +#define CUPTI_API_CALL(call) CUptiCallChecked(call, #call, __FILE__, __LINE__) + +#define DRIVER_API_CALL(call) cudaDriverCallChecked(call, #call, __FILE__, __LINE__) + +CUptiResult CUptiCallChecked(CUptiResult result, const char* call, const char* file, int line) noexcept { + if (result == CUPTI_SUCCESS) + return result; + const char* resultMsg = ""; + CUPTI_API_CALL(cuptiGetResultString(result, &resultMsg)); // maybe not a good idea to recurse here... + fprintf(stderr, "ERROR:\t%s:%d:\n\tfunction '%s' failed with error '%s'.\n", file, line, call, resultMsg); + //assert(result == CUPTI_SUCCESS); + return result; +} + +CUresult cudaDriverCallChecked(CUresult result, const char* call, const char* file, int line) noexcept { + if (result == CUDA_SUCCESS) + return result; + const char* resultMsg = ""; + DRIVER_API_CALL(cuGetErrorString(result, &resultMsg)); // maybe not a good idea to recurse here... 
+ fprintf(stderr, "ERROR:\t%s:%d:\n\tfunction '%s' failed with error '%s'.\n", file, line, call, resultMsg); + //assert(result == CUDA_SUCCESS); + return result; +} + +template +struct ConcurrentHashMap { + static constexpr bool instrument = false; + auto acquire_read_lock() { + if (m.try_lock_shared()) + return std::shared_lock(m, std::adopt_lock); + ZoneNamedC(rwlock, tracy::Color::Tomato, instrument); + return std::shared_lock(m); + } + auto acquire_write_lock() { + if (m.try_lock()) + return std::unique_lock(m, std::adopt_lock); + ZoneNamedC(wxlock, tracy::Color::Tomato, instrument); + return std::unique_lock(m); + } + std::unordered_map mapping; + std::shared_mutex m; + auto& operator[](TKey key) { + { + auto lock = acquire_read_lock(); + auto it = mapping.find(key); + if (it != mapping.end()) { + return it->second; + } + } + return emplace(key, TValue{}).first->second; + } + auto find(TKey key) { + ZoneNamed(find, instrument); + auto lock = acquire_read_lock(); + return mapping.find(key); + } + auto fetch(TKey key, TValue& value) { + ZoneNamed(fetch, instrument); + auto it = mapping.find(key); + if (it != mapping.end()) { + value = it->second; + return true; + } + return false; + } + auto end() { + ZoneNamed(end, instrument); + auto lock = acquire_read_lock(); + return mapping.end(); + } + template + auto emplace(TKey key, Args&&... 
args) { + ZoneNamed(emplace, instrument); + auto lock = acquire_write_lock(); + return mapping.emplace(std::forward(key), std::forward(args)...); + } + auto erase(TKey key) { + ZoneNamed(erase, instrument); + auto lock = acquire_write_lock(); + return mapping.erase(key); + } +}; + +#if TRACY_CUDA_ENABLE_CUDA_CALL_STATS +struct ProfilerStats { + static constexpr bool instrument = false; + + ConcurrentHashMap> apiCallCount; + + void update(CUpti_CallbackDomain domain, CUpti_CallbackId cbid) { + ZoneNamed(update, instrument); + uint32_t key = (domain << 24) | (cbid & 0x00'FFFFFF); + auto it = apiCallCount.find(key); + if (it == apiCallCount.end()) { + it = apiCallCount.emplace(key, 0).first; + } + it->second.fetch_add(1, std::memory_order::memory_order_relaxed); + } +}; +#endif + +// StringTable: string memoization/interning +struct StringTable { + static constexpr bool instrument = false; + + // TODO(marcos): this could be just a "ConcurrentHashSet" + ConcurrentHashMap table; + + ~StringTable() { /* TODO(marcos): free string copy */ } + + std::string_view operator[](std::string_view str) { + ZoneNamedN(lookup, "StringTable::lookup", instrument); + std::string_view memoized; + if (!table.fetch(str, memoized)) { + ZoneNamedN(lookup, "StringTable::insert", instrument); + char* copy = (char*)tracyMalloc(str.size() + 1); + strncpy(copy, str.data(), str.size()); + copy[str.size()] = '\0'; + std::string_view value (copy, str.size()); + auto [it, inserted] = table.emplace(value, value); + if (!inserted) { + // another thread inserted it while we were trying to: cleanup + tracyFree(copy); + } + memoized = it->second; + } + assert(str == memoized); + return memoized; + } +}; + +struct SourceLocationMap { + static constexpr bool instrument = false; + + // NOTE(marcos): the address of an unordered_map value may become invalid + // later on (e.g., during a rehash), so mapping to a pointer is necessary + ConcurrentHashMap locations; + + ~SourceLocationMap() { /* TODO(marcos): free 
SourceLocationData* entries */ } + + tracy::SourceLocationData* retrieve(std::string_view function) { + ZoneNamed(retrieve, instrument); + tracy::SourceLocationData* pSrcLoc = nullptr; + locations.fetch(function, pSrcLoc); + return pSrcLoc; + } + + tracy::SourceLocationData* add(std::string_view function, std::string_view file, int line, uint32_t color=0) { + ZoneNamed(emplace, instrument); + assert(*function.end() == '\0'); + assert(*file.end() == '\0'); + void* bytes = tracyMalloc(sizeof(tracy::SourceLocationData)); + auto pSrcLoc = new(bytes)tracy::SourceLocationData{ function.data(), TracyFunction, file.data(), (uint32_t)line, color }; + auto [it, inserted] = locations.emplace(function, pSrcLoc); + if (!inserted) { + // another thread inserted it while we were trying to: cleanup + tracyFree(pSrcLoc); // POD: no destructor to call + } + assert(it->second != nullptr); + return it->second; + } +}; + +struct SourceLocationLUT { + static constexpr bool instrument = false; + + ~SourceLocationLUT() { /* no action needed: no dynamic allocation */ } + + tracy::SourceLocationData runtime [CUpti_runtime_api_trace_cbid::CUPTI_RUNTIME_TRACE_CBID_SIZE] = {}; + tracy::SourceLocationData driver [CUpti_driver_api_trace_cbid::CUPTI_DRIVER_TRACE_CBID_SIZE] = {}; + + tracy::SourceLocationData* retrieve(CUpti_CallbackDomain domain, CUpti_CallbackId cbid, CUpti_CallbackData* apiInfo) { + ZoneNamed(retrieve, instrument); + tracy::SourceLocationData* pSrcLoc = nullptr; + switch (domain) { + case CUPTI_CB_DOMAIN_RUNTIME_API : + if ((cbid > 0) && (cbid < CUPTI_RUNTIME_TRACE_CBID_SIZE)) { + pSrcLoc = &runtime[cbid]; + } + break; + case CUPTI_CB_DOMAIN_DRIVER_API : + if ((cbid > 0) && (cbid < CUPTI_DRIVER_TRACE_CBID_SIZE)) { + pSrcLoc = &driver[cbid]; + } + break; + default: + break; + } + if (pSrcLoc == nullptr) { + // domain not handled above, or cbid outside the range covered by the lookup tables: + // return a shared placeholder instead of dereferencing a null pointer below + static tracy::SourceLocationData unknownSrcLoc { "cuda???", TracyFunction, TracyFile, TracyLine, 0 }; + return &unknownSrcLoc; + } + if (pSrcLoc->name == nullptr) { + const char* function = apiInfo->functionName ?
apiInfo->functionName : "cuda???"; + // cuptiGetCallbackName includes the "version suffix" of the function/cbid + //CUPTI_API_CALL(cuptiGetCallbackName(domain, cbid, &function)); + *pSrcLoc = tracy::SourceLocationData{ function, TracyFunction, TracyFile, TracyLine, 0 }; + } + return pSrcLoc; + } +}; + +uint32_t tracyTimelineId(uint32_t contextId, uint32_t streamId) { + // 0xA7C5 = 42,949 => 42,949 * 100,000 = 4,294,900,000 + // 4,294,900,000 + 65,535 = 4,294,965,535 < 4,294,967,295 (max uint32) + assert(contextId <= 0xA7C5); + assert((streamId == CUPTI_INVALID_STREAM_ID) || (streamId < 0xFFFF)); + uint32_t packed = (contextId * 100'000) + (streamId & 0x0000'FFFF); + return packed; +} + +} // unnamed/anonymous namespace + +namespace tracy +{ + class CUDACtx + { + public: + static CUDACtx* Create() { + auto& s = Singleton::Get(); + std::unique_lock lock (s.m); + if (s.ref_count == 0) { + assert(s.ctx == nullptr); + s.ctx = new CUDACtx(s.ctx_id); + s.ctx_id = s.ctx->m_tracyGpuContext; + } + // every Create() takes one reference on the shared context, matching the + // unconditional decrement performed by Destroy() + s.ref_count += 1; + return s.ctx; + } + + static void Destroy(CUDACtx* ctx) { + auto& s = Singleton::Get(); + std::unique_lock lock(s.m); + assert(ctx == s.ctx); + s.ref_count -= 1; + if (s.ref_count == 0) { + delete s.ctx; + s.ctx = nullptr; + } + } + + void Collect() + { + ZoneScoped; + CUPTI::FlushActivity(); + } + + void printStats() + { + #if TRACY_CUDA_ENABLE_CUDA_CALL_STATS + fprintf(stdout, "\nCUDA API stats:\n"); + { + struct Stats { CUpti_CallbackDomain domain; CUpti_CallbackId cbid; int count; }; + std::vector sorted; + for (auto&& api : stats.apiCallCount.mapping) { + auto domain = CUpti_CallbackDomain(api.first >> 24); + auto cbid = CUpti_CallbackId(api.first & 0x00'FFFFFF); + int count = api.second; + sorted.emplace_back(Stats{ domain, cbid, count }); + } + std::sort(sorted.begin(), sorted.end(), [](const Stats& x, const Stats& y) { return x.count > y.count; }); + for (auto&& api : sorted) { + const char* function = ""; +
CUPTI_API_CALL(cuptiGetCallbackName(api.domain, api.cbid, &function)); + printf("- %s : %d\n", function, api.count); + } + } + #endif + } + + void StartProfiling() + { + ZoneScoped; + CUPTI::BeginInstrumentation(this); + } + + void StopProfiling() + { + ZoneScoped; + CUPTI::EndInstrumentation(); + printStats(); + } + + void Name(const char *name, uint16_t len) + { + auto ptr = (char*)tracyMalloc(len); + memcpy(ptr, name, len); + + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuContextName); + tracyMemWrite(item->gpuContextNameFat.context, m_tracyGpuContext); + tracyMemWrite(item->gpuContextNameFat.ptr, (uint64_t)ptr); + tracyMemWrite(item->gpuContextNameFat.size, len); + SubmitQueueItem(item); + } + + tracy_force_inline void SubmitQueueItem(tracy::QueueItem *item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + static void QueryTimestamps(TracyTimestamp& tTracy, CUptiTimestamp& tCUpti) { + TracyTimestamp tTracy1 = tracyGetTimestamp(); + CUPTI_API_CALL(cuptiGetTimestamp(&tCUpti)); + TracyTimestamp tTracy2 = tracyGetTimestamp(); + // NOTE(marcos): giving more weight to 'tTracy2' + tTracy = (3*tTracy1 + 5*tTracy2) / 8; + } + + // NOTE(marcos): recalibration is 'static' since Tracy and CUPTI timestamps + // are "global" across all contexts; that said, each Tracy GPU context needs + // its own GpuCalibration message, but for now there's just a singleton context. 
+ void Recalibrate() { + ZoneScoped; + // NOTE(marcos): only one thread should do the calibration, but there's + // no good reason to block threads that also trying to do the same + static std::mutex m; + if (!m.try_lock()) + return; + std::unique_lock lock (m, std::adopt_lock); + ZoneNamedNC(zone, "tracy::CUDACtx::Recalibrate[effective]", tracy::Color::Goldenrod, true); + TracyTimestamp tTracy; + CUptiTimestamp tCUpti; + QueryTimestamps(tTracy, tCUpti); + #if TRACY_CUDA_CALIBRATED_CONTEXT + static CUptiTimestamp prevCUptiTime = tCUpti; + int64_t deltaTicksCUpti = tCUpti - prevCUptiTime; + if (deltaTicksCUpti > 0) { + prevCUptiTime = tCUpti; + auto* item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuCalibration); + tracyMemWrite(item->gpuCalibration.gpuTime, (int64_t)tCUpti); + tracyMemWrite(item->gpuCalibration.cpuTime, tTracy); + tracyMemWrite(item->gpuCalibration.cpuDelta, deltaTicksCUpti); + tracyMemWrite(item->gpuCalibration.context, m_tracyGpuContext); + Profiler::QueueSerialFinish(); + } + #endif + // NOTE(marcos): update linear regression incrementally, which will refine + // the estimation of Tracy timestamps (Y) from CUpti timestamps (X) + static IncrementalRegression model; + model.addSample(double(tCUpti), double(tTracy)); + // NOTE(marcos): using orthogonal regression because the independet variable + // (X: CUpti timestamps) measurements are also imprecise + getCachedRegressionParameters() = model.orthogonal(); + } + + protected: + void EmitGpuZone(TracyTimestamp apiStart, TracyTimestamp apiEnd, + CUptiTimestamp gpuStart, CUptiTimestamp gpuEnd, + const tracy::SourceLocationData* pSrcLoc, + uint32_t cudaContextId, uint32_t cudaStreamId) { + //uint32_t timelineId = tracy::GetThreadHandle(); + uint32_t timelineId = tracyTimelineId(cudaContextId, cudaStreamId); + uint16_t queryId = m_queryIdGen.fetch_add(2); + tracyAnnounceGpuTimestamp(apiStart, apiEnd, queryId, m_tracyGpuContext, pSrcLoc, timelineId); + 
tracySubmitGpuTimestamp(gpuStart, gpuEnd, queryId, m_tracyGpuContext); + } + + void OnEventsProcessed() { + Recalibrate(); + } + + struct CUPTI { + static void CUPTIAPI OnBufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords) + { + ZoneScoped; + // TODO(marcos): avoid malloc and instead suballocate from a large circular buffer; + // according to the CUPTI documentation: "To minimize profiling overhead the client + // should return as quickly as possible from these callbacks." + *size = 1 * 1024*1024; // 1MB + *buffer = (uint8_t*)tracyMalloc(*size); + assert(*buffer != nullptr); + FlushActivityAsync(); + } + + static void CUPTIAPI OnBufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t* buffer, size_t size, size_t validSize) + { + // CUDA 6.0 onwards: all buffers from this callback are "global" buffers + // (i.e. there is no context/stream specific buffer; ctx is always NULL) + ZoneScoped; + tracy::SetThreadName("NVIDIA CUPTI Worker"); + CUptiResult status; + CUpti_Activity* record = nullptr; + while ((status = cuptiActivityGetNextRecord(buffer, validSize, &record)) == CUPTI_SUCCESS) { + DoProcessDeviceEvent(record); + } + if (status != CUPTI_ERROR_MAX_LIMIT_REACHED) { + CUptiCallChecked(status, "cuptiActivityGetNextRecord", TracyFile, TracyLine); + } + size_t dropped = 0; + CUPTI_API_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped)); + assert(dropped == 0); + tracyFree(buffer); + PersistentState::Get().profilerHost->OnEventsProcessed(); + } + + // correlationID -> [CPU start time, CPU end time, CUPTI start time] + using CorrelationID = uint32_t; + struct APICallInfo { TracyTimestamp start = 0, end = 0; CUptiTimestamp cupti = CUPTI_TIMESTAMP_UNKNOWN; CUDACtx* host = nullptr; }; + + static void CUPTIAPI OnCallbackAPI( + void* userdata, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, + const void* cbdata) + { + static constexpr bool instrument = false; + + TracyTimestamp apiCallStartTime = tracyGetTimestamp(); + CUDACtx* 
profilerHost = (CUDACtx*)userdata; + + switch (domain) { + case CUPTI_CB_DOMAIN_RUNTIME_API: + case CUPTI_CB_DOMAIN_DRIVER_API: + break; + case CUPTI_CB_DOMAIN_RESOURCE: { + // match 'callbackId' with CUpti_CallbackIdResource + // interpret 'cbdata' as CUpti_ResourceData, + // or as CUpti_ModuleResourceData, + // or as CUpti_GraphData, + // or as CUpti_StreamAttrData, + // or as ... (what else?) + return; + } + case CUPTI_CB_DOMAIN_SYNCHRONIZE: { + // match 'callbackId' with CUpti_CallbackIdSync + // interpret 'cbdata' as CUpti_SynchronizeData + return; + } + case CUPTI_CB_DOMAIN_STATE: { + // match 'callbackId' with CUpti_CallbackIdState + // interpret 'cbdata' as CUpti_StateData + return; + } + case CUPTI_CB_DOMAIN_NVTX: { + // match 'callbackId' with CUpti_nvtx_api_trace_cbid + // interpret 'cbdata' as CUpti_NvtxData + return; + } + case CUPTI_CB_DOMAIN_FORCE_INT: + // NOTE(marcos): the "FORCE_INT" values in CUPTI enums exist only to + // force the enum to have a specific representation (signed 32bits) + case CUPTI_CB_DOMAIN_INVALID: + default: + // TODO(marcos): unexpected error! + return; + } + + // if we reached this point, then we are in the (runtime or driver) API domain + CUpti_CallbackData* apiInfo = (CUpti_CallbackData*)cbdata; + + // Emit the Tracy 'ZoneBegin' message upon entering the API call + // TODO(marcos): a RAII object could be useful here... 
+ if (apiInfo->callbackSite == CUPTI_API_ENTER) { + #if TRACY_CUDA_ENABLE_CUDA_CALL_STATS + // the owning context arrives through 'userdata' as 'profilerHost'; no 'ctx' is declared in this function + profilerHost->stats.update(domain, cbid); + #endif + + auto& cudaCallSourceLocation = PersistentState::Get().cudaCallSourceLocation; + auto pSrcLoc = cudaCallSourceLocation.retrieve(domain, cbid, apiInfo); + + // HACK(marcos): the SourceLocationLUT::retrieve zone (above) should + // not be emitted before its enclosing zone (below) actually begins, + // so we delay the beginning of the enclosing zone to "unstack" them + if (SourceLocationLUT::instrument) + apiCallStartTime = tracyGetTimestamp(); + tracyZoneBegin(apiCallStartTime, pSrcLoc); + } + + if (apiInfo->callbackSite == CUPTI_API_ENTER) { + ZoneNamedN(enter, "tracy::CUDACtx::OnCUptiCallback[enter]", instrument); + // Track API calls that generate device activity: + bool trackDeviceActivity = false; + CUstream hStream = nullptr; + if (domain == CUPTI_CB_DOMAIN_RUNTIME_API) { + #define GET_STREAM_FUNC(Params, field) [](CUpti_CallbackData* api) { return ((Params*)api->functionParams)->field; } + #define NON_STREAM_FUNC() [](CUpti_CallbackData*) { return cudaStream_t(nullptr); } + static std::unordered_map cbidRuntimeTrackers = { + // Runtime: Kernel + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000, GET_STREAM_FUNC(cudaLaunchKernel_v7000_params, stream) }, + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_ptsz_v7000, GET_STREAM_FUNC(cudaLaunchKernel_ptsz_v7000_params, stream) }, + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060, GET_STREAM_FUNC(cudaLaunchKernelExC_v11060_params, config->stream) }, + { CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_ptsz_v11060, GET_STREAM_FUNC(cudaLaunchKernelExC_ptsz_v11060_params, config->stream) }, + // Runtime: Memory + { CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020, NON_STREAM_FUNC() }, + // Runtime: Memcpy + { CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020,
GET_STREAM_FUNC(cudaMemcpyAsync_v3020_params, stream) }, + // Runtime: Memset + { CUPTI_RUNTIME_TRACE_CBID_cudaMemset_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaMemsetAsync_v3020, GET_STREAM_FUNC(cudaMemsetAsync_v3020_params, stream) }, + // Runtime: Synchronization + { CUPTI_RUNTIME_TRACE_CBID_cudaStreamSynchronize_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaEventQuery_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaStreamWaitEvent_v3020, NON_STREAM_FUNC() }, + { CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020, NON_STREAM_FUNC() }, + }; + #undef NON_STREAM_FUNC + #undef GET_STREAM_FUNC + auto it = cbidRuntimeTrackers.find(CUpti_runtime_api_trace_cbid(cbid)); + if (it != cbidRuntimeTrackers.end()) { + trackDeviceActivity = true; + hStream = (CUstream)it->second(apiInfo); + } + } + if (domain == CUPTI_CB_DOMAIN_DRIVER_API) { + #define GET_STREAM_FUNC(Params, field) [](CUpti_CallbackData* api) { return ((Params*)api->functionParams)->field; } + #define NON_STREAM_FUNC() [](CUpti_CallbackData*) { return CUstream(nullptr); } + static std::unordered_map cbidDriverTrackers = { + // Driver: Kernel + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel, GET_STREAM_FUNC(cuLaunchKernel_params, hStream) }, + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel_ptsz, GET_STREAM_FUNC(cuLaunchKernel_ptsz_params, hStream)} , + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernelEx, GET_STREAM_FUNC(cuLaunchKernelEx_params, config->hStream) }, + { CUPTI_DRIVER_TRACE_CBID_cuLaunchKernelEx_ptsz, GET_STREAM_FUNC(cuLaunchKernelEx_params, config->hStream) }, + }; + #undef NON_STREAM_FUNC + #undef GET_STREAM_FUNC + auto it = cbidDriverTrackers.find(CUpti_driver_api_trace_cbid(cbid)); + if (it != cbidDriverTrackers.end()) { + trackDeviceActivity = true; + hStream = it->second(apiInfo); + } + } + if (trackDeviceActivity) { + // NOTE(marcos): we should NOT track if the stream is being captured + 
CUstreamCaptureStatus status = {}; + DRIVER_API_CALL(cuStreamIsCapturing(hStream, &status)); + trackDeviceActivity = !(status == CU_STREAM_CAPTURE_STATUS_ACTIVE); + } + if (trackDeviceActivity) { + CUptiTimestamp tgpu; + // TODO(marcos): do a "reverse-estimate" to obtain CUpti time from Tracy time instead? + CUPTI_API_CALL(cuptiGetTimestamp(&tgpu)); + auto& cudaCallSiteInfo = PersistentState::Get().cudaCallSiteInfo; + cudaCallSiteInfo.emplace(apiInfo->correlationId, APICallInfo{ apiCallStartTime, apiCallStartTime, tgpu, profilerHost }); + } + auto& entryFlags = *apiInfo->correlationData; + assert(entryFlags == 0); + entryFlags |= trackDeviceActivity ? 0x8000 : 0; + } + + if (apiInfo->callbackSite == CUPTI_API_EXIT) { + APICallInfo* pApiInterval = [](CUpti_CallbackData* apiInfo) { + ZoneNamedN(exit, "tracy::CUDACtx::OnCUptiCallback[exit]", instrument); + auto entryFlags = *apiInfo->correlationData; + bool trackDeviceActivity = (entryFlags & 0x8000) != 0; + if (trackDeviceActivity) { + auto& cudaCallSiteInfo = PersistentState::Get().cudaCallSiteInfo; + auto it = cudaCallSiteInfo.find(apiInfo->correlationId); + if (it != cudaCallSiteInfo.end()) { + // WARN(marcos): leaking the address of a hash-map value could spell trouble + return &it->second; + } + } + // NOTE(marcos): this can happen if the GPU activity completes + // before the CUDA function that enqueued it returns (e.g., sync) + static APICallInfo sentinel; + return &sentinel; + }(apiInfo); + pApiInterval->end = tracyGetTimestamp(); + tracyZoneEnd(pApiInterval->end); + } + } + + static bool matchActivityToAPICall(uint32_t correlationId, APICallInfo& apiCallInfo) { + static constexpr bool instrument = false; + ZoneNamed(match, instrument); + auto& cudaCallSiteInfo = PersistentState::Get().cudaCallSiteInfo; + if (!cudaCallSiteInfo.fetch(correlationId, apiCallInfo)) { + return false; + } + cudaCallSiteInfo.erase(correlationId); + assert(apiCallInfo.host != nullptr); + return true; + } + + static void 
matchError(uint32_t correlationId, const char* kind) { + char msg [128]; + snprintf(msg, sizeof(msg), "ERROR: device activity '%s' has no matching CUDA API call (id=%u).", kind, correlationId); + TracyMessageC(msg, strlen(msg), tracy::Color::Tomato); + } + + static std::string extractActualName(char** name){ + //If name does not start with number, return empty string + if (!isdigit(**name)) + { + return std::string(); + } + // Assuming name starts with number followed by actual name + std::string actualName; + char* currStr = *name; + int num = 0; + while (*currStr >= '0' && *currStr <= '9') + { + num = num * 10 + (*currStr - '0'); + currStr++; + } + + // Return the string start at currStr ends at num + actualName = std::string(currStr, num); + // check if actualName starts with _GLOBAL__N__ + if (actualName.rfind("_GLOBAL__N__", 0) == 0) + { + // _GLOBAL__N__ with an id stands for anonymous namespace + actualName = std::string("(anonymous_namespace)"); + } + + *name = currStr + num; + return actualName; + } + + static std::string extractActualNameNested(const char* demangledName) + { + ZoneNamedN(demangle, "demangle_kernel", false); + //If name does not start with _Z, return a new std::string with original name + if (demangledName[0] != '_' || demangledName[1] != 'Z') + { + return std::string(demangledName); + } + std::string actualName; + char* currStr = (char*)demangledName + 2; + + if (*currStr == 'N') + { + currStr++; + // extract actual name from nested name + std::string nestedName = extractActualName(&currStr); + actualName += nestedName; + while (1) + { + //Loop until nested name is empty + nestedName = extractActualName(&currStr); + if (nestedName.empty()) + { + break; + } + actualName += "::" + nestedName; + } + } else + { + actualName = extractActualName(&currStr); + } + return actualName; + } + + static tracy::SourceLocationData* getKernelSourceLocation(const char* kernelName) + { + auto& kernelSrcLoc = PersistentState::Get().kernelSrcLoc; + 
std::string_view demangledName; + #ifndef _MSC_VER + // TODO(marcos): extractActualNameNested is the main bottleneck right now; + // we need a specialized StringTable mapping from "peristent" kernel names + // (const char*/uintptr_t) to memoized, lazily initialized demangled names + auto& demangledNameTable = PersistentState::Get().demangledNameTable; + std::string demangled = extractActualNameNested(kernelName); + demangledName = demangledNameTable[demangled]; + #else + demangledName = kernelName; + #endif + auto pSrcLoc = kernelSrcLoc.retrieve(demangledName); + if (pSrcLoc == nullptr) { + pSrcLoc = kernelSrcLoc.add(demangledName, TracyFile, TracyLine); + } + return pSrcLoc; + } + + static void DoProcessDeviceEvent(CUpti_Activity *record) + { + static constexpr bool instrument = false; + ZoneNamed(activity, instrument); + + switch (record->kind) + { + case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[kernel]", instrument); + CUpti_ActivityKernel9* kernel9 = (CUpti_ActivityKernel9*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(kernel9->correlationId, apiCall)) { + return matchError(kernel9->correlationId, "KERNEL"); + } + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, kernel9->start, kernel9->end, getKernelSourceLocation(kernel9->name), kernel9->contextId, kernel9->streamId); + auto latency_ms = (kernel9->start - apiCall.cupti) / 1'000'000.0; + tracyPlotBlip("Kernel Latency (ms)", kernel9->start, latency_ms); + break; + } + + case CUPTI_ACTIVITY_KIND_MEMCPY: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[memcpy]", instrument); + CUpti_ActivityMemcpy5* memcpy5 = (CUpti_ActivityMemcpy5*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(memcpy5->correlationId, apiCall)) { + return matchError(memcpy5->correlationId, "MEMCPY"); + } + static constexpr tracy::SourceLocationData TracyCUPTISrcLocDeviceMemcpy { "CUDA::memcpy", TracyFunction, TracyFile, 
(uint32_t)TracyLine, tracy::Color::Blue }; + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, memcpy5->start, memcpy5->end, &TracyCUPTISrcLocDeviceMemcpy, memcpy5->contextId, memcpy5->streamId); + static constexpr const char* graph_name = "CUDA Memory Copy"; + tracyEmitMemAlloc(graph_name, (void*)(uintptr_t)memcpy5->correlationId, memcpy5->bytes, memcpy5->start); + tracyEmitMemFree (graph_name, (void*)(uintptr_t)memcpy5->correlationId, memcpy5->end); + break; + } + + case CUPTI_ACTIVITY_KIND_MEMSET: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[memset]", instrument); + CUpti_ActivityMemset4* memset4 = (CUpti_ActivityMemset4*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(memset4->correlationId, apiCall)) { + return matchError(memset4->correlationId, "MEMSET"); + } + static constexpr tracy::SourceLocationData TracyCUPTISrcLocDeviceMemset { "CUDA::memset", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Blue }; + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, memset4->start, memset4->end, &TracyCUPTISrcLocDeviceMemset, memset4->contextId, memset4->streamId); + static constexpr const char* graph_name = "CUDA Memory Set"; + tracyEmitMemAlloc(graph_name, (void*)(uintptr_t)memset4->correlationId, memset4->bytes, memset4->start); + tracyEmitMemFree (graph_name, (void*)(uintptr_t)memset4->correlationId, memset4->end); + break; + } + + case CUPTI_ACTIVITY_KIND_SYNCHRONIZATION: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[sync]", instrument); + CUpti_ActivitySynchronization* synchronization = (CUpti_ActivitySynchronization*) record; + APICallInfo apiCall; + if (!matchActivityToAPICall(synchronization->correlationId, apiCall)) { + return matchError(synchronization->correlationId, "SYNCHRONIZATION"); + } + // NOTE(marcos): synchronization can happen at different levels/objects: + // a. on the entire context : cuCtxSynchronize() -> timeline(ctx,0) + // b. 
on a specific stream : cuStreamSynchronize() -> timeline(ctx,stream) + // c. on a specific event : cuEventSynchronize() -> timeline(ctx,0xffff) + static constexpr tracy::SourceLocationData TracyCUPTISrcLocContextSynchronization { "CUDA::Context::sync", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Magenta }; + auto* pSrcLoc = &TracyCUPTISrcLocContextSynchronization; + uint32_t cudaContextId = synchronization->contextId; + uint32_t cudaStreamId = 0; + if (synchronization->streamId != CUPTI_SYNCHRONIZATION_INVALID_VALUE) { + static constexpr tracy::SourceLocationData TracyCUPTISrcLocStreamSynchronization{ "CUDA::Stream::sync", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Magenta3 }; + pSrcLoc = &TracyCUPTISrcLocStreamSynchronization; + cudaStreamId = synchronization->streamId; + } + if (synchronization->cudaEventId != CUPTI_SYNCHRONIZATION_INVALID_VALUE) { + static constexpr tracy::SourceLocationData TracyCUPTISrcLocEventSynchronization{ "CUDA::Event::sync", TracyFunction, TracyFile, (uint32_t)TracyLine, tracy::Color::Magenta4 }; + pSrcLoc = &TracyCUPTISrcLocEventSynchronization; + cudaStreamId = 0xFFFFFFFF; + // TODO(marcos): CUpti_ActivitySynchronization2 introduces a new + // field 'cudaEventSyncId' which complements 'cudaEventId' + } + apiCall.host->EmitGpuZone(apiCall.start, apiCall.end, synchronization->start, synchronization->end, pSrcLoc, cudaContextId, cudaStreamId); + static constexpr const char* graph_name = "CUDA Synchronization"; + tracyEmitMemAlloc(graph_name, (void*)(uintptr_t)synchronization->correlationId, 1, synchronization->start); + tracyEmitMemFree (graph_name, (void*)(uintptr_t)synchronization->correlationId, synchronization->end); + break; + } + case CUPTI_ACTIVITY_KIND_MEMORY2: + { + ZoneNamedN(kernel, "tracy::CUDACtx::DoProcessDeviceEvent[malloc/free]", instrument); + CUpti_ActivityMemory3* memory3 = (CUpti_ActivityMemory3*)record; + APICallInfo apiCall; + if (!matchActivityToAPICall(memory3->correlationId, 
apiCall)) { + return matchError(memory3->correlationId, "MEMORY"); + } + static constexpr const char* graph_name = "CUDA Memory Allocation"; + if (memory3->memoryOperationType == CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_ALLOCATION){ + auto& memAllocAddress = PersistentState::Get().memAllocAddress; + memAllocAddress[memory3->address] = 1; + tracyEmitMemAlloc(graph_name, (void*)memory3->address, memory3->bytes, memory3->timestamp); + } + else if (memory3->memoryOperationType == CUPTI_ACTIVITY_MEMORY_OPERATION_TYPE_RELEASE){ + auto& memAllocAddress = PersistentState::Get().memAllocAddress; + int dontCare; + if (!memAllocAddress.fetch(memory3->address, dontCare)){ + // Note(Frank): This is a hack to handle the case where the memory allocation + // corresponds to the memory release is not found. + // This can happen when the memory is allocated when profiling is not enabled. + matchError(memory3->correlationId, "MEMORY/RELEASE"); + tracyEmitMemAlloc(graph_name, (void*)memory3->address, memory3->bytes, memory3->timestamp); + } else { + memAllocAddress.erase(memory3->address); + } + tracyEmitMemFree(graph_name, (void*)memory3->address, memory3->timestamp); + } + break; + } + case CUPTI_ACTIVITY_KIND_CUDA_EVENT : + { + // NOTE(marcos): a byproduct of CUPTI_ACTIVITY_KIND_SYNCHRONIZATION + // (I think this is related to cudaEvent*() API calls) + CUpti_ActivityCudaEvent2* event = (CUpti_ActivityCudaEvent2*)record; + UNREFERENCED(event); + break; + } + default: + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "Unknown activity record (kind is %d)", record->kind); + TracyMessageC(buffer, strlen(buffer), tracy::Color::Crimson); + break; + } + } + } + + static constexpr CUpti_CallbackDomain domains[] = { + CUPTI_CB_DOMAIN_RUNTIME_API, + CUPTI_CB_DOMAIN_DRIVER_API, + //CUPTI_CB_DOMAIN_RESOURCE, + //CUPTI_CB_DOMAIN_SYNCHRONIZE, + //CUPTI_CB_DOMAIN_NVTX, + //CUPTI_CB_DOMAIN_STATE + }; + + static constexpr CUpti_ActivityKind activities[] = { + //CUPTI_ACTIVITY_KIND_KERNEL, // 
mutually exclusive with CONCURRENT_KERNEL + CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL, + CUPTI_ACTIVITY_KIND_MEMCPY, + CUPTI_ACTIVITY_KIND_MEMSET, + CUPTI_ACTIVITY_KIND_SYNCHRONIZATION, + CUPTI_ACTIVITY_KIND_MEMORY2, + //CUPTI_ACTIVITY_KIND_MEMCPY2, + //CUPTI_ACTIVITY_KIND_OVERHEAD, + //CUPTI_ACTIVITY_KIND_INTERNAL_LAUNCH_API, + //CUPTI_ACTIVITY_KIND_RUNTIME, + //CUPTI_ACTIVITY_KIND_DRIVER, + }; + + static void BeginInstrumentation(CUDACtx* profilerHost) { + auto& currentProfilerHost = PersistentState::Get().profilerHost; + if (currentProfilerHost != nullptr) { + return; + } + currentProfilerHost = profilerHost; + + // NOTE(frank): full-stop synchronization to ensure we only handle + // CUDA API calls and device activities that happens past this point + cudaDeviceSynchronize(); + + auto& subscriber = PersistentState::Get().subscriber; + CUPTI_API_CALL(cuptiSubscribe(&subscriber, CUPTI::OnCallbackAPI, profilerHost)); + CUPTI_API_CALL(cuptiActivityRegisterCallbacks(CUPTI::OnBufferRequested, CUPTI::OnBufferCompleted)); + for (auto domain : domains) { + CUPTI_API_CALL(cuptiEnableDomain(uint32_t(true), subscriber, domain)); + } + for (auto activity : activities) { + CUPTI_API_CALL(cuptiActivityEnable(activity)); + } + + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + auto& collector = PersistentState::Get().collector; + collector.period = 160; + collector.signal.notify_one(); + #endif + } + + static void EndInstrumentation() { + auto& currentProfilerHost = PersistentState::Get().profilerHost; + if (currentProfilerHost == nullptr) { + return; + } + + // NOTE(frank): full-stop synchronization to ensure we catch + // and drain all the activities that has been tracked up to now. 
+ cudaDeviceSynchronize(); + + FlushActivity(); + + auto& subscriber = PersistentState::Get().subscriber; + for (auto activity : activities) { + CUPTI_API_CALL(cuptiActivityDisable(activity)); + } + for (auto domain : domains) { + CUPTI_API_CALL(cuptiEnableDomain(uint32_t(false), subscriber, domain)); + } + // TODO(marcos): is there a counterpart for 'cuptiActivityRegisterCallbacks()'? + CUPTI_API_CALL(cuptiUnsubscribe(subscriber)); + + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + auto& collector = PersistentState::Get().collector; + collector.period = ~uint32_t(0); + collector.signal.notify_one(); + #endif + + currentProfilerHost = nullptr; + } + + static void FlushActivity() + { + // NOTE(marcos): only one thread should do the collection at any given time, + // but there's no reason to block threads that are also trying to do the same + static std::mutex m; + if (!m.try_lock()) + return; + std::unique_lock lock (m, std::adopt_lock); + ZoneNamedNC(zone, "cuptiActivityFlushAll", tracy::Color::Red4, true); + CUPTI_API_CALL(cuptiActivityFlushAll(CUPTI_ACTIVITY_FLAG_NONE)); + } + + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + // WARN(marcos): technically, CUPTI already offers async flushing of + // activity records through cuptiActivityFlushPeriod(), but I haven't + // had much luck getting reliable, consistent delivery with it... 
+ struct Collector { + std::atomic running = true; + volatile uint32_t period = ~uint32_t(0); + std::mutex mtx; + std::condition_variable signal; + std::thread thread = std::thread( + [this]() { + tracy::SetThreadName("Tracy CUDA Collector"); + atexit([]() { + auto& collector = CUPTI::PersistentState::Get().collector; + collector.running = false; + collector.signal.notify_one(); + collector.thread.join(); + }); + while (running) { + { + std::unique_lock lock(mtx); + signal.wait_for(lock, std::chrono::milliseconds(period)); + } + FlushActivity(); + } + } + ); + }; + #endif + + static void FlushActivityAsync() + { + #if TRACY_CUDA_ENABLE_COLLECTOR_THREAD + ZoneScoped; + auto& collector = PersistentState::Get().collector; + collector.signal.notify_one(); + #endif + } + + struct PersistentState { + // NOTE(marcos): these objects must remain in memory past the application + // returning from main() because the Tracy client worker thread may still + // be responding to string/source-location requests from the server + SourceLocationMap kernelSrcLoc; + StringTable demangledNameTable; + SourceLocationLUT cudaCallSourceLocation; + + // NOTE(marcos): these objects do not need to persist, but their relative + // footprint is trivial enough that we don't care if we let them leak + ConcurrentHashMap cudaCallSiteInfo; + ConcurrentHashMap memAllocAddress; + CUpti_SubscriberHandle subscriber = {}; + CUDACtx* profilerHost = nullptr; + + Collector collector; + + static PersistentState& Get() { + static PersistentState& persistent = *(new PersistentState()); + return persistent; + } + }; + + }; + + CUDACtx(uint8_t gpuContextID = 255) + { + ZoneScoped; + + if (gpuContextID != 255) { + m_tracyGpuContext = gpuContextID; + return; + } + + m_tracyGpuContext = GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed); + assert(m_tracyGpuContext != 255); + + TracyTimestamp tTracy; + CUptiTimestamp tCUpti; + QueryTimestamps(tTracy, tCUpti); + + // Announce to Tracy about a new GPU 
context/timeline: + auto item = Profiler::QueueSerial(); + tracyMemWrite(item->hdr.type, QueueType::GpuNewContext); + tracyMemWrite(item->gpuNewContext.cpuTime, tTracy); + tracyMemWrite(item->gpuNewContext.gpuTime, (int64_t)tCUpti); // TODO: Be more careful about this cast + tracyMemWrite(item->gpuNewContext.thread, (uint32_t)0); + tracyMemWrite(item->gpuNewContext.period, 1.0f); + tracyMemWrite(item->gpuNewContext.type, GpuContextType::CUDA); + tracyMemWrite(item->gpuNewContext.context, m_tracyGpuContext); + #if TRACY_CUDA_CALIBRATED_CONTEXT + tracyMemWrite(item->gpuNewContext.flags, GpuContextCalibration); + #else + tracyMemWrite(item->gpuNewContext.flags, tracy::GpuContextFlags(0)); + #endif + Profiler::QueueSerialFinish(); + + constexpr const char* tracyCtxName = "CUDA GPU/Device Activity"; + this->Name(tracyCtxName, uint16_t(strlen(tracyCtxName))); + + // NOTE(marcos): a few rounds of calibration amortized over 1 second + // in order to get a meaningful linear regression estimator + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + Recalibrate(); + std::this_thread::sleep_for(std::chrono::milliseconds(400)); + Recalibrate(); + } + + ~CUDACtx() + { + ZoneScoped; + } + + struct Singleton { + CUDACtx* ctx = nullptr; + std::mutex m; + int ref_count = 0; + uint8_t ctx_id = 255; + static Singleton& Get() { + static Singleton singleton; + return singleton; + } + }; + + #if TRACY_CUDA_ENABLE_CUDA_CALL_STATS + ProfilerStats stats = {}; + #endif + + uint8_t m_tracyGpuContext = 255; + static constexpr size_t cacheline = 64; + alignas(cacheline) std::atomic m_queryIdGen = 0; + }; + +} + +#define TracyCUDAContext() tracy::CUDACtx::Create() +#define TracyCUDAContextDestroy(ctx) tracy::CUDACtx::Destroy(ctx) +#define TracyCUDAContextName(ctx, name, size) ctx->Name(name, size) + +#define 
TracyCUDAStartProfiling(ctx) ctx->StartProfiling() +#define TracyCUDAStopProfiling(ctx) ctx->StopProfiling() + +#define TracyCUDACollect(ctx) ctx->Collect() + +#endif + +#endif \ No newline at end of file diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyD3D11.hpp b/project/thirdparty/tracy-0.12.0/tracy/TracyD3D11.hpp similarity index 92% rename from project/thirdparty/tracy-0.11.1/tracy/TracyD3D11.hpp rename to project/thirdparty/tracy-0.12.0/tracy/TracyD3D11.hpp index 3ed151bff..acab38316 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyD3D11.hpp +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyD3D11.hpp @@ -95,6 +95,10 @@ class D3D11Ctx int64_t tcpu0 = Profiler::GetTime(); WaitForQuery(m_disjointQuery); + // NOTE: one would expect that by waiting for the enclosing disjoint query to finish, + // all timestamp queries within would also be readily available, but that does not + // seem to be the case here... See https://github.com/wolfpld/tracy/issues/947 + WaitForQuery(m_queries[0]); int64_t tcpu1 = Profiler::GetTime(); D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { }; @@ -109,7 +113,7 @@ class D3D11Ctx UINT64 timestamp = 0; if (m_immediateDevCtx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) != S_OK) - continue; // this should never happen, since the enclosing disjoint query succeeded + continue; // this should never happen (we waited for the query to finish above) tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2; tgpu = timestamp * (1000000000 / disjoint.Frequency); @@ -307,13 +311,21 @@ class D3D11ZoneScope WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); } - tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active ) + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int32_t depth, bool active ) : D3D11ZoneScope(ctx, active) { if( !m_active ) return; - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, 
QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + if( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } } tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active) @@ -327,15 +339,23 @@ class D3D11ZoneScope WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); } - tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active) + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool active) : D3D11ZoneScope(ctx, active) { if( !m_active ) return; const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + if ( depth > 0 && has_callstack() ) + { + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + else + { + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } } tracy_force_inline ~D3D11ZoneScope() diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyD3D12.hpp b/project/thirdparty/tracy-0.12.0/tracy/TracyD3D12.hpp similarity index 99% rename from 
project/thirdparty/tracy-0.11.1/tracy/TracyD3D12.hpp rename to project/thirdparty/tracy-0.12.0/tracy/TracyD3D12.hpp index 41567937e..d36253d7c 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyD3D12.hpp +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyD3D12.hpp @@ -385,7 +385,7 @@ namespace tracy WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); } - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active) + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int32_t depth, bool active) : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; @@ -405,7 +405,7 @@ namespace tracy WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); } - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active) + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int32_t depth, bool active) : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyLua.hpp b/project/thirdparty/tracy-0.12.0/tracy/TracyLua.hpp similarity index 90% rename from project/thirdparty/tracy-0.11.1/tracy/TracyLua.hpp rename to project/thirdparty/tracy-0.12.0/tracy/TracyLua.hpp index 51dead51f..5a51c3b50 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyLua.hpp +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyLua.hpp @@ -120,6 +120,8 @@ static inline void LuaRemove( char* script ) } } +static inline void LuaHook( lua_State* L, lua_Debug* ar ) {} + } #else @@ -439,6 
+441,44 @@ static inline void LuaRegister( lua_State* L ) static inline void LuaRemove( char* script ) {} +static inline void LuaHook( lua_State* L, lua_Debug* ar ) +{ + if ( ar->event == LUA_HOOKCALL ) + { +#ifdef TRACY_ON_DEMAND + const auto zoneCnt = GetLuaZoneState().counter++; + if ( zoneCnt != 0 && !GetLuaZoneState().active ) return; + GetLuaZoneState().active = GetProfiler().IsConnected(); + if ( !GetLuaZoneState().active ) return; +#endif + lua_getinfo( L, "Snl", ar ); + + char src[256]; + detail::LuaShortenSrc( src, ar->short_src ); + + const auto srcloc = Profiler::AllocSourceLocation( ar->currentline, src, ar->name ? ar->name : ar->short_src ); + TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommit( zoneBeginThread ); + } + else if (ar->event == LUA_HOOKRET) { +#ifdef TRACY_ON_DEMAND + assert( GetLuaZoneState().counter != 0 ); + GetLuaZoneState().counter--; + if ( !GetLuaZoneState().active ) return; + if ( !GetProfiler().IsConnected() ) + { + GetLuaZoneState().active = false; + return; + } +#endif + TracyQueuePrepare( QueueType::ZoneEnd ); + MemWrite( &item->zoneEnd.time, Profiler::GetTime() ); + TracyQueueCommit( zoneEndThread ); + } +} + } #endif diff --git a/project/thirdparty/tracy-0.12.0/tracy/TracyMetal.hmm b/project/thirdparty/tracy-0.12.0/tracy/TracyMetal.hmm new file mode 100644 index 000000000..a4b4cb521 --- /dev/null +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyMetal.hmm @@ -0,0 +1,644 @@ +#ifndef __TRACYMETAL_HMM__ +#define __TRACYMETAL_HMM__ + +/* This file implements a Metal API back-end for Tracy (it has only been tested on Apple + Silicon devices, but it should also work on Intel-based Macs and older iOS devices). + The Metal back-end in Tracy operates differently than other GPU back-ends like Vulkan, + Direct3D and OpenGL. 
Specifically, TracyMetalZone() must be placed around the site where + a command encoder is created. This is because not all hardware supports timestamps at + command granularity, and can only provide timestamps around an entire command encoder. + This accommodates for all tiers of hardware; in the future, variants of TracyMetalZone() + will be added to support the habitual command-level granularity of Tracy GPU back-ends. + Metal also imposes a few restrictions that make the process of requesting and collecting + queries more complicated in Tracy: + a) timestamp query buffers are limited to 4096 queries (32KB, where each query is 8 bytes) + b) when a timestamp query buffer is created, Metal initializes all timestamps with zeroes, + and there's no way to reset them back to zero after timestamps get resolved; the only + way to clear the timestamps is by allocating a new timestamp query buffer + c) if a command encoder records no commands and its corresponding command buffer ends up + committed to the command queue, Metal will "optimize-away" the encoder along with any + timestamp queries associated with it (the timestamp will remain as zero and will never + get resolved) + Because of the limitations above, two timestamp buffers are managed internally. Once one + of the buffers fills up with requests, the second buffer can start serving new requests. + Once all requests in a buffer get resolved and collected, the entire buffer is discarded + and a new one allocated for future requests. (Proper cycling through a ring buffer would + require bookkeeping and completion handlers to collect only the known complete queries.) + In the current implementation, there is potential for a race condition when the buffer is + discarded and reallocated. In practice, the race condition will never materialize so long + as TracyMetalCollect() is called frequently to keep the amount of unresolved queries low. 
+ Finally, there's a timeout mechanism during timestamp collection to detect "empty" command + encoders and ensure progress. +*/ + +#ifndef TRACY_ENABLE + +#define TracyMetalContext(device) nullptr +#define TracyMetalDestroy(ctx) +#define TracyMetalContextName(ctx, name, size) + +#define TracyMetalZone(ctx, encoderDesc, name) +#define TracyMetalZoneC(ctx, encoderDesc, name, color) +#define TracyMetalNamedZone(ctx, varname, encoderDesc, name, active) +#define TracyMetalNamedZoneC(ctx, varname, encoderDesc, name, color, active) + +#define TracyMetalCollect(ctx) + +namespace tracy +{ +class MetalZoneScope {}; +} + +using TracyMetalCtx = void; + +#else + +#if not __has_feature(objc_arc) +#error TracyMetal requires ARC to be enabled. +#endif + +#include +#include +#include + +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyAlign.hpp" +#include "../common/TracyAlloc.hpp" + +// ok to import if in obj-c code +#import + +#define TRACY_METAL_VA_ARGS(...) , ##__VA_ARGS__ + +#define TracyMetalPanic(ret, msg, ...) do { \ + char buffer [1024]; \ + snprintf(buffer, sizeof(buffer), "TracyMetal: " msg TRACY_METAL_VA_ARGS(__VA_ARGS__)); \ + TracyMessageC(buffer, strlen(buffer), tracy::Color::OrangeRed); \ + fprintf(stderr, "%s\n", buffer); \ + ret; \ + } while(false); + +#ifndef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#define TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT 0.200f +#endif//TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT + +#ifndef TRACY_METAL_DEBUG_MASK +#define TRACY_METAL_DEBUG_MASK (0) +#endif//TRACY_METAL_DEBUG_MASK + +#if TRACY_METAL_DEBUG_MASK + #define TracyMetalDebugMasked(mask, ...) if constexpr (mask & TRACY_METAL_DEBUG_MASK) { __VA_ARGS__; } +#else + #define TracyMetalDebugMasked(mask, ...) +#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 1) + #define TracyMetalDebug_0b00010(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b00010(...) 
+#endif + +#if TRACY_METAL_DEBUG_MASK & (1 << 4) + #define TracyMetalDebug_0b10000(...) __VA_ARGS__; +#else + #define TracyMetalDebug_0b10000(...) +#endif + +#ifndef TracyMetalDebugZoneScopeWireTap +#define TracyMetalDebugZoneScopeWireTap +#endif//TracyMetalDebugZoneScopeWireTap + +namespace tracy +{ + +class MetalCtx +{ + friend class MetalZoneScope; + + enum { MaxQueries = 4 * 1024 }; // Metal: between 8 and 32768 _BYTES_... + +public: + static MetalCtx* Create(id device) + { + ZoneScopedNC("tracy::MetalCtx::Create", Color::Red4); + auto ctx = static_cast(tracy_malloc(sizeof(MetalCtx))); + new (ctx) MetalCtx(device); + if (ctx->m_contextId == 255) + { + Destroy(ctx); // release the failed context first: the panic macro below returns, so nothing after it runs + TracyMetalPanic({assert(false);} return nullptr, "ERROR: unable to create context."); + } + return ctx; + } + + static void Destroy(MetalCtx* ctx) + { + ZoneScopedNC("tracy::MetalCtx::Destroy", Color::Red4); + ctx->~MetalCtx(); + tracy_free(ctx); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_contextId ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); + SubmitQueueItem(item); + } + + bool Collect() + { + ZoneScopedNC("tracy::MetalCtx::Collect", Color::Red4); + +#ifdef TRACY_ON_DEMAND + if (!GetProfiler().IsConnected()) + { + return true; + } +#endif + + // Only one thread is allowed to collect timestamps at any given time + // but there's no need to block contending threads + if (!m_collectionMutex.try_lock()) + { + return true; + } + + std::unique_lock lock (m_collectionMutex, std::adopt_lock); + + uintptr_t begin = m_previousCheckpoint.load(); + uintptr_t latestCheckpoint = m_queryCounter.load(); // TODO: MTLEvent? 
MTLFence?; + TracyMetalDebugMasked(1<<3, ZoneValue(begin)); + TracyMetalDebugMasked(1<<3, ZoneValue(latestCheckpoint)); + + uint32_t count = RingCount(begin, latestCheckpoint); + if (count == 0) // no pending timestamp queries + { + //uintptr_t nextCheckpoint = m_queryCounter.load(); + //if (nextCheckpoint != latestCheckpoint) + //{ + // // TODO: signal event / fence now? + //} + return true; + } + + // resolve up until the ring buffer boundary and let a subsequent call + // to Collect handle the wrap-around + bool reallocateBuffer = false; + if (RingIndex(begin) + count >= RingSize()) + { + count = RingSize() - RingIndex(begin); + reallocateBuffer = true; + } + TracyMetalDebugMasked(1<<3, ZoneValue(count)); + + auto buffer_idx = (begin / MaxQueries) % 2; + auto counterSampleBuffer = m_counterSampleBuffers[buffer_idx]; + + if (count >= RingSize()) + { + TracyMetalPanic(return false, "Collect: FULL! too many pending timestamp queries. [%llu, %llu] (%u)", begin, latestCheckpoint, count); + } + + TracyMetalDebugMasked(1<<3, TracyMetalPanic(, "Collect: [%llu, %llu] :: (%u)", begin, latestCheckpoint, count)); + + NSRange range = NSMakeRange(RingIndex(begin), count); + NSData* data = [counterSampleBuffer resolveCounterRange:range]; + NSUInteger numResolvedTimestamps = data.length / sizeof(MTLCounterResultTimestamp); + MTLCounterResultTimestamp* timestamps = (MTLCounterResultTimestamp *)(data.bytes); + if (timestamps == nil) + { + TracyMetalPanic(return false, "Collect: unable to resolve timestamps."); + } + + if (numResolvedTimestamps != count) + { + TracyMetalPanic(, "Collect: numResolvedTimestamps != count : %u != %u", (uint32_t)numResolvedTimestamps, count); + } + + int resolved = 0; + for (auto i = 0; i < numResolvedTimestamps; i += 2) + { + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::[i]") ); + MTLTimestamp t_start = timestamps[i+0].timestamp; + MTLTimestamp t_end = timestamps[i+1].timestamp; + uint32_t k = RingIndex(begin + i); + 
TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: timestamp[%u] = %llu | timestamp[%u] = %llu | diff = %llu\n", k, t_start, k+1, t_end, (t_end - t_start))); + if ((t_start == MTLCounterErrorValue) || (t_end == MTLCounterErrorValue)) + { + TracyMetalPanic(, "Collect: invalid timestamp (MTLCounterErrorValue) at %u.", k); + break; + } + // Metal will initialize timestamp buffer with zeroes; encountering a zero-value + // timestamp means that the timestamp has not been written and resolved yet + if ((t_start == 0) || (t_end == 0)) + { + auto checkTime = std::chrono::high_resolution_clock::now(); + auto requestTime = m_timestampRequestTime[k]; + auto ms_in_flight = std::chrono::duration(checkTime-requestTime).count()*1000.0f; + TracyMetalDebugMasked(1<<4, TracyMetalPanic(, "Collect: invalid timestamp (zero) at %u [%.0fms in flight].", k, ms_in_flight)); + const float timeout_ms = TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT * 1000.0f; + if (ms_in_flight < timeout_ms) + break; + TracyMetalDebug_0b10000( ZoneScopedN("tracy::MetalCtx::Collect::Drop") ); + TracyMetalPanic(, "Collect: giving up on timestamp at %u [%.0fms in flight].", k, ms_in_flight); + t_start = m_mostRecentTimestamp + 5; + t_end = t_start + 5; + } + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+0), "TracyMetalGpuZone")); + TracyMetalDebugMasked(1<<2, TracyFreeN((void*)(uintptr_t)(k+1), "TracyMetalGpuZone")); + { + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(t_start)); + MemWrite(&item->gpuTime.queryId, static_cast(k)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + { + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, static_cast(t_end)); + MemWrite(&item->gpuTime.queryId, static_cast(k+1)); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + } + 
m_mostRecentTimestamp = (t_end > m_mostRecentTimestamp) ? t_end : m_mostRecentTimestamp; + TracyMetalDebugMasked(1<<1, TracyFreeN((void*)(uintptr_t)k, "TracyMetalTimestampQueryId")); + resolved += 2; + } + TracyMetalDebugMasked(1<<3, ZoneValue(RingCount(begin, m_previousCheckpoint.load()))); + + m_previousCheckpoint += resolved; + + // Check whether the timestamp buffer has been fully resolved/collected: + // WARN: there's technically a race condition here: NextQuery() may reference the + // buffer that is being released instead of the new one. In practice, this should + // never happen so long as Collect is called frequently enough to prevent pending + // timestamp query requests from piling up too quickly. + if ((resolved == count) && (m_previousCheckpoint.load() % MaxQueries) == 0) + { + m_counterSampleBuffers[buffer_idx] = NewTimestampSampleBuffer(m_device, MaxQueries); + } + + //RecalibrateClocks(); // to account for drift + + return true; + } + +private: + MetalCtx(id device) + : m_device(device) + { + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterErrorValue = 0x%llx", MTLCounterErrorValue)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "MTLCounterDontSample = 0x%llx", MTLCounterDontSample)); + + if (m_device == nil) + { + TracyMetalPanic({assert(false);} return, "device is nil."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtStageBoundary]) + { + TracyMetalPanic({assert(false);} return, "ERROR: timestamp sampling at pipeline stage boundary is not supported."); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtDrawBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at draw call boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtBlitBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at blit boundary is not supported.\n")); + } + if (![m_device 
supportsCounterSampling:MTLCounterSamplingPointAtDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at compute dispatch boundary is not supported.\n")); + } + if (![m_device supportsCounterSampling:MTLCounterSamplingPointAtTileDispatchBoundary]) + { + TracyMetalDebugMasked(1<<0, fprintf(stderr, "WARNING: timestamp sampling at tile dispatch boundary is not supported.\n")); + } + + m_counterSampleBuffers[0] = NewTimestampSampleBuffer(m_device, MaxQueries); + m_counterSampleBuffers[1] = NewTimestampSampleBuffer(m_device, MaxQueries); + + m_timestampRequestTime.resize(MaxQueries); + + MTLTimestamp cpuTimestamp = 0; + MTLTimestamp gpuTimestamp = 0; + [m_device sampleTimestamps:&cpuTimestamp gpuTimestamp:&gpuTimestamp]; + m_mostRecentTimestamp = gpuTimestamp; + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Metal): %llu", cpuTimestamp)); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: GPU timestamp (Metal): %llu", gpuTimestamp)); + + cpuTimestamp = Profiler::GetTime(); + TracyMetalDebugMasked(1<<0, TracyMetalPanic(, "Calibration: CPU timestamp (Tracy): %llu", cpuTimestamp)); + + float period = 1.0f; + + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, int64_t(cpuTimestamp)); + MemWrite(&item->gpuNewContext.gpuTime, int64_t(gpuTimestamp)); + MemWrite(&item->gpuNewContext.thread, uint32_t(0)); // TODO: why not GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, m_contextId); + //MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.flags, GpuContextFlags(0)); + MemWrite(&item->gpuNewContext.type, GpuContextType::Metal); + SubmitQueueItem(item); + } + + ~MetalCtx() + { + // collect the last remnants of Metal GPU activity... 
+ // TODO: add a timeout to this loop? + while (m_previousCheckpoint.load() != m_queryCounter.load()) + Collect(); + } + + tracy_force_inline void SubmitQueueItem(QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + tracy_force_inline uint32_t RingIndex(uintptr_t index) + { + index %= MaxQueries; + return static_cast(index); + } + + tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end) + { + // wrap-around safe: all unsigned + uintptr_t count = end - begin; + return static_cast(count); + } + + tracy_force_inline uint32_t RingSize() const + { + return MaxQueries; + } + + struct Query { id buffer; uint32_t idx; }; + + tracy_force_inline Query NextQuery() + { + TracyMetalDebug_0b00010( ZoneScopedNC("Tracy::MetalCtx::NextQuery", tracy::Color::LightCoral) ); + auto id = m_queryCounter.fetch_add(2); + TracyMetalDebug_0b00010( ZoneValue(id) ); + auto count = RingCount(m_previousCheckpoint, id); + if (count >= MaxQueries) + { + // TODO: return a proper (hidden) "sentinel" query + Query sentinel = Query{ m_counterSampleBuffers[1], MaxQueries-2 }; + TracyMetalPanic( + return sentinel, + "NextQueryId: FULL! too many pending timestamp queries. Consider calling TracyMetalCollect() more frequently. [%llu, %llu] (%u)", + m_previousCheckpoint.load(), id, count + ); + } + uint32_t buffer_idx = (id / MaxQueries) % 2; + TracyMetalDebug_0b00010( ZoneValue(buffer_idx) ); + auto buffer = m_counterSampleBuffers[buffer_idx]; + if (buffer == nil) + TracyMetalPanic(, "NextQueryId: sample buffer is nil! 
(id=%llu)", id); + uint32_t idx = RingIndex(id); + TracyMetalDebug_0b00010( ZoneValue(idx) ); + TracyMetalDebug_0b00010( TracyAllocN((void*)(uintptr_t)idx, 2, "TracyMetalTimestampQueryId") ); + m_timestampRequestTime[idx] = std::chrono::high_resolution_clock::now(); + return Query{ buffer, idx }; + } + + tracy_force_inline uint8_t GetContextId() const + { + return m_contextId; + } + + static id NewTimestampSampleBuffer(id device, size_t count) + { + ZoneScopedN("tracy::MetalCtx::NewTimestampSampleBuffer"); + + id timestampCounterSet = nil; + for (id counterSet in device.counterSets) + { + if ([counterSet.name isEqualToString:MTLCommonCounterSetTimestamp]) + { + timestampCounterSet = counterSet; + break; + } + } + if (timestampCounterSet == nil) + { + TracyMetalPanic({assert(false);} return nil, "ERROR: timestamp counters are not supported on the platform."); + } + + MTLCounterSampleBufferDescriptor* sampleDescriptor = [[MTLCounterSampleBufferDescriptor alloc] init]; + sampleDescriptor.counterSet = timestampCounterSet; + sampleDescriptor.sampleCount = MaxQueries; + sampleDescriptor.storageMode = MTLStorageModeShared; + sampleDescriptor.label = @"TracyMetalTimestampPool"; + + NSError* error = nil; + id counterSampleBuffer = [device newCounterSampleBufferWithDescriptor:sampleDescriptor error:&error]; + if (error != nil) + { + //NSLog(@"%@ | %@", error.localizedDescription, error.localizedFailureReason); + TracyMetalPanic({assert(false);} return nil, + "ERROR: unable to create sample buffer for timestamp counters : %s | %s", + [error.localizedDescription cString], [error.localizedFailureReason cString]); + } + + return counterSampleBuffer; + } + + uint8_t m_contextId = 255; + + id m_device = nil; + id m_counterSampleBuffers [2] = {}; + + using atomic_counter = std::atomic; + static_assert(atomic_counter::is_always_lock_free); + atomic_counter m_queryCounter = 0; + + atomic_counter m_previousCheckpoint = 0; + MTLTimestamp m_mostRecentTimestamp = 0; + + std::vector 
m_timestampRequestTime; + + std::mutex m_collectionMutex; +}; + +class MetalZoneScope +{ +public: + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLComputePassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false);} return, "compute pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLBlitPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false); }return, "blit pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfEncoderSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfEncoderSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + tracy_force_inline MetalZoneScope( MetalCtx* ctx, MTLRenderPassDescriptor* desc, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if ( !m_active ) return; + if (desc == nil) TracyMetalPanic({assert(false);} return, "render pass descriptor is nil."); + m_ctx = ctx; + + auto& query = m_query = ctx->NextQuery(); + + 
desc.sampleBufferAttachments[0].sampleBuffer = query.buffer; + desc.sampleBufferAttachments[0].startOfVertexSampleIndex = query.idx+0; + desc.sampleBufferAttachments[0].endOfVertexSampleIndex = MTLCounterDontSample; + desc.sampleBufferAttachments[0].startOfFragmentSampleIndex = MTLCounterDontSample; + desc.sampleBufferAttachments[0].endOfFragmentSampleIndex = query.idx+1; + + SubmitZoneBeginGpu(ctx, query.idx + 0, srcloc); + } + + /* TODO: implement this constructor interfarce for "command-level" profiling, if the device supports it + tracy_force_inline MetalZoneScope( MetalCtx* ctx, id cmdEncoder, const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_ctx = ctx; + m_cmdEncoder = cmdEncoder; + + auto& query = m_query = ctx->NextQueryId(); + + [m_cmdEncoder sampleCountersInBuffer:m_ctx->m_counterSampleBuffer atSampleIndex:query.idx withBarrier:YES]; + + SubmitZoneBeginGpu(ctx, query.idx, srcloc); + } + */ + + tracy_force_inline ~MetalZoneScope() + { + if( !m_active ) return; + + SubmitZoneEndGpu(m_ctx, m_query.idx + 1); + } + + TracyMetalDebugZoneScopeWireTap; + +private: + const bool m_active; + + MetalCtx* m_ctx; + + /* TODO: declare it for "command-level" profiling + id m_cmdEncoder; + */ + + static void SubmitZoneBeginGpu(MetalCtx* ctx, uint32_t queryId, const SourceLocationData* srcloc) + { + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, 
"TracyMetalGpuZone")); + } + + static void SubmitZoneEndGpu(MetalCtx* ctx, uint32_t queryId) + { + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + + TracyMetalDebugMasked(1<<2, TracyAllocN((void*)(uintptr_t)queryId, 1, "TracyMetalGpuZone")); + } + + MetalCtx::Query m_query = {}; +}; + +} + +using TracyMetalCtx = tracy::MetalCtx; + +#define TracyMetalContext(device) tracy::MetalCtx::Create(device) +#define TracyMetalDestroy(ctx) tracy::MetalCtx::Destroy(ctx) +#define TracyMetalContextName(ctx, name, size) ctx->Name(name, size) + +#define TracyMetalZone( ctx, encoderDesc, name ) TracyMetalNamedZone( ctx, ___tracy_gpu_zone, encoderDesc, name, true ) +#define TracyMetalZoneC( ctx, encoderDesc, name, color ) TracyMetalNamedZoneC( ctx, ___tracy_gpu_zone, encoderDesc, name, color, true ) +#define TracyMetalNamedZone( ctx, varname, encoderDesc, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); +#define TracyMetalNamedZoneC( ctx, varname, encoderDesc, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::MetalZoneScope varname( ctx, encoderDesc, &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); + +#define TracyMetalCollect( ctx ) ctx->Collect(); + + + +#undef TracyMetalDebug_ZoneScopeWireTap +#undef TracyMetalDebug_0b00010 +#undef TracyMetalDebug_0b10000 +#undef 
TracyMetalDebugMasked +#undef TRACY_METAL_DEBUG_MASK +#undef TRACY_METAL_TIMESTAMP_COLLECT_TIMEOUT +#undef TracyMetalPanic +#undef TRACY_METAL_VA_ARGS + +#endif + +#endif//__TRACYMETAL_HMM__ diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyOpenCL.hpp b/project/thirdparty/tracy-0.12.0/tracy/TracyOpenCL.hpp similarity index 99% rename from project/thirdparty/tracy-0.11.1/tracy/TracyOpenCL.hpp rename to project/thirdparty/tracy-0.12.0/tracy/TracyOpenCL.hpp index 20d0a7cab..ede5c4613 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyOpenCL.hpp +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyOpenCL.hpp @@ -255,7 +255,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active) + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int32_t depth, bool is_active) #ifdef TRACY_ON_DEMAND : m_active(is_active&& GetProfiler().IsConnected()) #else @@ -304,7 +304,7 @@ namespace tracy { Profiler::QueueSerialFinish(); } - tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active) + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active) #ifdef TRACY_ON_DEMAND : m_active(is_active && GetProfiler().IsConnected()) #else diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyOpenGL.hpp b/project/thirdparty/tracy-0.12.0/tracy/TracyOpenGL.hpp similarity index 98% rename from project/thirdparty/tracy-0.11.1/tracy/TracyOpenGL.hpp rename to project/thirdparty/tracy-0.12.0/tracy/TracyOpenGL.hpp index 3bdadccee..30abd4fd0 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyOpenGL.hpp +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyOpenGL.hpp @@ 
-25,7 +25,7 @@ class GpuCtxScope { public: GpuCtxScope( const SourceLocationData*, bool ) {} - GpuCtxScope( const SourceLocationData*, int, bool ) {} + GpuCtxScope( const SourceLocationData*, int32_t, bool ) {} }; } @@ -222,7 +222,7 @@ class GpuCtxScope TracyLfqCommit; } - tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active ) + tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -271,7 +271,7 @@ class GpuCtxScope TracyLfqCommit; } - tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active ) + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyVulkan.hpp b/project/thirdparty/tracy-0.12.0/tracy/TracyVulkan.hpp similarity index 97% rename from project/thirdparty/tracy-0.11.1/tracy/TracyVulkan.hpp rename to project/thirdparty/tracy-0.12.0/tracy/TracyVulkan.hpp index c34b71852..72643188f 100644 --- a/project/thirdparty/tracy-0.11.1/tracy/TracyVulkan.hpp +++ b/project/thirdparty/tracy-0.12.0/tracy/TracyVulkan.hpp @@ -265,7 +265,7 @@ class VkCtx } #endif assert( head > m_tail ); - + const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); unsigned int cnt; @@ -531,7 +531,7 @@ class VkCtxScope Profiler::QueueSerialFinish(); } - tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active ) + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) 
#ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -545,8 +545,17 @@ class VkCtxScope const auto queryId = ctx->NextQueryId(); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); + QueueItem *item; + if( depth > 0 && has_callstack() ) + { + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + } MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); @@ -580,7 +589,7 @@ class VkCtxScope Profiler::QueueSerialFinish(); } - tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active ) + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int32_t depth, bool is_active ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) #else @@ -595,8 +604,17 @@ class VkCtxScope CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); - auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); - MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + QueueItem *item; + if( depth > 0 && has_callstack() ) + 
{ + item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + } + else + { + item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + } MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index 37e0ffc20..2cbd0d05a 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -140,7 +140,7 @@ - +