Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@
url = https://github.com/pybind/pybind11.git
[submodule "external/gotcha"]
path = external/gotcha
url = https://jrmadsen@github.com/jrmadsen/GOTCHA
url = https://github.com/ROCm/GOTCHA
3 changes: 3 additions & 0 deletions benchmark/source/bin/mandelbrot/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)

find_package(rocprofiler-sdk-roctx REQUIRED)

set_source_files_properties(mandelbrot.cpp PROPERTIES LANGUAGE HIP)
set_source_files_properties(utils.cpp PROPERTIES LANGUAGE HIP)

Expand All @@ -37,6 +39,7 @@ target_sources(mandelbrot PRIVATE mandelbrot.cpp utils.cpp)
target_compile_options(mandelbrot PRIVATE -W -Wall -Wextra -Wpedantic -Werror
-ffp-contract=fast)
target_include_directories(mandelbrot PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(mandelbrot PRIVATE rocprofiler-sdk-roctx::rocprofiler-sdk-roctx)

install(
TARGETS mandelbrot
Expand Down
33 changes: 33 additions & 0 deletions benchmark/source/bin/mandelbrot/mandelbrot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

#include "utils.hpp"

#include <rocprofiler-sdk-roctx/roctx.h>

#include <hip/hip_vector_types.h>
#include <hip/math_functions.h>
#include <omp.h>
Expand Down Expand Up @@ -473,6 +475,19 @@ hipPerfMandelBrot::printResults()
std::cout << std::endl;
}

/// Scoped ROCTx range marker.
///
/// Construction streams every argument, in order, into a std::stringstream and
/// pushes the resulting text as a range label via roctxRangePush(); destruction
/// calls roctxRangePop(). Each argument must therefore be streamable with
/// operator<< into a std::ostream. Constructing with no arguments pushes an
/// empty label.
///
/// Each object must pair exactly one push with exactly one pop, so copying is
/// disabled: a copy would run the destructor twice for a single push. Because
/// the copy operations are user-declared, no implicit move operations are
/// generated either. Initializing from a prvalue, e.g.
/// `auto _range = roctx_range{"label"};`, still compiles in C++17 thanks to
/// guaranteed copy elision.
struct roctx_range
{
    template <typename... Args>
    roctx_range(Args&&... args)
    {
        auto _ss = std::stringstream{};
        // fold over the comma operator: stream each argument left-to-right
        ((_ss << args), ...);
        roctxRangePush(_ss.str().c_str());
    }

    ~roctx_range() { roctxRangePop(); }

    // non-copyable (and, as a consequence, non-movable): see the type comment
    roctx_range(const roctx_range&) = delete;
    roctx_range& operator=(const roctx_range&) = delete;
};

// Wrappers for the kernel launches
void
hipPerfMandelBrot::float_mad(uint* out,
Expand All @@ -487,6 +502,9 @@ hipPerfMandelBrot::float_mad(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};

int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mad_kernel<float>,
dim3(blocks),
Expand Down Expand Up @@ -515,6 +533,9 @@ hipPerfMandelBrot::float_mandel_unroll(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};

int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mandel_unroll_kernel<float>,
dim3(blocks),
Expand Down Expand Up @@ -543,6 +564,9 @@ hipPerfMandelBrot::double_mad(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};

int streamCnt = getNumStreams();
hipLaunchKernelGGL(double_mad_kernel<double>,
dim3(blocks),
Expand Down Expand Up @@ -571,6 +595,9 @@ hipPerfMandelBrot::double_mandel_unroll(uint* out,
int threads_per_block,
int kernelCnt)
{
auto _range =
roctx_range{__FUNCTION__, "(streams=", getNumStreams(), ", kernels=", kernelCnt, ")"};

int streamCnt = getNumStreams();
hipLaunchKernelGGL(float_mandel_unroll_kernel<double>,
dim3(blocks),
Expand All @@ -589,6 +616,8 @@ hipPerfMandelBrot::double_mandel_unroll(uint* out,
void
hipPerfMandelBrot::run(unsigned int testCase, unsigned int /* deviceId */)
{
auto _run_range = roctx_range{__FUNCTION__, "(testCase=", testCase, ")"};

unsigned int numStreams = getNumStreams();
coordIdx = testCase % numCoords;

Expand Down Expand Up @@ -667,6 +696,8 @@ hipPerfMandelBrot::run(unsigned int testCase, unsigned int /* deviceId */)

for(unsigned int k = 0; k < numLoops; k++)
{
auto _loop_range = roctx_range{__FUNCTION__, "(testCase=", testCase, ") :: loop #", k};

if((testCase == 0 || testCase == 1 || testCase == 2 || testCase == 5 || testCase == 6 ||
testCase == 7 || testCase == 10 || testCase == 11 || testCase == 12))
{
Expand Down Expand Up @@ -805,6 +836,8 @@ hipPerfMandelBrot::checkData(uint* ptr)
int
main(int argc, char* argv[])
{
auto _range = roctx_range{argv[0]};

// Default values for kernels and streams
unsigned int numStreamsWarmup = 1, numKernelsWarmup = 1;
unsigned int numStreamsSync = 1, numKernelsSync = 1;
Expand Down
10 changes: 6 additions & 4 deletions cmake/rocprofiler_build_settings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,12 @@ rocprofiler_target_compile_options(
rocprofiler-sdk-debug-flags INTERFACE "-g3" "-fno-omit-frame-pointer"
"-fno-optimize-sibling-calls")

target_compile_options(
rocprofiler-sdk-debug-flags
INTERFACE $<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:GNU>:-rdynamic>>
$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:GNU>:-rdynamic>>)
if(NOT ROCPROFILER_ENABLE_CLANG_TIDY)
target_compile_options(
rocprofiler-sdk-debug-flags
INTERFACE $<$<COMPILE_LANGUAGE:C>:$<$<C_COMPILER_ID:GNU>:-rdynamic>>
$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:GNU>:-rdynamic>>)
endif()

if(NOT APPLE AND NOT ROCPROFILER_ENABLE_CLANG_TIDY)
target_link_options(rocprofiler-sdk-debug-flags INTERFACE
Expand Down
32 changes: 32 additions & 0 deletions cmake/rocprofiler_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ rocprofiler_add_option(
"Use (internal) <rocprofiler-sdk/rccl/details/api_trace.h> instead of RCCL-provided <rccl/amd_detail/api_trace.h>. Note: this should never be used in production"
OFF
ADVANCED)
rocprofiler_add_option(
ROCPROFILER_BUILD_PYTHON
"Enable building the Python bindings for roctx and rocpd. Note: this should not be disabled unless absolutely necessary"
ON
ADVANCED)

rocprofiler_add_option(
ROCPROFILER_BUILD_GHC_FS
Expand Down Expand Up @@ -162,3 +167,30 @@ set(ROCPROFILER_DEFAULT_FAIL_REGEX
# this should be defaulted to OFF by ROCm 7.0.1 or 7.1 this should only used to disable
# sample tests in extreme circumstances
option(ROCPROFILER_DISABLE_UNSTABLE_CTESTS "Disable unstable tests" ON)

# Resolve ROCPROFILER_PYTHON_VERSIONS, the list of Python versions the bindings
# are built for. When ROCPROFILER_BUILD_PYTHON is enabled the list is either
# taken as already set or auto-detected; when disabled it is set to empty.
if(ROCPROFILER_BUILD_PYTHON)
# make sure we have all python version candidates
set(ROCPROFILER_PYTHON_VERSION_CANDIDATES
"3.20;3.19;3.18;3.17;3.16;3.15;3.14;3.13;3.12;3.11;3.10;3.9;3.8;3.7;3.6"
CACHE STRING "Python versions to search for, newest first")

# Nothing set (or set to an empty/false value): drop any existing cache entry
# and populate the cache with the versions detected on this system.
if(NOT ROCPROFILER_PYTHON_VERSIONS)
unset(ROCPROFILER_PYTHON_VERSIONS CACHE)
rocprofiler_get_default_python_versions(DEFAULT_PYTHON_VERSIONS)
set(ROCPROFILER_PYTHON_VERSIONS
"${DEFAULT_PYTHON_VERSIONS}"
CACHE STRING "")
endif()

# Still empty after detection: abort configuration with an actionable message.
if(NOT ROCPROFILER_PYTHON_VERSIONS)
message(
FATAL_ERROR
"No python3 versions found for building rocprofiler-sdk Python bindings. Either install Python3 development package(s) (i.e. Python.h + python library) or set ROCPROFILER_BUILD_PYTHON=OFF"
)
endif()
else()
# Python bindings disabled: set the version list to empty (normal variable).
set(ROCPROFILER_PYTHON_VERSIONS "")
endif()

# Register the resolved version list as a reported feature.
rocprofiler_add_feature(ROCPROFILER_PYTHON_VERSIONS
"ROCTx and ROCpd Python bindings build versions")
75 changes: 75 additions & 0 deletions cmake/rocprofiler_utilities.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1026,4 +1026,79 @@ function(rocprofiler_install_env_setup_files)
COMPONENT ${RIEF_COMPONENT})
endfunction()

# rocprofiler_reset_python3_cache([<extra-var>...])
#
# Unsets, both as cache entries and as normal variables, the Python3-related
# variables listed below plus any additional variable names passed as
# arguments. Being a macro, the normal-variable unsets take effect in the
# caller's scope.
#
# In this file it is invoked immediately before each find_package(Python3 ...)
# call, presumably so that the outcome of an earlier search does not influence
# the next one.
macro(rocprofiler_reset_python3_cache)
foreach(
_VAR
_Python3_Compiler_REASON_FAILURE
_Python3_Development_REASON_FAILURE
_Python3_EXECUTABLE
_Python3_INCLUDE_DIR
_Python3_INTERPRETER_PROPERTIES
_Python3_INTERPRETER_SIGNATURE
_Python3_LIBRARY_RELEASE
_Python3_NumPy_REASON_FAILURE
Python3_EXECUTABLE
Python3_INCLUDE_DIR
Python3_INTERPRETER_ID
Python3_STDLIB
Python3_STDARCH
Python3_SITELIB
Python3_SOABI
${ARGN})
# remove the cache entry first, then any normal variable of the same name
unset(${_VAR} CACHE)
unset(${_VAR})
endforeach()
endmacro()

# rocprofiler_find_python3(<version> [<find_package-args>...])
#
# Resets the Python3 find state and then runs a REQUIRED, MODULE-mode
# find_package(Python3) for the Interpreter and Development components.
#
# <version> must be either:
#   <MAJOR>.<MINOR>.<PATCH>  -> searched with EXACT
#   <MAJOR>.<MINOR>          -> searched with the version range
#                               <MAJOR>.<MINOR>.0...<MAJOR>.<MINOR>.999
# Any other form aborts configuration with a FATAL_ERROR.
#
# Additional arguments are forwarded verbatim to find_package. Being a macro,
# the variables set by find_package land in the caller's scope.
macro(rocprofiler_find_python3 _VERSION)
rocprofiler_reset_python3_cache()

if("${_VERSION}" MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$")
find_package(Python3 ${_VERSION} EXACT ${ARGN} REQUIRED MODULE
COMPONENTS Interpreter Development)
elseif("${_VERSION}" MATCHES "^([0-9]+)\\.([0-9]+)$")
find_package(Python3 ${_VERSION}.0...${_VERSION}.999 ${ARGN} REQUIRED MODULE
COMPONENTS Interpreter Development)
else()
message(
FATAL_ERROR
"Invalid Python3 version (${_VERSION}). Specify <MAJOR>.<MINOR> or <MAJOR>.<MINOR>.<PATCH>"
)
endif()
endmacro()

# rocprofiler_get_default_python_versions(<out-var>)
#
# Probes for each version in ROCPROFILER_PYTHON_VERSION_CANDIDATES (Interpreter
# + Development components) and stores the list of "<MAJOR>.<MINOR>" strings
# that were found in <out-var> in the caller's scope, in candidate order.
# If nothing is found, <out-var> is left untouched.
function(rocprofiler_get_default_python_versions _VAR)
set(_PYTHON_FOUND_VERSIONS)

foreach(_VER IN LISTS ROCPROFILER_PYTHON_VERSION_CANDIDATES)
# reset before every probe so each candidate is searched independently
rocprofiler_reset_python3_cache()
find_package(Python3 ${_VER} EXACT QUIET COMPONENTS Interpreter Development)

if(Python3_FOUND)
list(APPEND _PYTHON_FOUND_VERSIONS
"${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}")
endif()
endforeach()

# If none found, do one last check for 3.6 (no EXACT); note this search is not QUIET
if(NOT _PYTHON_FOUND_VERSIONS)
rocprofiler_reset_python3_cache()
find_package(Python3 3.6 COMPONENTS Interpreter Development)

if(Python3_FOUND)
list(APPEND _PYTHON_FOUND_VERSIONS
"${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}")
endif()
endif()

# Set the output variable to the full list of found versions, if any
if(_PYTHON_FOUND_VERSIONS)
set(${_VAR}
"${_PYTHON_FOUND_VERSIONS}"
PARENT_SCOPE)
endif()

# clear the variables touched by the probing above before returning
rocprofiler_reset_python3_cache()
endfunction()

cmake_policy(POP)
2 changes: 2 additions & 0 deletions external/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ target_sources(
rocprofiler-sdk-perfetto-static-library
PRIVATE ${PROJECT_SOURCE_DIR}/external/perfetto/sdk/perfetto.h
${PROJECT_SOURCE_DIR}/external/perfetto/sdk/perfetto.cc)
target_compile_definitions(rocprofiler-sdk-perfetto-static-library
PRIVATE $<BUILD_INTERFACE:PERFETTO_DISABLE_LOG=1>)
target_include_directories(
rocprofiler-sdk-perfetto-static-library SYSTEM
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/external/perfetto/sdk>)
Expand Down
2 changes: 1 addition & 1 deletion source/include/rocprofiler-sdk-rocpd/sql.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ typedef enum ROCPD_EXPERIMENTAL rocpd_sql_schema_kind_t
ROCPD_SQL_SCHEMA_ROCPD_VIEWS,
ROCPD_SQL_SCHEMA_ROCPD_DATA_VIEWS,
ROCPD_SQL_SCHEMA_ROCPD_SUMMARY_VIEWS,
ROCPD_SQL_SCHEMA_ROCPD_MARKER_VIEWS,
ROCPD_SQL_SCHEMA_ROCPD_METADATA,
ROCPD_SQL_SCHEMA_LAST,
} rocpd_sql_schema_kind_t;

Expand Down
4 changes: 2 additions & 2 deletions source/lib/common/logging.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@

#if defined(ROCPROFILER_CI)
# define ROCP_CI_LOG_IF(NON_CI_LEVEL, ...) ROCP_FATAL_IF(__VA_ARGS__)
# define ROCP_CI_LOG(NON_CI_LEVEL, ...) ROCP_FATAL
# define ROCP_CI_LOG(NON_CI_LEVEL) ROCP_FATAL
#else
# define ROCP_CI_LOG_IF(NON_CI_LEVEL, ...) ROCP_##NON_CI_LEVEL##_IF(__VA_ARGS__)
# define ROCP_CI_LOG(NON_CI_LEVEL, ...) ROCP_##NON_CI_LEVEL
# define ROCP_CI_LOG(NON_CI_LEVEL) ROCP_##NON_CI_LEVEL
#endif

namespace rocprofiler
Expand Down
60 changes: 55 additions & 5 deletions source/lib/common/simple_timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "lib/common/simple_timer.hpp"
#include "lib/common/logging.hpp"
#include "lib/common/synchronized.hpp"

#include <fmt/format.h>

Expand All @@ -35,14 +36,16 @@ namespace rocprofiler
{
namespace common
{
simple_timer::simple_timer(std::string&& label)
simple_timer::simple_timer(std::string&& label, int log_level)
: m_label{std::move(label)}
, m_log_level{log_level}
{
start();
}

simple_timer::simple_timer(std::string&& label, defer_start)
simple_timer::simple_timer(std::string&& label, defer_start, int log_level)
: m_label{std::move(label)}
, m_log_level{log_level}
{}

simple_timer::~simple_timer()
Expand All @@ -52,19 +55,21 @@ simple_timer::~simple_timer()
else if(m_end <= m_beg)
stop();

ROCP_WARNING << fmt::format("{} :: {:12.6f} sec", m_label, get());
report();
}

void
simple_timer&
simple_timer::start()
{
m_beg = clock_type::now();
return *this;
}

void
simple_timer&
simple_timer::stop()
{
m_end = clock_type::now();
return *this;
}

double
Expand All @@ -81,6 +86,51 @@ simple_timer::get_nsec() const
return std::chrono::duration_cast<std::chrono::nanoseconds>(m_end - m_beg).count();
}

// Emits "<label> :: <elapsed> sec" through the logging macro that corresponds
// to m_log_level (WARNING, ERROR, INFO or TRACE); ROCP_LOG_LEVEL_NONE and any
// other value emit nothing. The label is left-aligned and padded to the widest
// label reported so far by any timer, capped at 120 columns. Returns *this.
simple_timer&
simple_timer::report()
{
    // widest label seen so far, shared by every simple_timer instance
    static auto max_width = Synchronized<uint64_t>{0};
    max_width.wlock(
        [](auto& _widest, auto _len) {
            constexpr auto _cap = 120;
            if(_len > _widest) _widest = _len;
            if(_widest > _cap) _widest = _cap;
        },
        m_label.size());

    const auto _pad = max_width.get();

    // one formatter for all levels; it is only invoked inside the chosen log
    // statement, exactly where the original formatted the message
    auto _message = [this, _pad]() {
        return fmt::format("{:<{}} :: {:12.6f} sec", m_label, _pad, get());
    };

    if(m_log_level == ROCP_LOG_LEVEL_WARNING)
    {
        ROCP_WARNING << _message();
    }
    else if(m_log_level == ROCP_LOG_LEVEL_ERROR)
    {
        ROCP_ERROR << _message();
    }
    else if(m_log_level == ROCP_LOG_LEVEL_INFO)
    {
        ROCP_INFO << _message();
    }
    else if(m_log_level == ROCP_LOG_LEVEL_TRACE)
    {
        ROCP_TRACE << _message();
    }
    // ROCP_LOG_LEVEL_NONE and unrecognized levels: intentionally silent

    return *this;
}

std::ostream&
operator<<(std::ostream& _os, const simple_timer& _val)
{
Expand Down
Loading
Loading