Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
21c9f8d
migrate CSV to SDK method of writing CSVs
a-canadasruiz Jul 10, 2025
e6c9dde
Fix review comments and formatting
yhuiYH Jul 17, 2025
daefb04
replace azure runners with internal (#518)
jbonnell-amd Jul 17, 2025
881aceb
[SDK] Update UUID (`rocprofiler_uuid_t`) (#390)
SrirakshaNag Jul 18, 2025
5853dc2
[SDK] Fix null handles (#474)
jrmadsen Jul 18, 2025
ee6a176
[Stochastic PC Sampling][GFX9] Increasing ROCr buffer to 4MBs. (#487)
vlaindic Jul 18, 2025
55420c2
Adding inline callstack information to disassembly (#468)
Jul 22, 2025
e08d47e
MI350 stochastic support in changelog (#530)
vlaindic Jul 22, 2025
b32c1fd
[rocprofv3-avail] - Add sample data (#514)
SrirakshaNag Jul 22, 2025
3be363d
Update output file fields docs to correctly define Grid_Size (#526)
harkgill-amd Jul 22, 2025
3d8d190
[SDK] Expose counter dims in rocprofiler_counter_info_v1_t and only s…
venkat1361 Jul 22, 2025
a88cd74
Thread trace and Trace Decoder API tests and samples (#416)
Jul 23, 2025
a83efc1
[SDK] Fix context tracing domain bitset overflow (#536)
MythreyaK Jul 23, 2025
0ded7f0
[rocprofv3] rocpd doesn't generate output files for counter collectio…
srihari-uttanur Jul 24, 2025
3da3f57
[CMake] Fix thread trace sample ENVIRONMENT test property (#544)
jrmadsen Jul 24, 2025
2c3528e
[rocpd] Adding summary module to generate summaries from rocpd databa…
yhuiYH Jul 24, 2025
afed510
PCS test: cast agent name to str (#546)
vlaindic Jul 29, 2025
aa479ef
Removing ATT buffer size limitation (#534)
Jul 29, 2025
6d9f289
SWDEV-544115 Adding documentation for rocprofv3 advanced options (#516)
bgopesh Jul 30, 2025
9f145ed
use new azure runners (#533)
jbonnell-amd Jul 30, 2025
a3e96b7
[Samples] Remove thread trace sample dependency on rocprofiler-sdk-am…
venkat1361 Jul 31, 2025
3a7ff6c
Adding MI100 counters (#501)
Sushma1203 Jul 31, 2025
9f08aa1
migrate CSV to SDK method of writing CSVs
a-canadasruiz Jul 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/code_coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ jobs:
strategy:
# fail-fast: false
matrix:
runner: ['rocprof-azure']
runner: ['mi300x']
os: ['ubuntu-22.04']
build-type: ['Release']

runs-on: ${{ matrix.runner }}-emu-runner-set
runs-on: rocprof-ubuntu-22

# define this for containers
env:
Expand Down
51 changes: 26 additions & 25 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,13 @@ jobs:
strategy:
fail-fast: false
matrix:
runner: ['navi3', 'navi4', 'mi3xx']
os: ['ubuntu-22.04']
build-type: ['RelWithDebInfo']
ci-flags: ['--linter clang-tidy']
system: [
{gpu: 'navi3', runner: 'navi3-emu-runner-set', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', ci-flags: '--linter clang-tidy'},
{gpu: 'navi4', runner: 'navi4-emu-runner-set', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', ci-flags: '--linter clang-tidy'},
{gpu: 'mi3xx', runner: 'rocprof-ubuntu-22', os: 'ubuntu-22.04', build-type: 'RelWithDebInfo', ci-flags: '--linter clang-tidy'}
]

runs-on: ${{ matrix.runner == 'mi3xx' && 'rocprof-azure' || matrix.runner }}-emu-runner-set
runs-on: ${{ matrix.system.runner }}

# define this for containers
env:
Expand Down Expand Up @@ -90,7 +91,7 @@ jobs:
ls -la

- name: Enable PC Sampling
if: ${{ contains(matrix.runner, 'mi200') || contains(matrix.runner, 'mi300a') }}
if: ${{ contains(matrix.system.gpu, 'mi200') || contains(matrix.system.gpu, 'mi300a') }}
shell: bash
run: |
echo 'ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1' >> $GITHUB_ENV
Expand All @@ -100,39 +101,39 @@ jobs:
shell: bash
run:
python3 ./source/scripts/run-ci.py -B build
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.os }}-${{ matrix.runner }}-core
--name ${{ github.repository }}-${{ github.ref_name }}-${{ matrix.system.os }}-${{ matrix.system.gpu }}-core
--build-jobs 16
--site ${RUNNER_HOSTNAME}
--gpu-targets ${{ env.GPU_TARGETS }}
--run-attempt ${{ github.run_attempt }}
${{ matrix.ci-flags }}
${{ matrix.system.ci-flags }}
--
-DROCPROFILER_DEP_ROCMCORE=ON
-DROCPROFILER_BUILD_DOCS=OFF
-DCMAKE_BUILD_TYPE=${{ matrix.build-type }}
-DCMAKE_BUILD_TYPE=${{ matrix.system.build-type }}
-DCMAKE_INSTALL_PREFIX=/opt/rocprofiler-sdk
-DCPACK_GENERATOR='DEB;RPM;TGZ'
-DCPACK_PACKAGING_INSTALL_PREFIX="$(realpath /opt/rocm)"
-DPython3_EXECUTABLE=$(which python3)
${{ env.GLOBAL_CMAKE_OPTIONS }}
--
-LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}"
-E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}"
-LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}"
-E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}"

- name: Install
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
timeout-minutes: 10
run:
cmake --build build --target install --parallel 16

- name: Build Packaging
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
timeout-minutes: 10
run:
cmake --build build --target package --parallel 16

- name: Test Install Build
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
timeout-minutes: 20
shell: bash
run: |
Expand All @@ -141,11 +142,11 @@ jobs:
export LD_LIBRARY_PATH=/opt/rocprofiler-sdk/lib:${LD_LIBRARY_PATH}
cmake --build build-samples --target all --parallel 16
cmake --build build-tests --target all --parallel 16
ctest --test-dir build-samples -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-samples -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure

- name: Install Packages
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
timeout-minutes: 5
shell: bash
run: |
Expand All @@ -157,19 +158,19 @@ jobs:
for i in $(ls -S ./build/rocprofiler-sdk*.deb | egrep -v 'roctx|rocpd'); do dpkg --force-all -i ${i}; done;

- name: Test Installed Packages
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
if: ${{ contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
timeout-minutes: 20
shell: bash
run: |
CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-samples-deb /opt/rocm/share/rocprofiler-sdk/samples
CMAKE_PREFIX_PATH=/opt/rocm cmake -B build-tests-deb -DGPU_TARGETS="gfx942" /opt/rocm/share/rocprofiler-sdk/tests
cmake --build build-samples-deb --target all --parallel 16
cmake --build build-tests-deb --target all --parallel 16
ctest --test-dir build-samples-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests-deb -LE "${${{ matrix.runner }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.runner }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-samples-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure
ctest --test-dir build-tests-deb -LE "${${{ matrix.system.gpu }}_EXCLUDE_LABEL_REGEX}" -E "${${{ matrix.system.gpu }}_EXCLUDE_TESTS_REGEX}" --output-on-failure

- name: Archive production artifacts
if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.runner, env.CORE_EXT_RUNNER) }}
if: ${{ github.event_name == 'workflow_dispatch' && contains(matrix.system.gpu, env.CORE_EXT_RUNNER) }}
uses: actions/upload-artifact@v4
with:
name: installers-deb
Expand All @@ -184,11 +185,11 @@ jobs:
fail-fast: false
matrix:
runner: ['mi300']
os: ['rhel', 'sles']
os: ['rhel-8', 'sles-15']
build-type: ['RelWithDebInfo']
ci-flags: ['']

runs-on: rocprof-azure-${{ matrix.os }}-emu-runner-set
runs-on: rocprof-${{ matrix.os }}

# define this for containers
env:
Expand All @@ -203,7 +204,7 @@ jobs:
shell: bash
run: |
git config --global --add safe.directory '*'
if [ "${OS_TYPE}" == "rhel" ]; then
if [ "${OS_TYPE}" == "rhel-8" ]; then
dnf makecache
dnf groupinstall -y "Development Tools"
dnf remove -y gcc-c++
Expand Down Expand Up @@ -261,7 +262,7 @@ jobs:
build-type: ['RelWithDebInfo']

if: ${{ contains(github.event_name, 'pull_request') }}
runs-on: ${{ matrix.runner == 'mi3xx' && 'rocprof-azure' || matrix.runner }}-emu-runner-set
runs-on: ${{ matrix.runner == 'mi3xx' && 'rocprof-ubuntu-22' || matrix.runner }}

# define this for containers
env:
Expand Down
70 changes: 0 additions & 70 deletions .github/workflows/generate_afar.yml

This file was deleted.

8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- absolute == node_id
- relative == logical_node_id
- type-relative == logical_node_type_id
- Added MI300 stochastic (hardware-based) PC sampling support in ROCProfiler-SDK and ROCProfV3
- Added MI300/MI350 stochastic (hardware-based) PC sampling support in ROCProfiler-SDK and ROCProfV3
- Python bindings for rocprofiler-sdk-roctx
- SQLite3 output support for rocprofv3 (`--output-format rocpd`)
- Added `rocprofiler-sdk-rocpd` package
Expand All @@ -193,25 +193,31 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- Added `rocpd` output format documentation
- Requires the ROCprof Trace Decoder plugin installed (see above)
- Added perfetto support for scratch memory.
- Added documentation for rocprofv3 advanced options

### Changed

- SDK no longer creates a background thread when every tool returns a nullptr from `rocprofiler_configure`.
- Updated disassembly.hpp's vaddr-to-file-offset mapping to use the dedicated comgr API.
- `rocprofiler_uuid_t` ABI changed to hold a 128-bit value.
- rocprofv3 shorthand argument for `--collection-period` is now `-P` (upper-case) as `-p` (lower-case) is reserved for later use
- default output format for rocprofv3 is now `rocpd` (SQLite3 database)
- rocprofv3 avail tool renamed from `rocprofv3_avail` to `rocprofv3-avail`
- rocprofv3 avail tool has support for command line arguments.
- rocprofv3 tool now allows for Thread Trace + PC Sampling on the same agent
- fixed inconsistency for what is a "null" handle in `rocprofiler_*_id_t` structs.
- correct answer is `.handle = 0` but some definitions used `UINT64_MAX`

### Resolved issues

- Fixed missing callbacks around internal thread creation within counter collection service
- Fixed potential data race in rocprofiler-sdk double buffering scheme
- Resolved segfaults/exceptions caused by usage of `std::regex` in the core rocprofiler-sdk library when used under dual ABI
- Fixed perfetto counter collection by introducing per dispatch accumulation.
- Fixed code object disassembly missing function inlining information
- Fixed queue preemption error and HSA_STATUS_ERROR_INVALID_PACKET_FORMAT error for stochastic PC-sampling for MI300X, leading to more stable runs.
- Fixed the system hang issue for host-trap PC-sampling on MI300X.
- Fixed rocpd counter collection issue where, when counter collection alone is enabled, the `rocpd_kernel_dispatch` table was populated with counter data instead of kernel dispatch data.

### Removed

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ cmake \
-DCMAKE_PREFIX_PATH=/opt/rocm \
rocprofiler-sdk-source

cmake --build rocprofiler-sdk-build --target all --parallel $(nproc)
cmake --build rocprofiler-sdk-build --target all --parallel $(nproc)
```

To install ROCprofiler, run:
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ cmake>=3.21.0
cmake-format
dataclasses
flake8
jinja2
numpy
otf2
pandas
perfetto
pycobertura
pytest
pyyaml
reportlab
1 change: 1 addition & 0 deletions samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ add_subdirectory(code_object_isa_decode)
add_subdirectory(external_correlation_id_request)
add_subdirectory(pc_sampling)
add_subdirectory(openmp_target)
add_subdirectory(thread_trace)
2 changes: 1 addition & 1 deletion samples/counter_collection/buffered_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ fill_dimension_cache(rocprofiler_counter_id_t counter)
(*dimension_cache())
->emplace(counter.handle,
std::vector<rocprofiler_counter_record_dimension_info_t>{
info.dimensions, info.dimensions + info.dimensions_count});
*info.dimensions, *info.dimensions + info.dimensions_count});
}

/**
Expand Down
4 changes: 2 additions & 2 deletions samples/counter_collection/device_counting_sync_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ counter_sampler::get_counter_size(rocprofiler_counter_id_t counter)
ROCPROFILER_CALL(rocprofiler_query_counter_info(
counter, ROCPROFILER_COUNTER_INFO_VERSION_1, static_cast<void*>(&info)),
"Could not query info for counter");
return info.instance_ids_count;
return info.dimensions_instances_count;
}

std::unordered_map<std::string, rocprofiler_counter_id_t>
Expand Down Expand Up @@ -339,7 +339,7 @@ counter_sampler::get_counter_dimensions(rocprofiler_counter_id_t counter)
counter, ROCPROFILER_COUNTER_INFO_VERSION_1, static_cast<void*>(&info)),
"Could not query info for counter");
return std::vector<rocprofiler_counter_record_dimension_info_t>{
info.dimensions, info.dimensions + info.dimensions_count};
*info.dimensions, *info.dimensions + info.dimensions_count};
}

std::atomic<bool>&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,15 +292,15 @@ dispatch_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data,
"Could not query counter_id");
cap.expected_counter_names.emplace(found_counter.handle, std::string(info.name));
cap.remaining.push_back(found_counter);
cap.expected.emplace(found_counter.handle, info.instance_ids_count);
cap.expected.emplace(found_counter.handle, info.dimensions_instances_count);

auto& info_vector =
cap.expected_data_dims.emplace(found_counter.handle, validate_dim_presence{})
.first->second;

for(uint64_t i = 0; i < info.dimensions_count; i++)
{
info_vector.maybe_forward(info.dimensions[i]);
info_vector.maybe_forward(*info.dimensions[i]);
}
}
if(cap.expected.empty())
Expand Down
Loading