-
Notifications
You must be signed in to change notification settings - Fork 180
[rocprofiler-sdk] Implement HSA profiler serialization with ready_signal support and improve testing infrastructure #192
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,9 @@ | |
| // SOFTWARE. | ||
|
|
||
| #include <hip/hip_runtime.h> | ||
| #include <stdlib.h> | ||
| #include <time.h> | ||
| #include <cassert> | ||
|
|
||
| #include "client.hpp" | ||
|
|
||
|
|
@@ -48,6 +51,92 @@ kernelA(int devid, volatile int* wait_on, int value, int* no_opt) | |
| printf("[device=%i][return] Wait on %i: %i (%i)\n", devid, value, *wait_on, *no_opt); | ||
| } | ||
|
|
||
| // Keep assert() active in Release builds for this test: when NDEBUG is defined, drop it, re-include <cassert> (the assert macro is redefined from the current NDEBUG state on every inclusion), then restore NDEBUG for the rest of the file. | ||
| #ifdef NDEBUG | ||
| # undef NDEBUG | ||
| # include <cassert> | ||
| # define NDEBUG | ||
| #endif | ||
|
||
|
|
||
| __global__ void | ||
| check_order_kernel(int expected, int* actual) | ||
| { | ||
| // Ordering check: on entry *actual must equal `expected`; a mismatch is printed and then trips the assert below. | ||
| // Atomics are deliberately not used here, so a pass depends on the injected barrier having proper fencing set. | ||
| if(*actual != expected) | ||
| { | ||
| printf("[error] Expected %i but got %i\n", expected, *actual); | ||
| } | ||
| // assert() is active in both Debug and Release builds (NDEBUG is undefined around the <cassert> re-include above); once it passes, *actual is incremented. | ||
| assert(*actual == expected); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe needs NDEBUG?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, if it fails, we want the CI to trigger and fail this sample/test (this should never fail).
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, but unless we remove NDEBUG, this check will not be there in Release builds.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh thats what you are asking about, changed. Thanks for the comment here! |
||
| (*actual)++; | ||
| } | ||
|
|
||
| // Owns two HIP streams created on a single device and provides helpers that issue the | ||
| // same kind of operation on both of them. The streams are created in the constructor and | ||
| // destroyed in the destructor; the type is non-copyable and non-movable so that each pair | ||
| // of stream handles is destroyed exactly once. | ||
| // NOTE(review): HIP_CALL is defined outside this block -- presumably it checks the returned | ||
| // hipError_t; confirm its failure behaviour before relying on it inside the destructor. | ||
| class DualStreamExecutor | ||
| { | ||
| private: | ||
| hipStream_t stream1_ = {}; | ||
| hipStream_t stream2_ = {}; | ||
| int device_ = {0}; | ||
|
|
||
| public: | ||
| // Makes `device` the current device, then creates both streams. | ||
| // explicit: a bare int must not silently convert into an executor. | ||
| explicit DualStreamExecutor(int device = 0) | ||
| : device_(device) | ||
| { | ||
| HIP_CALL(hipSetDevice(device_)); | ||
| HIP_CALL(hipStreamCreate(&stream1_)); | ||
| HIP_CALL(hipStreamCreate(&stream2_)); | ||
| std::cout << "Created dual streams on device " << device_ << std::endl; | ||
| } | ||
|
|
||
| // Destroys both streams created by the constructor. | ||
| ~DualStreamExecutor() | ||
| { | ||
| HIP_CALL(hipStreamDestroy(stream1_)); | ||
| HIP_CALL(hipStreamDestroy(stream2_)); | ||
| } | ||
|
|
||
| // The destructor destroys the stream handles, so a copied or moved-from instance would | ||
| // destroy the same streams a second time. Forbid both. | ||
| DualStreamExecutor(const DualStreamExecutor&) = delete; | ||
| DualStreamExecutor& operator=(const DualStreamExecutor&) = delete; | ||
| DualStreamExecutor(DualStreamExecutor&&) = delete; | ||
| DualStreamExecutor& operator=(DualStreamExecutor&&) = delete; | ||
|
|
||
| // Function template to launch any kernel on both streams: the same kernel, launch | ||
| // configuration and arguments are submitted to stream1_ first and then to stream2_. | ||
| // Launch errors are not checked here. | ||
| template <typename KernelFunc, typename... Args> | ||
| void launch_kernel_on_both_streams(KernelFunc kernel, | ||
| dim3 gridSize, | ||
| dim3 blockSize, | ||
| size_t sharedMem, | ||
| Args... args) | ||
| { | ||
| hipLaunchKernelGGL(kernel, gridSize, blockSize, sharedMem, stream1_, args...); | ||
| hipLaunchKernelGGL(kernel, gridSize, blockSize, sharedMem, stream2_, args...); | ||
| } | ||
|
|
||
| // Synchronize both streams (stream1_ first, then stream2_) and report completion. | ||
| void synchronize() | ||
| { | ||
| HIP_CALL(hipStreamSynchronize(stream1_)); | ||
| HIP_CALL(hipStreamSynchronize(stream2_)); | ||
| std::cout << "Both streams synchronized" << std::endl; | ||
| } | ||
|
|
||
| // Get stream handles; ownership stays with this object. | ||
| hipStream_t get_stream1() const { return stream1_; } | ||
| hipStream_t get_stream2() const { return stream2_; } | ||
|
|
||
| // Execute async memory operations on both streams: the same `size` bytes at `src` are | ||
| // copied host-to-device into `dst1` on stream1_ and into `dst2` on stream2_. | ||
| void async_memcpy_to_device(void* dst1, void* dst2, const void* src, size_t size) | ||
| { | ||
| HIP_CALL(hipMemcpyAsync(dst1, src, size, hipMemcpyHostToDevice, stream1_)); | ||
| HIP_CALL(hipMemcpyAsync(dst2, src, size, hipMemcpyHostToDevice, stream2_)); | ||
| } | ||
|
|
||
| // Device-to-host counterpart: copies `size` bytes from `src1` to `dst1` on stream1_ and | ||
| // from `src2` to `dst2` on stream2_. | ||
| void async_memcpy_to_host(void* dst1, | ||
| void* dst2, | ||
| const void* src1, | ||
| const void* src2, | ||
| size_t size) | ||
| { | ||
| HIP_CALL(hipMemcpyAsync(dst1, src1, size, hipMemcpyDeviceToHost, stream1_)); | ||
| HIP_CALL(hipMemcpyAsync(dst2, src2, size, hipMemcpyDeviceToHost, stream2_)); | ||
| } | ||
| }; | ||
|
|
||
| int | ||
| main(int, char**) | ||
| { | ||
|
|
@@ -76,5 +165,27 @@ main(int, char**) | |
| HIP_CALL(hipSetDevice(0)); | ||
| HIP_CALL(hipDeviceSynchronize()); | ||
|
|
||
| // Validate that kernels are being processed in order on the same device | ||
| HIP_CALL(hipSetDevice(0)); | ||
| DualStreamExecutor executor(0); | ||
| *no_opt_0 = 0; | ||
| // Use reproducible seed for deterministic testing and easier debugging | ||
| srand(12345); | ||
|
||
|
|
||
| for(int i = 0; i < 10000; i++) | ||
| { | ||
| if(rand() & 1) | ||
| { | ||
| hipLaunchKernelGGL( | ||
| check_order_kernel, dim3(1), dim3(1), 0, executor.get_stream1(), i, no_opt_0); | ||
| } | ||
| else | ||
| { | ||
| hipLaunchKernelGGL( | ||
| check_order_kernel, dim3(1), dim3(1), 0, executor.get_stream2(), i, no_opt_0); | ||
| } | ||
| } | ||
| executor.synchronize(); | ||
| HIP_CALL(hipDeviceSynchronize()); | ||
| std::cerr << "Run complete\n"; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,8 +30,7 @@ target_link_libraries( | |
| PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler-sdk::samples-build-flags | ||
| rocprofiler-sdk::samples-common-library) | ||
|
|
||
| set(DEFAULT_GPU_TARGETS "gfx906" "gfx908" "gfx90a" "gfx942" "gfx950" "gfx1100" "gfx1101" | ||
| "gfx1102") | ||
| set(DEFAULT_GPU_TARGETS ${ROCPROFILER_GPU_TARGETS}) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is invalid when samples are built against an install -- |
||
|
|
||
| set(OPENMP_GPU_TARGETS | ||
| "${DEFAULT_GPU_TARGETS}" | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.