Skip to content

Commit 1f6e151

Browse files
authored
[PTI-SDK] Fix data race(s) found in ptiViewEnable/Disable (intel#62)
* Fix data race issues found by ThreadSanitizer when calling `ptiViewEnable` and `ptiViewDisable` from multiple threads. * Improve ThreadSanitizer results by adding suppressions for third party libraries and adding additional compiler flags. * Document and rename internal structure to denote that it is NOT "thread safe" * Add ThreadSanitizer build to CI along with fixes to iso sample. Signed-off-by: Schilling, Matthew <[email protected]>
1 parent 6cd8f30 commit 1f6e151

File tree

15 files changed

+223
-79
lines changed

15 files changed

+223
-79
lines changed

.github/workflows/sdk_build_and_test.yml

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,51 @@ jobs:
2323
uses: actions/checkout@v4
2424

2525
- name: Build
26+
working-directory: sdk
2627
run: |
27-
cd sdk
2828
cmake --preset default
2929
cmake --build --preset default -j $(($(nproc)/2))
3030
3131
- name: Test
32+
working-directory: sdk
3233
run: |
33-
cd sdk
3434
ctest --output-on-failure --preset default
3535
36-
- name: BuildSanitized
36+
- name: Build AddressSanitizer
3737
if: always()
38+
working-directory: sdk
3839
run: |
39-
cd sdk
4040
cmake --preset asan
4141
cmake --build --preset asan --parallel $(($(nproc)/2))
4242
43-
- name: BuildFuzz
43+
- name: Build ThreadSanitizer
4444
if: always()
45+
working-directory: sdk
46+
run: |
47+
cmake --preset tsan
48+
cmake --build --preset tsan --parallel $(($(nproc)/2))
49+
50+
- name: Build libFuzzer
51+
if: always()
52+
working-directory: sdk
4553
run: |
4654
# To ensure it still builds, run build for fuzz targets until we have
4755
# proper fuzz testing infrastructure in place.
48-
cd sdk
4956
cmake --preset fuzz
5057
cmake --build --preset fuzz --parallel $(($(nproc)/2))
5158
52-
- name: TestSanitized
59+
- name: Test AddressSanitizer
5360
if: always()
61+
working-directory: sdk
5462
run: |
55-
cd sdk
5663
ctest --preset asan --output-on-failure -L samples
5764
65+
- name: Test ThreadSanitizer
66+
if: always()
67+
working-directory: sdk
68+
run: |
69+
ctest --preset tsan --output-on-failure -L samples
70+
5871
- name: Install SDK
5972
working-directory: sdk
6073
run: |

sdk/CMakePresets.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
"environment": {
5050
"NEOReadDebugKeys": "1",
5151
"DisableDeepBind": "1",
52-
"ASAN_OPTIONS": "suppressions=${sourceDir}/test/ASan.supp,detect_leaks=1,check_initialization_order=1,alloc_dealloc_mismatch=0,new_delete_type_mismatch=0,halt_on_error=1,use_sigaltstack=0",
53-
"LSAN_OPTIONS": "suppressions=${sourceDir}/test/LSan.supp,use_unaligned=1",
52+
"ASAN_OPTIONS": "suppressions=${sourceDir}/test/suppressions/ASan.supp,detect_leaks=1,check_initialization_order=1,alloc_dealloc_mismatch=0,new_delete_type_mismatch=0,halt_on_error=1,use_sigaltstack=0",
53+
"LSAN_OPTIONS": "suppressions=${sourceDir}/test/suppressions/LSan.supp,use_unaligned=1",
5454
"UBSAN_OPTIONS": "print_stacktrace=1"
5555
}
5656
},
@@ -70,6 +70,9 @@
7070
"inherits": "asan",
7171
"displayName": "ThreadSanitizer Test Config",
7272
"description": "Build configuration for thread sanitizer.",
73+
"environment": {
74+
"TSAN_OPTIONS": "suppressions=${sourceDir}/test/suppressions/TSan.supp"
75+
},
7376
"cacheVariables": {
7477
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/toolchains/icpx_tsan_toolchain.cmake"
7578
}

sdk/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.3.1
1+
0.3.2

sdk/cmake/toolchains/icpx_tsan_toolchain.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ if (UNIX)
22
set(CMAKE_C_COMPILER icx)
33
set(CMAKE_CXX_COMPILER icpx)
44
endif()
5-
set(CMAKE_CXX_FLAGS_DEBUG_INIT "-fsanitize=thread,undefined")
6-
set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=thread,undefined")
5+
set(CMAKE_CXX_FLAGS_DEBUG_INIT "-fsanitize=thread -fno-omit-frame-pointer -fsanitize-recover=all")
6+
set(CMAKE_C_FLAGS_DEBUG_INIT "-fsanitize=thread -fno-omit-frame-pointer -fsanitize-recover=all")

sdk/samples/iso3dfd_dpcpp/include/iso3dfd.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <sycl/sycl.hpp>
88
using namespace sycl;
99

10+
#include <mutex>
1011
#include <chrono>
1112
#include <cmath>
1213
#include <cstring>
@@ -23,6 +24,8 @@ constexpr float dt = 0.002f;
2324
constexpr float dxyz = 50.0f;
2425
constexpr unsigned int kHalfLength = 8;
2526

27+
extern std::mutex global_cout_mtx;
28+
2629
/*
2730
* Padding to test and eliminate shared local memory bank conflicts for
2831
* the shared local memory(slm) version of the kernel executing on GPU

sdk/samples/iso3dfd_dpcpp/src/iso3dfd.cpp

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,14 @@
3333
#include "iso3dfd.h"
3434
#include <iostream>
3535
#include <string>
36+
#include <mutex>
3637
#include "device_selector.hpp"
3738
#include <dpc_common.hpp>
3839
#include "pti_view.h"
3940
#include "samples_utils.h"
41+
42+
std::mutex global_cout_mtx;
43+
4044
namespace oneapi {}
4145
using namespace oneapi;
4246

@@ -54,14 +58,16 @@ void StopTracing() {
5458
assert(ptiViewDisable(PTI_VIEW_SYCL_RUNTIME_CALLS) == pti_result::PTI_SUCCESS);
5559
}
5660

57-
5861
/*
5962
* Host-Code
6063
* Function used for initialization
6164
*/
6265
void Initialize(float* ptr_prev, float* ptr_next, float* ptr_vel, size_t n1,
6366
size_t n2, size_t n3) {
64-
std::cout << "Initializing ... \n";
67+
{
68+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
69+
std::cout << "Initializing ... \n";
70+
}
6571
size_t dim2 = n2 * n1;
6672

6773
for (size_t i = 0; i < n3; i++) {
@@ -212,6 +218,7 @@ int main(int argc, char* argv[]) {
212218
while (true) {
213219
auto buf_status =
214220
ptiViewGetNextRecord(buf, valid_buf_size, &ptr);
221+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
215222
if (buf_status == pti_result::PTI_STATUS_END_OF_BUFFER) {
216223
std::cout << "Reached End of buffer" << '\n';
217224
break;
@@ -231,7 +238,7 @@ int main(int argc, char* argv[]) {
231238
<< '\n';
232239
std::cout << "Found Sycl Runtime Record" << '\n';
233240
samples_utils::dump_record(reinterpret_cast<pti_view_record_sycl_runtime *>(ptr));
234-
break;
241+
break;
235242
}
236243
case pti_view_kind:: PTI_VIEW_DEVICE_GPU_MEM_COPY: {
237244
std::cout << "---------------------------------------------------"
@@ -278,15 +285,16 @@ int main(int argc, char* argv[]) {
278285

279286
((reinterpret_cast<pti_view_record_kernel *>(ptr) ->_start_timestamp) <=
280287
(reinterpret_cast<pti_view_record_kernel *>(ptr) ->_end_timestamp))) {
281-
std::cout << "------------> All Monotonic" << std::endl;
282-
} else {
283-
std::cout << "------------> Something wrong: NOT All monotonic" << std::endl;
284-
};
285-
if ( reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_task_begin_timestamp == 0)
286-
std::cout << "------------> Something wrong: Sycl Task Begin Time is 0" << std::endl;
287-
if ( reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_enqk_begin_timestamp == 0)
288+
std::cout << "------------> All Monotonic" << std::endl;
289+
} else {
290+
std::cout << "------------> Something wrong: NOT All monotonic" << std::endl;
291+
}
292+
if (reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_task_begin_timestamp == 0) {
293+
std::cout << "------------> Something wrong: Sycl Task Begin Time is 0" << std::endl;
294+
}
295+
if ( reinterpret_cast<pti_view_record_kernel *>(ptr)->_sycl_enqk_begin_timestamp == 0) {
288296
std::cout << "------------> Something wrong: Sycl Enq Launch Kernel Time is 0" << std::endl;
289-
297+
}
290298
break;
291299
}
292300
default: {
@@ -368,18 +376,24 @@ int main(int argc, char* argv[]) {
368376
coeff[i] = coeff[i] / (dxyz * dxyz);
369377
}
370378

371-
std::cout << "Grid Sizes: " << n1 - 2 * kHalfLength << " "
372-
<< n2 - 2 * kHalfLength << " " << n3 - 2 * kHalfLength << "\n";
373-
std::cout << "Memory Usage: " << ((3 * nsize * sizeof(float)) / (1024 * 1024))
374-
<< " MB\n";
379+
{
380+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
381+
std::cout << "Grid Sizes: " << n1 - 2 * kHalfLength << " "
382+
<< n2 - 2 * kHalfLength << " " << n3 - 2 * kHalfLength << "\n";
383+
std::cout << "Memory Usage: " << ((3 * nsize * sizeof(float)) / (1024 * 1024))
384+
<< " MB\n";
385+
}
375386

376387
// Check if running OpenMP OR Serial version on CPU
377388
if (omp) {
389+
{
390+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
378391
#if defined(_OPENMP)
379-
std::cout << " ***** Running OpenMP variant *****\n";
392+
std::cout << " ***** Running OpenMP variant *****\n";
380393
#else
381-
std::cout << " ***** Running C++ Serial variant *****\n";
394+
std::cout << " ***** Running C++ Serial variant *****\n";
382395
#endif
396+
}
383397

384398
// Initialize arrays and introduce initial conditions (source)
385399
Initialize(prev_base, next_base, vel_base, n1, n2, n3);
@@ -409,7 +423,10 @@ int main(int argc, char* argv[]) {
409423
// Check if running SYCL version
410424
if (sycl) {
411425
try {
412-
std::cout << " ***** Running SYCL variant *****\n";
426+
{
427+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
428+
std::cout << " ***** Running SYCL variant *****\n";
429+
}
413430
// Initialize arrays and introduce initial conditions (source)
414431
Initialize(prev_base, next_base, vel_base, n1, n2, n3);
415432

@@ -473,13 +490,17 @@ int main(int argc, char* argv[]) {
473490
error = WithinEpsilon(prev_base, temp, n1, n2, n3, kHalfLength, 0, 0.1f);
474491
}
475492
if (error) {
476-
std::cout << "Final wavefields from SYCL device and CPU are not "
493+
std::cerr << "Final wavefields from SYCL device and CPU are not "
477494
<< "equivalent: Fail\n";
478495
} else {
496+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
479497
std::cout << "Final wavefields from SYCL device and CPU are equivalent:"
480498
<< " Success\n";
481499
}
482-
std::cout << "--------------------------------------\n";
500+
{
501+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
502+
std::cout << "--------------------------------------\n";
503+
}
483504
delete[] temp;
484505
}
485506

sdk/samples/iso3dfd_dpcpp/src/iso3dfd_kernels.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,10 @@ bool Iso3dfdDevice(sycl::queue &q, float *ptr_next, float *ptr_prev,
320320
// Iterate over time steps
321321
for (auto i = 0; i < nIterations; i += 1) {
322322
// Submit command group for execution
323-
std::cout << "Q Submitting at: " << i << ": " << std::dec << GetTime() << std::endl;
323+
{
324+
const std::lock_guard<std::mutex> cout_lock(global_cout_mtx);
325+
std::cout << "Q Submitting at: " << i << ": " << std::dec << GetTime() << std::endl;
326+
}
324327
q.submit([&](auto &h) {
325328
// Create accessors
326329
accessor next(b_ptr_next, h);

sdk/src/levelzero/ze_collector.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -396,14 +396,7 @@ class GlobalZeInitializer {
396396
public:
397397
inline static ze_result_t Initialize() {
398398
utils::SetEnv("ZE_ENABLE_TRACING_LAYER", "1");
399-
overhead::Init();
400-
ze_result_t status = zeInit(ZE_INIT_FLAG_GPU_ONLY);
401-
{
402-
std::string o_api_string = "zeInit";
403-
overhead::FiniLevel0(overhead::OverheadRuntimeType::OVERHEAD_RUNTIME_TYPE_L0,
404-
o_api_string.c_str());
405-
};
406-
return status;
399+
return zeInit(ZE_INIT_FLAG_GPU_ONLY);
407400
}
408401

409402
inline static ze_result_t result_ = Initialize();

sdk/src/overhead_kinds.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ typedef enum _pti_view_overhead_view_kind {
3838
} pti_view_overhead_view_kind;
3939

4040
// TODO: redo this approach to enable/disable state tracking.
41-
static std::atomic<bool> overhead_collection_enabled = false;
41+
inline static std::atomic<bool> overhead_collection_enabled = false;
4242

4343
inline constexpr auto kOhThreshold =
4444
1.00; // 1ns threshhold by default -- TODO -- make this setAttributable
@@ -144,8 +144,8 @@ inline void FiniLevel0(OverheadRuntimeType runtime_type,
144144
ocallback_(&overhead_it->second, overhead_data);
145145
}
146146
ResetRecord();
147-
};
148-
};
147+
}
148+
}
149149
}
150150

151151
inline void FiniSycl(OverheadRuntimeType runtime_type) {
@@ -173,8 +173,8 @@ inline void FiniSycl(OverheadRuntimeType runtime_type) {
173173
ocallback_(&overhead_it->second, overhead_data);
174174
}
175175
ResetRecord();
176-
};
177-
};
176+
}
177+
}
178178
}
179179

180180
} // namespace overhead

0 commit comments

Comments
 (0)