Skip to content

Commit d89abd9

Browse files
committed
Merge branch 'main' into user-after-free
2 parents 60f33bd + 91c9e91 commit d89abd9

File tree

14 files changed

+130
-49
lines changed

14 files changed

+130
-49
lines changed

.github/workflows/e2e_core.yml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,6 @@ jobs:
6666
ls -la ./
6767
rm -rf ./* || true
6868
69-
- uses: xt0rted/pull-request-comment-branch@d97294d304604fa98a2600a6e2f916a84b596dc7 # v2.0.0
70-
id: comment-branch
71-
if: ${{ always() && inputs.trigger != 'schedule' }}
72-
7369
- name: Add comment to PR
7470
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
7571
if: ${{ always() && inputs.trigger != 'schedule' }}
@@ -90,7 +86,18 @@ jobs:
9086
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
9187
with:
9288
path: ur-repo
93-
ref: ${{ steps.comment-branch.outputs.head_ref }}
89+
90+
# On issue_comment trigger (for PRs) we need to fetch special ref for
91+
# proper PR's merge commit. Note, this ref may be absent if the PR is already merged.
92+
- name: Fetch PR's merge commit
93+
if: ${{ inputs.trigger != 'schedule' }}
94+
working-directory: ${{github.workspace}}/ur-repo
95+
env:
96+
PR_NO: ${{github.event.issue.number}}
97+
run: |
98+
git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/*
99+
git checkout origin/pr/${PR_NO}/merge
100+
git rev-parse origin/pr/${PR_NO}/merge
94101
95102
- name: Checkout SYCL
96103
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
@@ -191,8 +198,9 @@ jobs:
191198
script: |
192199
const adapter = '${{ matrix.adapter.name }}';
193200
const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}';
194-
const status = '${{ steps.tests.outcome }}';
195-
const body = `E2E ${adapter} build: \n${url}\n Status: ${status}`;
201+
const test_status = '${{ steps.tests.outcome }}';
202+
const job_status = '${{ job.status }}';
203+
const body = `E2E ${adapter} build:\n${url}\nJob status: ${job_status}. Test status: ${test_status}`;
196204
197205
github.rest.issues.createComment({
198206
issue_number: context.issue.number,

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ set(UR_CONFORMANCE_TARGET_TRIPLES "" CACHE STRING
5353
"List of sycl targets to build CTS device binaries for")
5454
set(UR_CONFORMANCE_AMD_ARCH "" CACHE STRING "AMD device target ID to build CTS binaries for")
5555

56+
# There's little reason not to generate the compile_commands.json
57+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
58+
5659
include(Assertions)
5760

5861
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

scripts/ctest_parser.py

100644100755
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ def summarize_results(results):
3333
total_failed = len(results['Failed'])
3434
total_crashed = total - (total_passed + total_skipped + total_failed)
3535

36-
pass_rate_incl_skipped = percent(total_passed, total)
37-
pass_rate_excl_skipped = percent(total_passed, total - total_skipped)
36+
pass_rate_incl_skipped = percent(total_passed + total_skipped, total)
37+
pass_rate_excl_skipped = percent(total_passed, total)
3838

3939
skipped_rate = percent(total_skipped, total)
4040
failed_rate = percent(total_failed, total)

source/adapters/cuda/command_buffer.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,20 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
6666
cuGraphDestroy(CudaGraph);
6767

6868
// Release the memory allocated to the CudaGraphExec
69-
cuGraphExecDestroy(CudaGraphExec);
69+
if (CudaGraphExec) {
70+
cuGraphExecDestroy(CudaGraphExec);
71+
}
7072
}
7173

7274
ur_exp_command_buffer_command_handle_t_::
7375
ur_exp_command_buffer_command_handle_t_(
7476
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
75-
std::shared_ptr<CUgraphNode> Node, CUDA_KERNEL_NODE_PARAMS Params,
77+
std::shared_ptr<CUgraphNode> &&Node, CUDA_KERNEL_NODE_PARAMS Params,
7678
uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr,
7779
const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr)
78-
: CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params),
79-
WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) {
80+
: CommandBuffer(CommandBuffer), Kernel(Kernel), Node{std::move(Node)},
81+
Params(Params), WorkDim(WorkDim), RefCountInternal(1),
82+
RefCountExternal(1) {
8083
CommandBuffer->incrementInternalReferenceCount();
8184

8285
const size_t CopySize = sizeof(size_t) * WorkDim;
@@ -375,6 +378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
375378
NodeParams.blockDimZ = ThreadsPerBlock[2];
376379
NodeParams.sharedMemBytes = LocalSize;
377380
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());
381+
NodeParams.kern = nullptr;
378382
NodeParams.extra = nullptr;
379383

380384
// Create and add an new kernel node to the Cuda graph
@@ -392,8 +396,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
392396
}
393397

394398
auto NewCommand = new ur_exp_command_buffer_command_handle_t_{
395-
hCommandBuffer, hKernel, NodeSP, NodeParams,
396-
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize};
399+
hCommandBuffer, hKernel, std::move(NodeSP), NodeParams,
400+
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize};
397401

398402
NewCommand->incrementInternalReferenceCount();
399403
hCommandBuffer->CommandHandles.push_back(NewCommand);

source/adapters/cuda/command_buffer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ static inline const char *getUrResultString(ur_result_t Result) {
183183
struct ur_exp_command_buffer_command_handle_t_ {
184184
ur_exp_command_buffer_command_handle_t_(
185185
ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel,
186-
std::shared_ptr<CUgraphNode> Node, CUDA_KERNEL_NODE_PARAMS Params,
186+
std::shared_ptr<CUgraphNode> &&Node, CUDA_KERNEL_NODE_PARAMS Params,
187187
uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr,
188188
const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr);
189189

source/adapters/hip/command_buffer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
5050
ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable)
5151
: Context(hContext), Device(hDevice),
5252
IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr},
53-
RefCountInternal{1}, RefCountExternal{1} {
53+
RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} {
5454
urContextRetain(hContext);
5555
urDeviceRetain(hDevice);
5656
}
@@ -65,11 +65,11 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
6565
UR_TRACE(urDeviceRelease(Device));
6666

6767
// Release the memory allocated to the HIPGraph
68-
UR_CHECK_ERROR(hipGraphDestroy(HIPGraph));
68+
(void)hipGraphDestroy(HIPGraph);
6969

7070
// Release the memory allocated to the HIPGraphExec
7171
if (HIPGraphExec) {
72-
UR_CHECK_ERROR(hipGraphExecDestroy(HIPGraphExec));
72+
(void)hipGraphExecDestroy(HIPGraphExec);
7373
}
7474
}
7575

source/adapters/hip/command_buffer.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,8 @@ struct ur_exp_command_buffer_handle_t_ {
254254
~ur_exp_command_buffer_handle_t_();
255255

256256
void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
257-
std::shared_ptr<hipGraphNode_t> HIPNode) {
258-
SyncPoints[SyncPoint] = HIPNode;
257+
std::shared_ptr<hipGraphNode_t> &&HIPNode) {
258+
SyncPoints[SyncPoint] = std::move(HIPNode);
259259
NextSyncPoint++;
260260
}
261261

@@ -269,7 +269,7 @@ struct ur_exp_command_buffer_handle_t_ {
269269
ur_exp_command_buffer_sync_point_t
270270
addSyncPoint(std::shared_ptr<hipGraphNode_t> HIPNode) {
271271
ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint;
272-
registerSyncPoint(SyncPoint, HIPNode);
272+
registerSyncPoint(SyncPoint, std::move(HIPNode));
273273
return SyncPoint;
274274
}
275275
uint32_t incrementInternalReferenceCount() noexcept {

source/adapters/level_zero/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2022 Intel Corporation
1+
# Copyright (C) 2022-2024 Intel Corporation
22
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -122,6 +122,13 @@ add_ur_adapter(${TARGET_NAME}
122122
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
123123
)
124124

125+
if(NOT WIN32)
126+
target_sources(ur_adapter_level_zero
127+
PRIVATE
128+
${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp
129+
)
130+
endif()
131+
125132
# TODO: fix level_zero adapter conversion warnings
126133
target_compile_options(${TARGET_NAME} PRIVATE
127134
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>

source/adapters/level_zero/adapter.cpp

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
#include "adapter.hpp"
1212
#include "ur_level_zero.hpp"
1313

14+
// Due to multiple DLLMain definitions with SYCL, Global Adapter is init at
15+
// variable creation.
16+
#if defined(_WIN32)
17+
ur_adapter_handle_t_ *GlobalAdapter = new ur_adapter_handle_t_();
18+
#else
19+
ur_adapter_handle_t_ *GlobalAdapter;
20+
#endif
21+
1422
ur_result_t initPlatforms(PlatformVec &platforms) noexcept try {
1523
uint32_t ZeDriverCount = 0;
1624
ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
@@ -37,8 +45,7 @@ ur_result_t initPlatforms(PlatformVec &platforms) noexcept try {
3745
ur_result_t adapterStateInit() { return UR_RESULT_SUCCESS; }
3846

3947
ur_adapter_handle_t_::ur_adapter_handle_t_() {
40-
41-
Adapter.PlatformCache.Compute = [](Result<PlatformVec> &result) {
48+
PlatformCache.Compute = [](Result<PlatformVec> &result) {
4249
static std::once_flag ZeCallCountInitialized;
4350
try {
4451
std::call_once(ZeCallCountInitialized, []() {
@@ -52,7 +59,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
5259
}
5360

5461
// initialize level zero only once.
55-
if (Adapter.ZeResult == std::nullopt) {
62+
if (GlobalAdapter->ZeResult == std::nullopt) {
5663
// Setting these environment variables before running zeInit will enable
5764
// the validation layer in the Level Zero loader.
5865
if (UrL0Debug & UR_L0_DEBUG_VALIDATION) {
@@ -71,20 +78,21 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
7178
// We must only initialize the driver once, even if urPlatformGet() is
7279
// called multiple times. Declaring the return value as "static" ensures
7380
// it's only called once.
74-
Adapter.ZeResult = ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
81+
GlobalAdapter->ZeResult =
82+
ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
7583
}
76-
assert(Adapter.ZeResult !=
84+
assert(GlobalAdapter->ZeResult !=
7785
std::nullopt); // verify that level-zero is initialized
7886
PlatformVec platforms;
7987

8088
// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
81-
if (*Adapter.ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
89+
if (*GlobalAdapter->ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
8290
result = std::move(platforms);
8391
return;
8492
}
85-
if (*Adapter.ZeResult != ZE_RESULT_SUCCESS) {
93+
if (*GlobalAdapter->ZeResult != ZE_RESULT_SUCCESS) {
8694
urPrint("zeInit: Level Zero initialization failure\n");
87-
result = ze2urResult(*Adapter.ZeResult);
95+
result = ze2urResult(*GlobalAdapter->ZeResult);
8896
return;
8997
}
9098

@@ -97,7 +105,11 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
97105
};
98106
}
99107

100-
ur_adapter_handle_t_ Adapter{};
108+
void globalAdapterOnDemandCleanup() {
109+
if (GlobalAdapter) {
110+
delete GlobalAdapter;
111+
}
112+
}
101113

102114
ur_result_t adapterStateTeardown() {
103115
bool LeakFound = false;
@@ -184,6 +196,11 @@ ur_result_t adapterStateTeardown() {
184196
}
185197
if (LeakFound)
186198
return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
199+
// Due to multiple DLLMain definitions with SYCL, register to cleanup the
200+
// Global Adapter after refcnt is 0
201+
#if defined(_WIN32)
202+
std::atexit(globalAdapterOnDemandCleanup);
203+
#endif
187204

188205
return UR_RESULT_SUCCESS;
189206
}
@@ -203,11 +220,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
203220
///< adapters available.
204221
) {
205222
if (NumEntries > 0 && Adapters) {
206-
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
207-
if (Adapter.RefCount++ == 0) {
208-
adapterStateInit();
223+
if (GlobalAdapter) {
224+
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
225+
if (GlobalAdapter->RefCount++ == 0) {
226+
adapterStateInit();
227+
}
228+
} else {
229+
// If the GetAdapter is called after the Library began or was torndown,
230+
// then temporarily create a new Adapter handle and register a new
231+
// cleanup.
232+
GlobalAdapter = new ur_adapter_handle_t_();
233+
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
234+
if (GlobalAdapter->RefCount++ == 0) {
235+
adapterStateInit();
236+
}
237+
std::atexit(globalAdapterOnDemandCleanup);
209238
}
210-
*Adapters = &Adapter;
239+
*Adapters = GlobalAdapter;
211240
}
212241

213242
if (NumAdapters) {
@@ -218,17 +247,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(
218247
}
219248

220249
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
221-
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
222-
if (--Adapter.RefCount == 0) {
223-
return adapterStateTeardown();
250+
// Check first if the Adapter pointer is valid
251+
if (GlobalAdapter) {
252+
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
253+
if (--GlobalAdapter->RefCount == 0) {
254+
return adapterStateTeardown();
255+
}
224256
}
225257

226258
return UR_RESULT_SUCCESS;
227259
}
228260

229261
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
230-
std::lock_guard<std::mutex> Lock{Adapter.Mutex};
231-
Adapter.RefCount++;
262+
if (GlobalAdapter) {
263+
std::lock_guard<std::mutex> Lock{GlobalAdapter->Mutex};
264+
GlobalAdapter->RefCount++;
265+
}
232266

233267
return UR_RESULT_SUCCESS;
234268
}
@@ -257,7 +291,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
257291
case UR_ADAPTER_INFO_BACKEND:
258292
return ReturnValue(UR_ADAPTER_BACKEND_LEVEL_ZERO);
259293
case UR_ADAPTER_INFO_REFERENCE_COUNT:
260-
return ReturnValue(Adapter.RefCount.load());
294+
return ReturnValue(GlobalAdapter->RefCount.load());
261295
default:
262296
return UR_RESULT_ERROR_INVALID_ENUMERATION;
263297
}

source/adapters/level_zero/adapter.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ struct ur_adapter_handle_t_ {
2525
ZeCache<Result<PlatformVec>> PlatformCache;
2626
};
2727

28-
extern ur_adapter_handle_t_ Adapter;
28+
extern ur_adapter_handle_t_ *GlobalAdapter;

0 commit comments

Comments
 (0)