|
1 | 1 | //===--------------- kernel.hpp - Native CPU Adapter ----------------------===// |
2 | 2 | // |
3 | | -// Copyright (C) 2023 Intel Corporation |
4 | | -// |
5 | | -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM |
6 | | -// Exceptions. See LICENSE.TXT |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
7 | 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
8 | 6 | // |
9 | 7 | //===----------------------------------------------------------------------===// |
@@ -42,50 +40,53 @@ struct ur_kernel_handle_t_ : RefCounted { |
42 | 40 | ur_kernel_handle_t_(const char *name, nativecpu_task_t subhandler) |
43 | 41 | : _name{name}, _subhandler{std::move(subhandler)} {} |
44 | 42 |
|
45 | | - const char *_name; |
46 | | - nativecpu_task_t _subhandler; |
47 | | - std::vector<native_cpu::NativeCPUArgDesc> _args; |
48 | | - std::vector<local_arg_info_t> _localArgInfo; |
49 | | - |
50 | | - // To be called before enqueuing the kernel. |
51 | | - void handleLocalArgs() { |
52 | | - updateMemPool(); |
53 | | - size_t offset = 0; |
54 | | - for (auto &entry : _localArgInfo) { |
55 | | - _args[entry.argIndex].MPtr = |
56 | | - reinterpret_cast<char *>(_localMemPool) + offset; |
57 | | - // update offset in the memory pool |
58 | | - // Todo: update this offset computation when we have work-group |
59 | | - // level parallelism. |
60 | | - offset += entry.argSize; |
61 | | - } |
| 43 | + ur_kernel_handle_t_(const ur_kernel_handle_t_& other) : _name(other._name), _subhandler(other._subhandler), |
| 44 | + _args(other._args), _localArgInfo(other._localArgInfo), _localMemPool(other._localMemPool), _localMemPoolSize(other._localMemPoolSize) { |
| 45 | + incrementReferenceCount(); |
62 | 46 | } |
63 | 47 |
|
64 | 48 | ~ur_kernel_handle_t_() { |
65 | | - if (_localMemPool) { |
| 49 | + decrementReferenceCount(); |
| 50 | + if (_refCount == 0) { |
66 | 51 | free(_localMemPool); |
67 | 52 | } |
| 53 | + |
68 | 54 | } |
69 | 55 |
|
70 | | -private: |
71 | | - void updateMemPool() { |
| 56 | + const char *_name; |
| 57 | + nativecpu_task_t _subhandler; |
| 58 | + std::vector<native_cpu::NativeCPUArgDesc> _args; |
| 59 | + std::vector<local_arg_info_t> _localArgInfo; |
| 60 | + |
| 61 | + // To be called before enqueuing the kernel. |
| 62 | + void updateMemPool(size_t numParallelThreads) { |
72 | 63 | // compute requested size. |
73 | | - // Todo: currently we execute only one work-group at a time, so for each |
74 | | - // local arg we can allocate just 1 * argSize local arg. When we implement |
75 | | - // work-group level parallelism we should allocate N * argSize where N is |
76 | | - // the number of work groups being executed in parallel (e.g. number of |
77 | | - // threads in the thread pool). |
78 | 64 | size_t reqSize = 0; |
79 | 65 | for (auto &entry : _localArgInfo) { |
80 | | - reqSize += entry.argSize; |
| 66 | + reqSize += entry.argSize * numParallelThreads; |
81 | 67 | } |
82 | 68 | if (reqSize == 0 || reqSize == _localMemPoolSize) { |
83 | 69 | return; |
84 | 70 | } |
85 | 71 | // realloc handles nullptr case |
86 | | - _localMemPool = realloc(_localMemPool, reqSize); |
| 72 | + _localMemPool = (char*)realloc(_localMemPool, reqSize); |
87 | 73 | _localMemPoolSize = reqSize; |
88 | 74 | } |
89 | | - void *_localMemPool = nullptr; |
| 75 | + |
| 76 | + // To be called before executing a work group |
| 77 | + void handleLocalArgs(size_t numParallelThread, size_t threadId) { |
| 78 | + // For each local argument the pool holds argSize * numParallelThread bytes, one argSize slice per thread. |
| 79 | + size_t offset = 0; |
| 80 | + for (auto &entry : _localArgInfo) { |
| 81 | + _args[entry.argIndex].MPtr = |
| 82 | + _localMemPool + offset + (entry.argSize * threadId); |
| 83 | + // update offset in the memory pool |
| 84 | + offset += entry.argSize * numParallelThread; |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | +private: |
| 89 | + char* _localMemPool = nullptr; |
90 | 90 | size_t _localMemPoolSize = 0; |
91 | 91 | }; |
| 92 | + |
0 commit comments