Skip to content

Commit 8fc7fbd

Browse files
authored
Merge pull request #1236 from IntelPython/feature/enable_spirv_to_device_caching
Enable SPIR-V to device caching
2 parents 11e245c + a8178c6 commit 8fc7fbd

File tree

12 files changed

+412
-50
lines changed

12 files changed

+412
-50
lines changed

LICENSES.third-party

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
The numba-dpex source tree includes vendored libraries governed by the following
2+
licenses.
3+
4+
5+
boost hash.hpp header
6+
---------------------
7+
8+
Boost Software License - Version 1.0 - August 17th, 2003
9+
10+
Permission is hereby granted, free of charge, to any person or organization
11+
obtaining a copy of the software and accompanying documentation covered by
12+
this license (the "Software") to use, reproduce, display, distribute,
13+
execute, and transmit the Software, and to prepare derivative works of the
14+
Software, and to permit third-parties to whom the Software is furnished to
15+
do so, all subject to the following:
16+
17+
The copyright notices in the Software and this entire statement, including
18+
the above license grant, this restriction and the following disclaimer,
19+
must be included in all copies of the Software, in whole or in part, and
20+
all derivative works of the Software, unless such copies or derivative
21+
works are solely in the form of machine-executable object code generated by
22+
a source language processor.
23+
24+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26+
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
27+
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
28+
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
29+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30+
DEALINGS IN THE SOFTWARE.

numba_dpex/core/runtime/_dpexrt_python.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "_queuestruct.h"
2525
#include "_usmarraystruct.h"
2626

27+
#include "experimental/kernel_caching.h"
2728
#include "experimental/nrt_reserve_meminfo.h"
2829
#include "numba/core/runtime/nrt_external.h"
2930

@@ -1493,6 +1494,8 @@ static PyObject *build_c_helpers_dict(void)
14931494
_declpointer("DPEXRT_sycl_event_init", &DPEXRT_sycl_event_init);
14941495
_declpointer("DPEXRT_nrt_acquire_meminfo_and_schedule_release",
14951496
&DPEXRT_nrt_acquire_meminfo_and_schedule_release);
1497+
_declpointer("DPEXRT_build_or_get_kernel", &DPEXRT_build_or_get_kernel);
1498+
_declpointer("DPEXRT_kernel_cache_size", &DPEXRT_kernel_cache_size);
14961499

14971500
#undef _declpointer
14981501
return dct;
@@ -1563,6 +1566,11 @@ MOD_INIT(_dpexrt_python)
15631566
PyModule_AddObject(
15641567
m, "DPEXRT_nrt_acquire_meminfo_and_schedule_release",
15651568
PyLong_FromVoidPtr(&DPEXRT_nrt_acquire_meminfo_and_schedule_release));
1569+
PyModule_AddObject(m, "DPEXRT_build_or_get_kernel",
1570+
PyLong_FromVoidPtr(&DPEXRT_build_or_get_kernel));
1571+
PyModule_AddObject(m, "DPEXRT_kernel_cache_size",
1572+
PyLong_FromVoidPtr(&DPEXRT_kernel_cache_size));
1573+
15661574
PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
15671575
return MOD_SUCCESS_VAL(m);
15681576
}

numba_dpex/core/runtime/context.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,3 +471,56 @@ def acquire_meminfo_and_schedule_release(
471471
ret = builder.call(fn, args)
472472

473473
return ret
474+
475+
def build_or_get_kernel(self, builder: llvmir.IRBuilder, args):
476+
"""Inserts LLVM IR to call build_or_get_kernel.
477+
478+
DPCTLSyclKernelRef
479+
DPEXRT_build_or_get_kernel(
480+
const DPCTLSyclContextRef ctx,
481+
const DPCTLSyclDeviceRef dev,
482+
size_t il_hash,
483+
const char *il,
484+
size_t il_length,
485+
const char *compile_opts,
486+
const char *kernel_name,
487+
);
488+
489+
"""
490+
mod = builder.module
491+
492+
func_ty = llvmir.FunctionType(
493+
cgutils.voidptr_t,
494+
[
495+
cgutils.voidptr_t,
496+
cgutils.voidptr_t,
497+
llvmir.IntType(64),
498+
cgutils.voidptr_t,
499+
llvmir.IntType(64),
500+
cgutils.voidptr_t,
501+
cgutils.voidptr_t,
502+
],
503+
)
504+
fn = cgutils.get_or_insert_function(
505+
mod, func_ty, "DPEXRT_build_or_get_kernel"
506+
)
507+
ret = builder.call(fn, args)
508+
509+
return ret
510+
511+
def kernel_cache_size(self, builder: llvmir.IRBuilder):
512+
"""Inserts LLVM IR to call kernel_cache_size.
513+
514+
size_t DPEXRT_kernel_cache_size();
515+
516+
"""
517+
fn = cgutils.get_or_insert_function(
518+
builder.module,
519+
llvmir.FunctionType(
520+
llvmir.IntType(64),
521+
[],
522+
),
523+
"DPEXRT_kernel_cache_size",
524+
)
525+
526+
return builder.call(fn, [])
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include "kernel_caching.h"
6+
#include <unordered_map>
7+
8+
extern "C"
9+
{
10+
#include "dpctl_capi.h"
11+
#include "dpctl_sycl_interface.h"
12+
13+
#include "_dbg_printer.h"
14+
15+
#include "numba/core/runtime/nrt_external.h"
16+
}
17+
18+
#include "syclinterface/dpctl_sycl_type_casters.hpp"
19+
#include "tools/boost_hash.hpp"
20+
#include "tools/dpctl.hpp"
21+
22+
using CacheKey = std::tuple<DPCTLSyclContextRef, DPCTLSyclDeviceRef, size_t>;
23+
24+
namespace std
25+
{
26+
template <> struct hash<CacheKey>
27+
{
28+
size_t operator()(const CacheKey &ck) const
29+
{
30+
std::size_t seed = 0;
31+
boost::hash_combine(seed, std::get<DPCTLSyclDeviceRef>(ck));
32+
boost::hash_combine(seed, std::get<DPCTLSyclContextRef>(ck));
33+
boost::hash_detail::hash_combine_impl(seed, std::get<size_t>(ck));
34+
return seed;
35+
}
36+
};
37+
template <> struct equal_to<CacheKey>
38+
{
39+
constexpr bool operator()(const CacheKey &lhs, const CacheKey &rhs) const
40+
{
41+
return DPCTLDevice_AreEq(std::get<DPCTLSyclDeviceRef>(lhs),
42+
std::get<DPCTLSyclDeviceRef>(rhs)) &&
43+
DPCTLContext_AreEq(std::get<DPCTLSyclContextRef>(lhs),
44+
std::get<DPCTLSyclContextRef>(rhs)) &&
45+
std::get<size_t>(lhs) == std::get<size_t>(rhs);
46+
}
47+
};
48+
} // namespace std
49+
50+
// TODO: add cache cleaning
51+
// https://github.com/IntelPython/numba-dpex/issues/1240
52+
std::unordered_map<CacheKey, DPCTLSyclKernelRef> sycl_kernel_cache =
53+
std::unordered_map<CacheKey, DPCTLSyclKernelRef>();
54+
55+
template <class M, class Key, class F>
56+
typename M::mapped_type &get_else_compute(M &m, Key const &k, F f)
57+
{
58+
typedef typename M::mapped_type V;
59+
std::pair<typename M::iterator, bool> r =
60+
m.insert(typename M::value_type(k, V()));
61+
V &v = r.first->second;
62+
if (r.second) {
63+
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: building kernel.\n"););
64+
f(v);
65+
}
66+
else {
67+
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: using cached kernel.\n"););
68+
DPCTLDevice_Delete(std::get<DPCTLSyclDeviceRef>(k));
69+
DPCTLContext_Delete(std::get<DPCTLSyclContextRef>(k));
70+
}
71+
return v;
72+
}
73+
74+
extern "C"
75+
{
76+
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
77+
const DPCTLSyclDeviceRef dev,
78+
size_t il_hash,
79+
const char *il,
80+
size_t il_length,
81+
const char *compile_opts,
82+
const char *kernel_name)
83+
{
84+
DPEXRT_DEBUG(
85+
drt_debug_print("DPEXRT-DEBUG: in build or get kernel.\n"););
86+
87+
CacheKey key = std::make_tuple(ctx, dev, il_hash);
88+
89+
DPEXRT_DEBUG(auto ctx_hash = std::hash<DPCTLSyclContextRef>{}(ctx);
90+
auto dev_hash = std::hash<DPCTLSyclDeviceRef>{}(dev);
91+
drt_debug_print("DPEXRT-DEBUG: key hashes: %d %d %d.\n",
92+
ctx_hash, dev_hash, il_hash););
93+
94+
auto k_ref = get_else_compute(
95+
sycl_kernel_cache, key,
96+
[ctx, dev, il, il_length, compile_opts,
97+
kernel_name](DPCTLSyclKernelRef &k_ref) {
98+
auto kb_ref = DPCTLKernelBundle_CreateFromSpirv(
99+
ctx, dev, il, il_length, compile_opts);
100+
k_ref = DPCTLKernelBundle_GetKernel(kb_ref, kernel_name);
101+
DPCTLKernelBundle_Delete(kb_ref);
102+
});
103+
104+
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: kernel hash size: %d.\n",
105+
sycl_kernel_cache.size()););
106+
107+
return DPCTLKernel_Copy(k_ref);
108+
}
109+
110+
size_t DPEXRT_kernel_cache_size() { return sycl_kernel_cache.size(); }
111+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// SPDX-FileCopyrightText: 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
//===----------------------------------------------------------------------===//
6+
///
7+
/// \file
8+
/// Defines dpex run time function(s) that cache kernel on device.
9+
///
10+
//===----------------------------------------------------------------------===//
11+
12+
#pragma once
13+
14+
#include "dpctl_capi.h"
15+
#include "dpctl_sycl_interface.h"
16+
17+
#ifdef __cplusplus
18+
extern "C"
19+
{
20+
#endif
21+
/*!
22+
* @brief returns dpctl kernel reference for the SPIRV file on particular
23+
* device. Compiles only first time, all others will use cache for the same
24+
* input. It steals reference to context and device because we need to keep
25+
* it alive for cache keys.
26+
*
27+
* @param ctx Context reference,
28+
* @param dev Device reference,
29+
* @param il_hash Hash of the SPIRV binary data,
30+
* @param il SPIRV binary data,
31+
* @param il_length SPIRV binary data size,
32+
* @param compile_opts compile options,
33+
* @param kernel_name kernel name inside SPIRV binary data to return
34+
* reference to.
35+
*
36+
* @return {return} Kernel reference to the compiled SPIR-V.
37+
*/
38+
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
39+
const DPCTLSyclDeviceRef dev,
40+
size_t il_hash,
41+
const char *il,
42+
size_t il_length,
43+
const char *compile_opts,
44+
const char *kernel_name);
45+
46+
/*!
47+
* @brief returns cache size. Intended for test purposes only
48+
*
49+
* @return {return} Kernel cache size.
50+
*/
51+
size_t DPEXRT_kernel_cache_size();
52+
#ifdef __cplusplus
53+
}
54+
#endif

numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
//===----------------------------------------------------------------------===//
66
///
77
/// \file
8-
/// Defines dpctl style function(s) that interruct with nrt meminfo and sycl.
8+
/// Defines dpctl style function(s) that interact with nrt meminfo and sycl.
99
///
1010
//===----------------------------------------------------------------------===//
1111

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright 2005-2014 Daniel James.
2+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
3+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
4+
5+
// Based on Peter Dimov's proposal
6+
// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
7+
// issue 6.18.
8+
//
9+
// This also contains public domain code from MurmurHash. From the
10+
// MurmurHash header:
11+
12+
// MurmurHash3 was written by Austin Appleby, and is placed in the public
13+
// domain. The author hereby disclaims copyright to this source code.
14+
15+
// 2023 Intel Corporation
16+
// Copied hash_combine and hash_combine_impl from boost
17+
// (https://www.boost.org/doc/libs/1_76_0/boost/container_hash/hash.hpp) and
18+
// changed hash_combine to use std::hash<T> instead of boost::hash<T>.
19+
20+
#include <functional>
21+
22+
namespace boost
23+
{
24+
namespace hash_detail
25+
{
26+
template <typename SizeT>
27+
inline void hash_combine_impl(SizeT &seed, SizeT value)
28+
{
29+
seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
30+
}
31+
} // namespace hash_detail
32+
33+
template <class T> inline void hash_combine(std::size_t &seed, T const &v)
34+
{
35+
std::hash<T> hasher;
36+
return boost::hash_detail::hash_combine_impl(seed, hasher(v));
37+
}
38+
} // namespace boost
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include "dpctl.hpp"
6+
#include <CL/sycl.hpp>
7+
8+
namespace std
9+
{
10+
11+
size_t
12+
hash<DPCTLSyclDeviceRef>::operator()(const DPCTLSyclDeviceRef &DRef) const
13+
{
14+
using dpctl::syclinterface::unwrap;
15+
return hash<sycl::device>()(*unwrap<sycl::device>(DRef));
16+
}
17+
18+
size_t
19+
hash<DPCTLSyclContextRef>::operator()(const DPCTLSyclContextRef &CRef) const
20+
{
21+
using dpctl::syclinterface::unwrap;
22+
return hash<sycl::context>()(*unwrap<sycl::context>(CRef));
23+
}
24+
} // namespace std
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
//===----------------------------------------------------------------------===//
6+
///
7+
/// \file
8+
/// Defines overloads to dpctl library that eventually must be ported there.
9+
///
10+
//===----------------------------------------------------------------------===//
11+
12+
#pragma once
13+
#include "syclinterface/dpctl_sycl_type_casters.hpp"
14+
15+
namespace std
16+
{
17+
template <> struct hash<DPCTLSyclDeviceRef>
18+
{
19+
size_t operator()(const DPCTLSyclDeviceRef &DRef) const;
20+
};
21+
22+
template <> struct hash<DPCTLSyclContextRef>
23+
{
24+
size_t operator()(const DPCTLSyclContextRef &CRef) const;
25+
};
26+
} // namespace std

0 commit comments

Comments
 (0)