Merge pull request #1236 from IntelPython/feature/enable_spirv_to_device_caching

ZzEeKkAa · web-flow · commit 8fc7fbddb2b7 · 2023-12-12T14:16:57.000-05:00
Enable SPIR-V to device caching
diff --git a/LICENSES.third-party b/LICENSES.third-party
@@ -0,0 +1,30 @@
+The numba-dpex source tree includes vendored libraries governed by the following
+licenses.
+
+
+boost hash.hpp header
+---------------------
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/numba_dpex/core/runtime/_dpexrt_python.c b/numba_dpex/core/runtime/_dpexrt_python.c
@@ -24,6 +24,7 @@
 #include "_queuestruct.h"
 #include "_usmarraystruct.h"
 
+#include "experimental/kernel_caching.h"
 #include "experimental/nrt_reserve_meminfo.h"
 #include "numba/core/runtime/nrt_external.h"
 
@@ -1493,6 +1494,8 @@ static PyObject *build_c_helpers_dict(void)
     _declpointer("DPEXRT_sycl_event_init", &DPEXRT_sycl_event_init);
     _declpointer("DPEXRT_nrt_acquire_meminfo_and_schedule_release",
                  &DPEXRT_nrt_acquire_meminfo_and_schedule_release);
+    _declpointer("DPEXRT_build_or_get_kernel", &DPEXRT_build_or_get_kernel);
+    _declpointer("DPEXRT_kernel_cache_size", &DPEXRT_kernel_cache_size);
 
 #undef _declpointer
     return dct;
@@ -1563,6 +1566,11 @@ MOD_INIT(_dpexrt_python)
     PyModule_AddObject(
         m, "DPEXRT_nrt_acquire_meminfo_and_schedule_release",
         PyLong_FromVoidPtr(&DPEXRT_nrt_acquire_meminfo_and_schedule_release));
+    PyModule_AddObject(m, "DPEXRT_build_or_get_kernel",
+                       PyLong_FromVoidPtr(&DPEXRT_build_or_get_kernel));
+    PyModule_AddObject(m, "DPEXRT_kernel_cache_size",
+                       PyLong_FromVoidPtr(&DPEXRT_kernel_cache_size));
+
     PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
     return MOD_SUCCESS_VAL(m);
 }
diff --git a/numba_dpex/core/runtime/context.py b/numba_dpex/core/runtime/context.py
@@ -471,3 +471,56 @@ def acquire_meminfo_and_schedule_release(
         ret = builder.call(fn, args)
 
         return ret
+
+    def build_or_get_kernel(self, builder: llvmir.IRBuilder, args):
+        """Inserts LLVM IR to call build_or_get_kernel.
+
+        DPCTLSyclKernelRef
+        DPEXRT_build_or_get_kernel(
+            const DPCTLSyclContextRef ctx,
+            const DPCTLSyclDeviceRef dev,
+            size_t il_hash,
+            const char *il,
+            size_t il_length,
+            const char *compile_opts,
+            const char *kernel_name,
+        );
+
+        """
+        mod = builder.module
+
+        func_ty = llvmir.FunctionType(
+            cgutils.voidptr_t,
+            [
+                cgutils.voidptr_t,
+                cgutils.voidptr_t,
+                llvmir.IntType(64),
+                cgutils.voidptr_t,
+                llvmir.IntType(64),
+                cgutils.voidptr_t,
+                cgutils.voidptr_t,
+            ],
+        )
+        fn = cgutils.get_or_insert_function(
+            mod, func_ty, "DPEXRT_build_or_get_kernel"
+        )
+        ret = builder.call(fn, args)
+
+        return ret
+
+    def kernel_cache_size(self, builder: llvmir.IRBuilder):
+        """Inserts LLVM IR to call kernel_cache_size.
+
+        size_t DPEXRT_kernel_cache_size();
+
+        """
+        fn = cgutils.get_or_insert_function(
+            builder.module,
+            llvmir.FunctionType(
+                llvmir.IntType(64),
+                [],
+            ),
+            "DPEXRT_kernel_cache_size",
+        )
+
+        return builder.call(fn, [])
diff --git a/numba_dpex/core/runtime/experimental/kernel_caching.cpp b/numba_dpex/core/runtime/experimental/kernel_caching.cpp
@@ -0,0 +1,111 @@
+// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "kernel_caching.h"
+#include <unordered_map>
+
+extern "C"
+{
+#include "dpctl_capi.h"
+#include "dpctl_sycl_interface.h"
+
+#include "_dbg_printer.h"
+
+#include "numba/core/runtime/nrt_external.h"
+}
+
+#include "syclinterface/dpctl_sycl_type_casters.hpp"
+#include "tools/boost_hash.hpp"
+#include "tools/dpctl.hpp"
+
+using CacheKey = std::tuple<DPCTLSyclContextRef, DPCTLSyclDeviceRef, size_t>;
+
+namespace std
+{
+template <> struct hash<CacheKey>
+{
+    size_t operator()(const CacheKey &ck) const
+    {
+        std::size_t seed = 0;
+        boost::hash_combine(seed, std::get<DPCTLSyclDeviceRef>(ck));
+        boost::hash_combine(seed, std::get<DPCTLSyclContextRef>(ck));
+        boost::hash_detail::hash_combine_impl(seed, std::get<size_t>(ck));
+        return seed;
+    }
+};
+template <> struct equal_to<CacheKey>
+{
+    constexpr bool operator()(const CacheKey &lhs, const CacheKey &rhs) const
+    {
+        return DPCTLDevice_AreEq(std::get<DPCTLSyclDeviceRef>(lhs),
+                                 std::get<DPCTLSyclDeviceRef>(rhs)) &&
+               DPCTLContext_AreEq(std::get<DPCTLSyclContextRef>(lhs),
+                                  std::get<DPCTLSyclContextRef>(rhs)) &&
+               std::get<size_t>(lhs) == std::get<size_t>(rhs);
+    }
+};
+} // namespace std
+
+// TODO: add cache cleaning
+// https://github.com/IntelPython/numba-dpex/issues/1240
+std::unordered_map<CacheKey, DPCTLSyclKernelRef> sycl_kernel_cache =
+    std::unordered_map<CacheKey, DPCTLSyclKernelRef>();
+
+template <class M, class Key, class F>
+typename M::mapped_type &get_else_compute(M &m, Key const &k, F f)
+{
+    typedef typename M::mapped_type V;
+    std::pair<typename M::iterator, bool> r =
+        m.insert(typename M::value_type(k, V()));
+    V &v = r.first->second;
+    if (r.second) {
+        DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: building kernel.\n"););
+        f(v);
+    }
+    else {
+        DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: using cached kernel.\n"););
+        DPCTLDevice_Delete(std::get<DPCTLSyclDeviceRef>(k));
+        DPCTLContext_Delete(std::get<DPCTLSyclContextRef>(k));
+    }
+    return v;
+}
+
+extern "C"
+{
+    DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
+                                                  const DPCTLSyclDeviceRef dev,
+                                                  size_t il_hash,
+                                                  const char *il,
+                                                  size_t il_length,
+                                                  const char *compile_opts,
+                                                  const char *kernel_name)
+    {
+        DPEXRT_DEBUG(
+            drt_debug_print("DPEXRT-DEBUG: in build or get kernel.\n"););
+
+        CacheKey key = std::make_tuple(ctx, dev, il_hash);
+
+        DPEXRT_DEBUG(auto ctx_hash = std::hash<DPCTLSyclContextRef>{}(ctx);
+                     auto dev_hash = std::hash<DPCTLSyclDeviceRef>{}(dev);
+                     drt_debug_print("DPEXRT-DEBUG: key hashes: %d %d %d.\n",
+                                     ctx_hash, dev_hash, il_hash););
+
+        auto k_ref = get_else_compute(
+            sycl_kernel_cache, key,
+            [ctx, dev, il, il_length, compile_opts,
+             kernel_name](DPCTLSyclKernelRef &k_ref) {
+                auto kb_ref = DPCTLKernelBundle_CreateFromSpirv(
+                    ctx, dev, il, il_length, compile_opts);
+                k_ref = DPCTLKernelBundle_GetKernel(kb_ref, kernel_name);
+                DPCTLKernelBundle_Delete(kb_ref);
+            });
+
+        DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: kernel hash size: %d.\n",
+                                     sycl_kernel_cache.size()););
+
+        return DPCTLKernel_Copy(k_ref);
+    }
+
+    size_t DPEXRT_kernel_cache_size() { return sycl_kernel_cache.size(); }
+}
diff --git a/numba_dpex/core/runtime/experimental/kernel_caching.h b/numba_dpex/core/runtime/experimental/kernel_caching.h
@@ -0,0 +1,54 @@
+// SPDX-FileCopyrightText: 2023 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines dpex run time function(s) that cache kernel on device.
+///
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "dpctl_capi.h"
+#include "dpctl_sycl_interface.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+    /*!
+     * @brief returns dpctl kernel reference for the SPIRV file on particular
+     * device. Compiles only first time, all others will use cache for the same
+     * input. It steals reference to context and device because we need to keep
+     * it alive for cache keys.
+     *
+     * @param    ctx            Context reference,
+     * @param    dev            Device reference,
+     * @param    il_hash        Hash of the SPIRV binary data,
+     * @param    il             SPIRV binary data,
+     * @param    il_length      SPIRV binary data size,
+     * @param    compile_opts   compile options,
+     * @param    kernel_name    kernel name inside SPIRV binary data to return
+     * reference to.
+     *
+     * @return   {return}       Kernel reference to the compiled SPIR-V.
+     */
+    DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
+                                                  const DPCTLSyclDeviceRef dev,
+                                                  size_t il_hash,
+                                                  const char *il,
+                                                  size_t il_length,
+                                                  const char *compile_opts,
+                                                  const char *kernel_name);
+
+    /*!
+     * @brief returns cache size. Intended for test purposes only
+     *
+     * @return   {return}       Kernel cache size.
+     */
+    size_t DPEXRT_kernel_cache_size();
+#ifdef __cplusplus
+}
+#endif
diff --git a/numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h b/numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h
@@ -5,7 +5,7 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// Defines dpctl style function(s) that interruct with nrt meminfo and sycl.
+/// Defines dpctl style function(s) that interact with nrt meminfo and sycl.
 ///
 //===----------------------------------------------------------------------===//
 
diff --git a/numba_dpex/core/runtime/experimental/tools/boost_hash.hpp b/numba_dpex/core/runtime/experimental/tools/boost_hash.hpp
@@ -0,0 +1,38 @@
+// Copyright 2005-2014 Daniel James.
+// Distributed under the Boost Software License, Version 1.0. (See accompanying
+// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+//  Based on Peter Dimov's proposal
+//  http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
+//  issue 6.18.
+//
+//  This also contains public domain code from MurmurHash. From the
+//  MurmurHash header:
+
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// 2023 Intel Corporation
+// Copied hash_combine and hash_combine_impl from boost
+// (https://www.boost.org/doc/libs/1_76_0/boost/container_hash/hash.hpp) and
+// changed hash_combine to use std::hash<T> instead of boost::hash<T>.
+
+#include <functional>
+
+namespace boost
+{
+namespace hash_detail
+{
+template <typename SizeT>
+inline void hash_combine_impl(SizeT &seed, SizeT value)
+{
+    seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+}
+} // namespace hash_detail
+
+template <class T> inline void hash_combine(std::size_t &seed, T const &v)
+{
+    std::hash<T> hasher;
+    return boost::hash_detail::hash_combine_impl(seed, hasher(v));
+}
+} // namespace boost
diff --git a/numba_dpex/core/runtime/experimental/tools/dpctl.cpp b/numba_dpex/core/runtime/experimental/tools/dpctl.cpp
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "dpctl.hpp"
+#include <CL/sycl.hpp>
+
+namespace std
+{
+
+size_t
+hash<DPCTLSyclDeviceRef>::operator()(const DPCTLSyclDeviceRef &DRef) const
+{
+    using dpctl::syclinterface::unwrap;
+    return hash<sycl::device>()(*unwrap<sycl::device>(DRef));
+}
+
+size_t
+hash<DPCTLSyclContextRef>::operator()(const DPCTLSyclContextRef &CRef) const
+{
+    using dpctl::syclinterface::unwrap;
+    return hash<sycl::context>()(*unwrap<sycl::context>(CRef));
+}
+} // namespace std
diff --git a/numba_dpex/core/runtime/experimental/tools/dpctl.hpp b/numba_dpex/core/runtime/experimental/tools/dpctl.hpp
@@ -0,0 +1,26 @@
+// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
+//
+// SPDX-License-Identifier: Apache-2.0
+
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines overloads to dpctl library that eventually must be ported there.
+///
+//===----------------------------------------------------------------------===//
+
+#pragma once
+#include "syclinterface/dpctl_sycl_type_casters.hpp"
+
+namespace std
+{
+template <> struct hash<DPCTLSyclDeviceRef>
+{
+    size_t operator()(const DPCTLSyclDeviceRef &DRef) const;
+};
+
+template <> struct hash<DPCTLSyclContextRef>
+{
+    size_t operator()(const DPCTLSyclContextRef &CRef) const;
+};
+} // namespace std
diff --git a/numba_dpex/experimental/launcher.py b/numba_dpex/experimental/launcher.py
diff --git a/numba_dpex/experimental/testing.py b/numba_dpex/experimental/testing.py
diff --git a/numba_dpex/tests/experimental/test_async_kernel.py b/numba_dpex/tests/experimental/test_async_kernel.py