Skip to content

Commit 88f5b14

Browse files
authored
Merge pull request #712 from IntelPython/master
Merge master to gold
2 parents 8351771 + 5ae05ca commit 88f5b14

18 files changed

+310
-93
lines changed

doc/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@
3131
author = 'Intel'
3232

3333
# The short X.Y version
34-
version = '0.5'
34+
version = '0.6'
3535
# The full version, including alpha/beta/rc tags
36-
release = '0.5.1'
36+
release = '0.6.2'
3737

3838

3939
# -- General configuration ---------------------------------------------------

dpnp/backend/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727

2828
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
2929

30-
# set(DPNP_VERSION 0.5.1)
31-
# set(DPNP_API_VERSION 0.5)
30+
# set(DPNP_VERSION 0.6.2)
31+
# set(DPNP_API_VERSION 0.6)
3232

3333
# set directory where the custom finders live
3434
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules")

dpnp/backend/doc/Doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ PROJECT_NAME = "DPNP C++ backend kernel library"
3838
# could be handy for archiving the generated documentation or if some version
3939
# control system is used.
4040

41-
PROJECT_NUMBER = 0.5.1
41+
PROJECT_NUMBER = 0.6.2
4242

4343
# Using the PROJECT_BRIEF tag one can provide an optional one line description
4444
# for a project that appears at the top of each page and should give viewer a

dpnp/backend/include/dpnp_iface.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,18 @@ INP_DLLEXPORT void dpnp_std_c(
705705
template <typename _DataType, typename _IndecesType>
706706
INP_DLLEXPORT void dpnp_take_c(void* array, void* indices, void* result, size_t size);
707707

708+
/**
 * @ingroup BACKEND_API
 * @brief math library implementation of trace function
 *
 * Reduces the input over its last axis: each output element is the sum of
 * one row of length shape[ndim - 1], so the result holds
 * shape[0] * ... * shape[ndim - 2] elements.
 * NOTE(review): presumably the caller passes the already-extracted diagonal
 * here (numpy.trace semantics) -- confirm against the Python-side wrapper.
 *
 * @param [in]  array   Input array with data.
 * @param [out] result  Output array (size = product of shape[0..ndim-2]).
 * @param [in]  shape   Shape of input array (ndim elements).
 * @param [in]  ndim    Number of elements in array.shape.
 */
template <typename _DataType, typename _ResultType>
INP_DLLEXPORT void dpnp_trace_c(const void* array, void* result, const size_t* shape, const size_t ndim);
719+
708720
/**
709721
* @ingroup BACKEND_API
710722
* @brief math library implementation of take function

dpnp/backend/include/dpnp_iface_fptr.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ enum class DPNPFuncName : size_t
199199
DPNP_FN_TAN, /**< Used in numpy.tan() implementation */
200200
DPNP_FN_TANH, /**< Used in numpy.tanh() implementation */
201201
DPNP_FN_TRANSPOSE, /**< Used in numpy.transpose() implementation */
202+
DPNP_FN_TRACE, /**< Used in numpy.trace() implementation */
202203
DPNP_FN_TRAPZ, /**< Used in numpy.trapz() implementation */
203204
DPNP_FN_TRI, /**< Used in numpy.tri() implementation */
204205
DPNP_FN_TRIL, /**< Used in numpy.tril() implementation */

dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,68 @@ void dpnp_vander_c(const void* array1_in, void* result1, const size_t size_in, c
211211
}
212212
}
213213

214+
template <typename _DataType, typename _ResultType>
215+
class dpnp_trace_c_kernel;
216+
217+
template <typename _DataType, typename _ResultType>
218+
void dpnp_trace_c(const void* array1_in, void* result1, const size_t* shape_, const size_t ndim)
219+
{
220+
cl::sycl::event event;
221+
222+
if ((array1_in == nullptr) || (result1 == nullptr))
223+
{
224+
return;
225+
}
226+
227+
const _DataType* array_in = reinterpret_cast<const _DataType*>(array1_in);
228+
_ResultType* result = reinterpret_cast<_ResultType*>(result1);
229+
230+
if (shape_ == nullptr)
231+
{
232+
return;
233+
}
234+
235+
if (ndim == 0)
236+
{
237+
return;
238+
}
239+
240+
size_t size = 1;
241+
for (size_t i = 0; i < ndim - 1; ++i)
242+
{
243+
size *= shape_[i];
244+
}
245+
246+
if (size == 0)
247+
{
248+
return;
249+
}
250+
251+
size_t* shape = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
252+
auto memcpy_event = DPNP_QUEUE.memcpy(shape, shape_, ndim * sizeof(size_t));
253+
254+
cl::sycl::range<1> gws(size);
255+
auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
256+
size_t i = global_id[0];
257+
result[i] = 0;
258+
for (size_t j = 0; j < shape[ndim - 1]; ++j)
259+
{
260+
result[i] += array_in[i * shape[ndim - 1] + j];
261+
}
262+
};
263+
264+
auto kernel_func = [&](cl::sycl::handler& cgh) {
265+
cgh.depends_on({memcpy_event});
266+
cgh.parallel_for<class dpnp_trace_c_kernel<_DataType, _ResultType>>(gws, kernel_parallel_for_func);
267+
};
268+
269+
event = DPNP_QUEUE.submit(kernel_func);
270+
271+
event.wait();
272+
273+
dpnp_memory_free_c(shape);
274+
}
275+
214276
template <typename _DataType>
215277
class dpnp_tri_c_kernel;
216278

@@ -539,6 +601,23 @@ void func_map_init_arraycreation(func_map_t& fmap)
539601
fmap[DPNPFuncName::DPNP_FN_VANDER][eft_C128][eft_C128] = {
540602
eft_C128, (void*)dpnp_vander_c<std::complex<double>, std::complex<double>>};
541603

604+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_trace_c<int, int>};
605+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_LNG][eft_INT] = {eft_INT, (void*)dpnp_trace_c<long, int>};
606+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_FLT][eft_INT] = {eft_INT, (void*)dpnp_trace_c<float, int>};
607+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_DBL][eft_INT] = {eft_INT, (void*)dpnp_trace_c<double, int>};
608+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_INT][eft_LNG] = {eft_LNG, (void*)dpnp_trace_c<int, long>};
609+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_trace_c<long, long>};
610+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_FLT][eft_LNG] = {eft_LNG, (void*)dpnp_trace_c<float, long>};
611+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_DBL][eft_LNG] = {eft_LNG, (void*)dpnp_trace_c<double, long>};
612+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_INT][eft_FLT] = {eft_FLT, (void*)dpnp_trace_c<int, float>};
613+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_LNG][eft_FLT] = {eft_FLT, (void*)dpnp_trace_c<long, float>};
614+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_trace_c<float, float>};
615+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_DBL][eft_FLT] = {eft_FLT, (void*)dpnp_trace_c<double, float>};
616+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_INT][eft_DBL] = {eft_DBL, (void*)dpnp_trace_c<int, double>};
617+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_LNG][eft_DBL] = {eft_DBL, (void*)dpnp_trace_c<long, double>};
618+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_FLT][eft_DBL] = {eft_DBL, (void*)dpnp_trace_c<float, double>};
619+
fmap[DPNPFuncName::DPNP_FN_TRACE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_trace_c<double, double>};
620+
542621
fmap[DPNPFuncName::DPNP_FN_TRI][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_tri_c<int>};
543622
fmap[DPNPFuncName::DPNP_FN_TRI][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_tri_c<long>};
544623
fmap[DPNPFuncName::DPNP_FN_TRI][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tri_c<float>};

dpnp/backend/kernels/dpnp_krnl_random.cpp

Lines changed: 77 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -475,54 +475,72 @@ void dpnp_rng_logistic_c(void* result, const double loc, const double scale, con
475475
template <typename _DataType>
476476
void dpnp_rng_lognormal_c(void* result, const _DataType mean, const _DataType stddev, const size_t size)
477477
{
478-
if (!size)
478+
if (!size || !result)
479479
{
480480
return;
481481
}
482482
_DataType* result1 = reinterpret_cast<_DataType*>(result);
483483

484-
const _DataType displacement = _DataType(0.0);
485-
486-
const _DataType scalefactor = _DataType(1.0);
484+
if (stddev == 0.0)
485+
{
486+
_DataType* fill_value = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(sizeof(_DataType)));
487+
fill_value[0] = static_cast<_DataType>(std::exp(mean + (stddev * stddev) / 2));
488+
dpnp_initval_c<_DataType>(result, fill_value, size);
489+
dpnp_memory_free_c(fill_value);
490+
}
491+
else
492+
{
493+
const _DataType displacement = _DataType(0.0);
494+
const _DataType scalefactor = _DataType(1.0);
487495

488-
mkl_rng::lognormal<_DataType> distribution(mean, stddev, displacement, scalefactor);
489-
// perform generation
490-
auto event_out = mkl_rng::generate(distribution, DPNP_RNG_ENGINE, size, result1);
491-
event_out.wait();
496+
mkl_rng::lognormal<_DataType> distribution(mean, stddev, displacement, scalefactor);
497+
auto event_out = mkl_rng::generate(distribution, DPNP_RNG_ENGINE, size, result1);
498+
event_out.wait();
499+
}
500+
return;
492501
}
493502

494503
template <typename _DataType>
495504
void dpnp_rng_multinomial_c(
496505
void* result, const int ntrial, const double* p_vector, const size_t p_vector_size, const size_t size)
497506
{
498-
if (!size)
507+
if (!size || !result)
499508
{
500509
return;
501510
}
502-
std::int32_t* result1 = reinterpret_cast<std::int32_t*>(result);
503-
std::vector<double> p(p_vector, p_vector + p_vector_size);
504-
// size = size
505-
// `result` is a array for random numbers
506-
// `size` is a `result`'s len. `size = n * p.size()`
507-
// `n` is a number of random values to be generated.
508-
size_t n = size / p.size();
509511

510-
if (dpnp_queue_is_cpu_c())
512+
if (ntrial == 0)
511513
{
512-
mkl_rng::multinomial<std::int32_t> distribution(ntrial, p);
513-
// perform generation
514-
auto event_out = mkl_rng::generate(distribution, DPNP_RNG_ENGINE, n, result1);
515-
event_out.wait();
514+
dpnp_zeros_c<_DataType>(result, size);
516515
}
517516
else
518517
{
519-
int errcode = viRngMultinomial(
520-
VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, get_rng_stream(), n, result1, ntrial, p_vector_size, p_vector);
521-
if (errcode != VSL_STATUS_OK)
518+
std::int32_t* result1 = reinterpret_cast<std::int32_t*>(result);
519+
std::vector<double> p(p_vector, p_vector + p_vector_size);
520+
// size = size
521+
// `result` is a array for random numbers
522+
// `size` is a `result`'s len. `size = n * p.size()`
523+
// `n` is a number of random values to be generated.
524+
size_t n = size / p.size();
525+
526+
if (dpnp_queue_is_cpu_c())
522527
{
523-
throw std::runtime_error("DPNP RNG Error: dpnp_rng_multinomial_c() failed.");
528+
mkl_rng::multinomial<std::int32_t> distribution(ntrial, p);
529+
// perform generation
530+
auto event_out = mkl_rng::generate(distribution, DPNP_RNG_ENGINE, n, result1);
531+
event_out.wait();
532+
}
533+
else
534+
{
535+
int errcode = viRngMultinomial(
536+
VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, get_rng_stream(), n, result1, ntrial, p_vector_size, p_vector);
537+
if (errcode != VSL_STATUS_OK)
538+
{
539+
throw std::runtime_error("DPNP RNG Error: dpnp_rng_multinomial_c() failed.");
540+
}
524541
}
525542
}
543+
return;
526544
}
527545

528546
template <typename _DataType>
@@ -946,17 +964,20 @@ template <typename _DataType>
946964
void dpnp_rng_shuffle_c(
947965
void* result, const size_t itemsize, const size_t ndim, const size_t high_dim_size, const size_t size)
948966
{
949-
if (!(size) || !(high_dim_size > 1))
967+
if (!result)
950968
{
951969
return;
952970
}
953971

954-
char* result1 = reinterpret_cast<char*>(result);
972+
if (!size || !ndim || !(high_dim_size > 1))
973+
{
974+
return;
975+
}
955976

956-
double* Uvec = nullptr;
977+
char* result1 = reinterpret_cast<char*>(result);
957978

958979
size_t uvec_size = high_dim_size - 1;
959-
Uvec = reinterpret_cast<double*>(dpnp_memory_alloc_c(uvec_size * sizeof(double)));
980+
double* Uvec = reinterpret_cast<double*>(dpnp_memory_alloc_c(uvec_size * sizeof(double)));
960981
mkl_rng::uniform<double> uniform_distribution(0.0, 1.0);
961982
auto uniform_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, uvec_size, Uvec);
962983
uniform_event.wait();
@@ -966,42 +987,52 @@ void dpnp_rng_shuffle_c(
966987
// Fast, statically typed path: shuffle the underlying buffer.
967988
// Only for non-empty, 1d objects of class ndarray (subclasses such
968989
// as MaskedArrays may not support this approach).
969-
// TODO
970-
// kernel
971-
char* buf = nullptr;
972-
buf = reinterpret_cast<char*>(dpnp_memory_alloc_c(itemsize * sizeof(char)));
990+
char* buf = reinterpret_cast<char*>(dpnp_memory_alloc_c(itemsize * sizeof(char)));
973991
for (size_t i = uvec_size; i > 0; i--)
974992
{
975993
size_t j = (size_t)(floor((i + 1) * Uvec[i - 1]));
976-
memcpy(buf, result1 + j * itemsize, itemsize);
977-
memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize);
978-
memcpy(result1 + i * itemsize, buf, itemsize);
994+
if (i != j)
995+
{
996+
auto memcpy1 =
997+
DPNP_QUEUE.submit([&](cl::sycl::handler& h) { h.memcpy(buf, result1 + j * itemsize, itemsize); });
998+
auto memcpy2 = DPNP_QUEUE.submit([&](cl::sycl::handler& h) {
999+
h.depends_on({memcpy1});
1000+
h.memcpy(result1 + j * itemsize, result1 + i * itemsize, itemsize);
1001+
});
1002+
auto memcpy3 = DPNP_QUEUE.submit([&](cl::sycl::handler& h) {
1003+
h.depends_on({memcpy2});
1004+
h.memcpy(result1 + i * itemsize, buf, itemsize);
1005+
});
1006+
memcpy3.wait();
1007+
}
9791008
}
980-
9811009
dpnp_memory_free_c(buf);
9821010
}
9831011
else
9841012
{
9851013
// Multidimensional ndarrays require a bounce buffer.
986-
// TODO
987-
// kernel
988-
char* buf = nullptr;
9891014
size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element
990-
buf = reinterpret_cast<char*>(dpnp_memory_alloc_c(step_size * sizeof(char)));
1015+
char* buf = reinterpret_cast<char*>(dpnp_memory_alloc_c(step_size * sizeof(char)));
9911016
for (size_t i = uvec_size; i > 0; i--)
9921017
{
9931018
size_t j = (size_t)(floor((i + 1) * Uvec[i - 1]));
9941019
if (j < i)
9951020
{
996-
memcpy(buf, result1 + j * step_size, step_size);
997-
memcpy(result1 + j * step_size, result1 + i * step_size, step_size);
998-
memcpy(result1 + i * step_size, buf, step_size);
1021+
auto memcpy1 =
1022+
DPNP_QUEUE.submit([&](cl::sycl::handler& h) { h.memcpy(buf, result1 + j * step_size, step_size); });
1023+
auto memcpy2 = DPNP_QUEUE.submit([&](cl::sycl::handler& h) {
1024+
h.depends_on({memcpy1});
1025+
h.memcpy(result1 + j * step_size, result1 + i * step_size, step_size);
1026+
});
1027+
auto memcpy3 = DPNP_QUEUE.submit([&](cl::sycl::handler& h) {
1028+
h.depends_on({memcpy2});
1029+
h.memcpy(result1 + i * step_size, buf, step_size);
1030+
});
1031+
memcpy3.wait();
9991032
}
10001033
}
1001-
10021034
dpnp_memory_free_c(buf);
10031035
}
1004-
10051036
dpnp_memory_free_c(Uvec);
10061037
}
10071038

dpnp/backend/src/queue_sycl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ static cl::sycl::device get_default_sycl_device()
9797
}
9898
#endif
9999

100+
#if defined(DPNPC_TOUCH_KERNEL_TO_LINK)
100101
/**
101102
* Function push the SYCL kernels to be linked (final stage of the compilation) for the current queue
102103
*
@@ -120,6 +121,7 @@ static long dpnp_kernels_link()
120121

121122
return result;
122123
}
124+
#endif
123125

124126
#if defined(DPNP_LOCAL_QUEUE)
125127
// Catch asynchronous exceptions
@@ -177,7 +179,10 @@ void backend_sycl::backend_sycl_queue_init(QueueOptions selector)
177179
#endif
178180

179181
std::chrono::high_resolution_clock::time_point t3 = std::chrono::high_resolution_clock::now();
182+
#if defined(DPNPC_TOUCH_KERNEL_TO_LINK)
183+
// Remove pre-link kernel library at startup time
180184
dpnp_kernels_link();
185+
#endif
181186
std::chrono::high_resolution_clock::time_point t4 = std::chrono::high_resolution_clock::now();
182187
std::chrono::duration<double> time_kernels_link =
183188
std::chrono::duration_cast<std::chrono::duration<double>>(t4 - t3);

dpnp/dpnp_algo/dpnp_algo.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na
171171
DPNP_FN_TAKE
172172
DPNP_FN_TAN
173173
DPNP_FN_TANH
174+
DPNP_FN_TRACE
174175
DPNP_FN_TRANSPOSE
175176
DPNP_FN_TRAPZ
176177
DPNP_FN_TRI

0 commit comments

Comments
 (0)