Commit b4bde28

Merge pull request #696 from IntelPython/master
Merge master to gold branch

2 parents 5ef873b + 649ead9

22 files changed: +177, -570 lines

conda-recipe/meta.yaml
Lines changed: 3 additions & 2 deletions

@@ -8,7 +8,7 @@ requirements:
   host:
     - python
     - setuptools
-    - numpy-devel >=1.18
+    - numpy >=1.15
     - cython
     - cmake >=3.16.5
     - dpctl >=0.5.0a0
@@ -22,6 +22,7 @@ requirements:
     - dpcpp_cpp_rt >=2021.1.1
     - mkl >=2021.1.1
    - mkl-dpcpp >=2021.1.1
+    - numpy >=1.15

 build:
   number: {{ GIT_DESCRIBE_NUMBER }}
@@ -44,7 +45,7 @@ test:
   commands:
     - python -c "import dpnp"
     - conda list
-    - pytest
+    - pytest -s

 about:
   home: https://github.com/IntelPython/dpnp
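Two changes here: the build-time numpy-devel >=1.18 host dependency is replaced by plain numpy >=1.15, which is now also pinned in the run requirements so the floor applies at install time as well, and the package test runs pytest -s, which disables pytest's output capturing so anything the backend prints during the tests shows up directly in the build log.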

dpnp/backend/include/dpnp_iface.hpp
Lines changed: 20 additions & 19 deletions

@@ -233,15 +233,15 @@ INP_DLLEXPORT void dpnp_elemwise_absolute_c(void* array1_in, void* result1, size
  */
 template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
 INP_DLLEXPORT void dpnp_dot_c(void* result_out,
-                              const void* input1_in,
-                              const size_t input1_size,
-                              const size_t* input1_shape,
-                              const size_t input1_shape_ndim,
-                              const void* input2_in,
-                              const size_t input2_size,
-                              const size_t* input2_shape,
-                              const size_t input2_shape_ndim,
-                              const size_t* where);
+                              const void* input1_in,
+                              const size_t input1_size,
+                              const size_t* input1_shape,
+                              const size_t input1_shape_ndim,
+                              const void* input2_in,
+                              const size_t input2_size,
+                              const size_t* input2_shape,
+                              const size_t input2_shape_ndim,
+                              const size_t* where);

 /**
  * @ingroup BACKEND_API
@@ -336,7 +336,8 @@ INP_DLLEXPORT void dpnp_sum_c(void* result_out,
  * @param [in] ndim Number of elements in shape.
  */
 template <typename _DataType>
-INP_DLLEXPORT void dpnp_partition_c(void* array, void* array2, void* result, const size_t kth, const size_t* shape, const size_t ndim);
+INP_DLLEXPORT void
+    dpnp_partition_c(void* array, void* array2, void* result, const size_t kth, const size_t* shape, const size_t ndim);

 /**
  * @ingroup BACKEND_API
@@ -485,15 +486,15 @@ INP_DLLEXPORT void dpnp_cholesky_c(void* array1_in, void* result1, const size_t
  */
 template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
 INP_DLLEXPORT void dpnp_correlate_c(void* result_out,
-                                    const void* input1_in,
-                                    const size_t input1_size,
-                                    const size_t* input1_shape,
-                                    const size_t input1_shape_ndim,
-                                    const void* input2_in,
-                                    const size_t input2_size,
-                                    const size_t* input2_shape,
-                                    const size_t input2_shape_ndim,
-                                    const size_t* where);
+                                    const void* input1_in,
+                                    const size_t input1_size,
+                                    const size_t* input1_shape,
+                                    const size_t input1_shape_ndim,
+                                    const void* input2_in,
+                                    const size_t input2_size,
+                                    const size_t* input2_shape,
+                                    const size_t input2_shape_ndim,
+                                    const size_t* where);

 /**
  * @ingroup BACKEND_API

dpnp/backend/kernels/dpnp_krnl_common.cpp
Lines changed: 0 additions & 1 deletion

@@ -86,7 +86,6 @@ void dpnp_dot_c(void* result_out,
                 const size_t input2_shape_ndim,
                 const size_t* where)
 {
-
     (void)input1_shape;
     (void)input1_shape_ndim;
     (void)input2_size;

dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
Lines changed: 2 additions & 2 deletions

@@ -370,8 +370,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap)
         _DataType_input2* input2_data = reinterpret_cast<_DataType_input2*>(const_cast<void*>(input2_in)); \
         _DataType_output* result = reinterpret_cast<_DataType_output*>(result_out); \
 \
-        std::vector<size_t> result_shape = get_result_shape(input1_shape, input1_shape_ndim, \
-                                                            input2_shape, input2_shape_ndim); \
+        std::vector<size_t> result_shape = \
+            get_result_shape(input1_shape, input1_shape_ndim, input2_shape, input2_shape_ndim); \
 \
         DPNPC_id<_DataType_input1>* input1_it; \
         const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \

dpnp/backend/kernels/dpnp_krnl_fft.cpp
Lines changed: 6 additions & 7 deletions

@@ -67,7 +67,6 @@ void dpnp_fft_fft_c(const void* array1_in,
     long* output_shape_offsets = reinterpret_cast<long*>(dpnp_memory_alloc_c(shape_size * sizeof(long)));
     long* input_shape_offsets = reinterpret_cast<long*>(dpnp_memory_alloc_c(shape_size * sizeof(long)));
     // must be a thread local storage.
-    long* xyz = reinterpret_cast<long*>(dpnp_memory_alloc_c(result_size * shape_size * sizeof(long)));
     long* axis_iterator = reinterpret_cast<long*>(dpnp_memory_alloc_c(result_size * shape_size * sizeof(long)));

     get_shape_offsets_inkernel<long>(output_shape, shape_size, output_shape_offsets);
@@ -79,14 +78,14 @@ void dpnp_fft_fft_c(const void* array1_in,

         double sum_real = 0.0;
         double sum_imag = 0.0;
-        // need to replace these arrays by thread local storage
-        long* xyz_thread = xyz + (output_id * shape_size);
+        // need to replace this array by thread local storage
         long* axis_iterator_thread = axis_iterator + (output_id * shape_size);

-        get_xyz_by_id_inkernel(output_id, output_shape_offsets, shape_size, xyz_thread);
+        size_t xyz_id;
         for (size_t i = 0; i < shape_size; ++i)
         {
-            axis_iterator_thread[i] = xyz_thread[i];
+            xyz_id = get_xyz_id_by_id_inkernel(output_id, output_shape_offsets, shape_size, i);
+            axis_iterator_thread[i] = xyz_id;
         }

         const long axis_length = input_boundarie;
@@ -114,7 +113,8 @@ void dpnp_fft_fft_c(const void* array1_in,
             }
         }

-        const size_t output_local_id = xyz_thread[axis];
+        xyz_id = get_xyz_id_by_id_inkernel(output_id, output_shape_offsets, shape_size, axis);
+        const size_t output_local_id = xyz_id;
         const double angle = 2.0 * kernel_pi * it * output_local_id / axis_length;

         const double angle_cos = cl::sycl::cos(angle);
@@ -153,7 +153,6 @@ void dpnp_fft_fft_c(const void* array1_in,
     dpnp_memory_free_c(input_shape_offsets);
     dpnp_memory_free_c(output_shape_offsets);
     dpnp_memory_free_c(axis_iterator);
-    dpnp_memory_free_c(xyz);

     return;
 }
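The rework drops the xyz scratch buffer (result_size * shape_size longs) and recomputes one axis coordinate at a time instead. The helper get_xyz_id_by_id_inkernel() is not shown in this diff, so the sketch below only illustrates the likely index arithmetic, assuming row-major shape offsets where offsets[i] is the product of all dimensions after axis i; the function and variable names here are hypothetical, not dpnp API.

#include <cstdio>

// Hypothetical stand-in for the backend helper: recover the coordinate of
// flat element global_id along `axis`, given row-major offsets where
// offsets[i] is the product of all dimensions after axis i.
long xyz_id_by_id(long global_id, const long* offsets, int axis)
{
    // Strip the higher (slower-varying) axes, then divide out the lower ones.
    long remainder = (axis == 0) ? global_id : (global_id % offsets[axis - 1]);
    return remainder / offsets[axis];
}

int main()
{
    const long offsets[] = {12, 4, 1}; // offsets for a shape of {2, 3, 4}
    // Flat id 17 = 1*12 + 1*4 + 1*1, so its coordinates are (1, 1, 1).
    for (int axis = 0; axis < 3; ++axis)
    {
        std::printf("axis %d -> %ld\n", axis, xyz_id_by_id(17, offsets, axis));
    }
    return 0;
}

The trade-off is a modulo and a division per lookup instead of allocating, filling, and freeing a per-element coordinate table.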

dpnp/backend/kernels/dpnp_krnl_indexing.cpp
Lines changed: 11 additions & 2 deletions

@@ -254,14 +254,23 @@ void dpnp_put_c(
     size_t* ind = reinterpret_cast<size_t*>(ind_in);
     _DataType* v = reinterpret_cast<_DataType*>(v_in);

+    if ((array_1 == nullptr) || (ind == nullptr) || (v == nullptr))
+    {
+        return;
+    }
+
+    if (size_v == 0)
+    {
+        return;
+    }
+
     for (size_t i = 0; i < size; ++i)
     {
         for (size_t j = 0; j < size_ind; ++j)
         {
-            if (i == ind[j])
+            if (i == ind[j] || (i == (size + ind[j])))
             {
                 array_1[i] = v[j % size_v];
-                break;
             }
         }
     }
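Beyond the new null-pointer and empty-v guards, the matching test now also accepts i == size + ind[j]. Since ind holds size_t values, a Python-style negative index shows up as its unsigned wrap-around, and adding size wraps it back to the intended position. Dropping the break means a later duplicate index overwrites an earlier write, which lines up with NumPy's last-write-wins semantics for put. A minimal stand-alone sketch of the wrap-around test (not dpnp code):

#include <cstddef>
#include <cstdio>

int main()
{
    const size_t size = 10;

    // A Python-style index of -2, reinterpreted through size_t the way
    // dpnp_put_c receives it: the value wraps to 2^64 - 2 on a 64-bit build.
    const size_t ind = static_cast<size_t>(-2);

    for (size_t i = 0; i < size; ++i)
    {
        // Mirrors the updated test: a direct hit, or a hit after the
        // unsigned wrap-around cancels (size + ind wraps back to size - 2).
        if (i == ind || i == (size + ind))
        {
            std::printf("index -2 targets element %zu\n", i); // prints 8
        }
    }
    return 0;
}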

dpnp/backend/kernels/dpnp_krnl_random.cpp
Lines changed: 10 additions & 30 deletions

@@ -548,27 +548,9 @@ void dpnp_rng_multivariate_normal_c(void* result,
     // `size1` is a number of random values to be generated for each dimension.
     size_t size1 = size / dimen;

-    if (dpnp_queue_is_cpu_c())
-    {
-        mkl_rng::gaussian_mv<_DataType> distribution(dimen, mean, cov);
-        auto event_out = mkl_rng::generate(distribution, DPNP_RNG_ENGINE, size1, result1);
-        event_out.wait();
-    }
-    else
-    {
-        int errcode = vdRngGaussianMV(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER2,
-                                      get_rng_stream(),
-                                      size1,
-                                      result1,
-                                      dimen,
-                                      VSL_MATRIX_STORAGE_FULL,
-                                      mean_vector,
-                                      cov_vector);
-        if (errcode != VSL_STATUS_OK)
-        {
-            throw std::runtime_error("DPNP RNG Error: dpnp_rng_multivariate_normal_c() failed.");
-        }
-    }
+    mkl_rng::gaussian_mv<_DataType> distribution(dimen, mean, cov);
+    auto event_out = mkl_rng::generate(distribution, DPNP_RNG_ENGINE, size1, result1);
+    event_out.wait();
 }

 template <typename _DataType>
@@ -1086,11 +1068,10 @@ void dpnp_rng_standard_normal_c(void* result, size_t size)
 template <typename _DataType>
 void dpnp_rng_standard_t_c(void* result, const _DataType df, const size_t size)
 {
-    if (!size)
+    if (!size || !result)
     {
         return;
     }
-    cl::sycl::vector_class<cl::sycl::event> no_deps;

     _DataType* result1 = reinterpret_cast<_DataType*>(result);
     const _DataType d_zero = 0.0, d_one = 1.0;
@@ -1100,18 +1081,17 @@ void dpnp_rng_standard_t_c(void* result, const _DataType df, const size_t size)
     if (dpnp_queue_is_cpu_c())
     {
         mkl_rng::gamma<_DataType> gamma_distribution(shape, d_zero, 1.0 / shape);
-        auto event_out = mkl_rng::generate(gamma_distribution, DPNP_RNG_ENGINE, size, result1);
-        event_out.wait();
-        event_out = mkl_vm::invsqrt(DPNP_QUEUE, size, result1, result1, no_deps, mkl_vm::mode::ha);
-        event_out.wait();
+        auto gamma_distr_event = mkl_rng::generate(gamma_distribution, DPNP_RNG_ENGINE, size, result1);
+
+        auto invsqrt_event = mkl_vm::invsqrt(DPNP_QUEUE, size, result1, result1, {gamma_distr_event}, mkl_vm::mode::ha);

         sn = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));

         mkl_rng::gaussian<_DataType> gaussian_distribution(d_zero, d_one);
-        event_out = mkl_rng::generate(gaussian_distribution, DPNP_RNG_ENGINE, size, sn);
-        event_out.wait();
+        auto gaussian_distr_event = mkl_rng::generate(gaussian_distribution, DPNP_RNG_ENGINE, size, sn);

-        event_out = mkl_vm::mul(DPNP_QUEUE, size, result1, sn, result1, no_deps, mkl_vm::mode::ha);
+        auto event_out = mkl_vm::mul(
+            DPNP_QUEUE, size, result1, sn, result1, {invsqrt_event, gaussian_distr_event}, mkl_vm::mode::ha);
         dpnp_memory_free_c(sn);
         event_out.wait();
     }
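The standard-t path now chains oneMKL calls through events instead of calling wait() after each one: invsqrt declares the gamma generation as its dependency, and the final mul depends on both invsqrt and the gaussian generation, leaving a single wait at the end. Below is a minimal plain-SYCL sketch of the same dependency pattern, assuming a DPC++-style compiler with USM support; the buffers and kernels here are placeholders, not the dpnp code.

#include <CL/sycl.hpp>

int main()
{
    cl::sycl::queue q;
    const size_t n = 1024;
    double* a = cl::sycl::malloc_device<double>(n, q);
    double* b = cl::sycl::malloc_device<double>(n, q);

    // Two independent producer operations; no host-side wait between them.
    cl::sycl::event fill_a = q.fill(a, 1.0, n);
    cl::sycl::event fill_b = q.fill(b, 2.0, n);

    // The consumer declares both producers as dependencies instead of
    // wait()ing after each one; the runtime orders the submissions.
    cl::sycl::event mul = q.submit([&](cl::sycl::handler& cgh) {
        cgh.depends_on({fill_a, fill_b});
        cgh.parallel_for(cl::sycl::range<1>(n), [=](cl::sycl::id<1> i) { a[i] *= b[i]; });
    });

    mul.wait(); // single synchronization point, as in the reworked kernel

    cl::sycl::free(a, q);
    cl::sycl::free(b, q);
    return 0;
}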

dpnp/backend/kernels/dpnp_krnl_sorting.cpp
Lines changed: 10 additions & 10 deletions

@@ -87,7 +87,8 @@ template <typename _DataType>
 class dpnp_partition_c_kernel;

 template <typename _DataType>
-void dpnp_partition_c(void* array1_in, void* array2_in, void* result1, const size_t kth, const size_t* shape_, const size_t ndim)
+void dpnp_partition_c(
+    void* array1_in, void* array2_in, void* result1, const size_t kth, const size_t* shape_, const size_t ndim)
 {
     _DataType* arr = reinterpret_cast<_DataType*>(array1_in);
     _DataType* arr2 = reinterpret_cast<_DataType*>(array2_in);
@@ -109,7 +110,7 @@ void dpnp_partition_c(void* array1_in, void* array2_in, void* result1, const siz
         size *= shape_[i];
     }

-    size_t size_ = size/shape_[ndim-1];
+    size_t size_ = size / shape_[ndim - 1];

     if (size_ == 0)
     {
@@ -121,29 +122,29 @@ void dpnp_partition_c(void* array1_in, void* array2_in, void* result1, const siz

     for (size_t i = 0; i < size_; ++i)
     {
-        size_t ind_begin = i * shape_[ndim-1];
-        size_t ind_end = (i + 1) * shape_[ndim-1] - 1;
+        size_t ind_begin = i * shape_[ndim - 1];
+        size_t ind_end = (i + 1) * shape_[ndim - 1] - 1;

-        _DataType matrix[shape_[ndim-1]];
+        _DataType matrix[shape_[ndim - 1]];
         for (size_t j = ind_begin; j < ind_end + 1; ++j)
         {
             size_t ind = j - ind_begin;
             matrix[ind] = arr2[j];
         }
-        std::partial_sort(matrix, matrix + shape_[ndim-1], matrix + shape_[ndim-1]);
+        std::partial_sort(matrix, matrix + shape_[ndim - 1], matrix + shape_[ndim - 1]);
         for (size_t j = ind_begin; j < ind_end + 1; ++j)
         {
             size_t ind = j - ind_begin;
             arr2[j] = matrix[ind];
         }
     }

-     size_t* shape = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
+    size_t* shape = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
     auto memcpy_event = DPNP_QUEUE.memcpy(shape, shape_, ndim * sizeof(size_t));

     memcpy_event.wait();

-    cl::sycl::range<2> gws(size_, kth+1);
+    cl::sycl::range<2> gws(size_, kth + 1);
     auto kernel_parallel_for_func = [=](cl::sycl::id<2> global_id) {
         size_t j = global_id[0];
         size_t k = global_id[1];
@@ -160,7 +161,6 @@ void dpnp_partition_c(void* array1_in, void* array2_in, void* result1, const siz
                 result[j * shape[ndim - 1] + i] = change_val2;
             }
         }
-
     };

     auto kernel_func = [&](cl::sycl::handler& cgh) {
@@ -172,7 +172,7 @@ void dpnp_partition_c(void* array1_in, void* array2_in, void* result1, const siz

     event.wait();

-     dpnp_memory_free_c(shape);
+    dpnp_memory_free_c(shape);
 }

 template <typename _DataType>
