@@ -475,54 +475,72 @@ void dpnp_rng_logistic_c(void* result, const double loc, const double scale, con
475
475
template <typename _DataType>
476
476
void dpnp_rng_lognormal_c (void * result, const _DataType mean, const _DataType stddev, const size_t size)
477
477
{
478
- if (!size)
478
+ if (!size || !result )
479
479
{
480
480
return ;
481
481
}
482
482
_DataType* result1 = reinterpret_cast <_DataType*>(result);
483
483
484
- const _DataType displacement = _DataType (0.0 );
485
-
486
- const _DataType scalefactor = _DataType (1.0 );
484
+ if (stddev == 0.0 )
485
+ {
486
+ _DataType* fill_value = reinterpret_cast <_DataType*>(dpnp_memory_alloc_c (sizeof (_DataType)));
487
+ fill_value[0 ] = static_cast <_DataType>(std::exp (mean + (stddev * stddev) / 2 ));
488
+ dpnp_initval_c<_DataType>(result, fill_value, size);
489
+ dpnp_memory_free_c (fill_value);
490
+ }
491
+ else
492
+ {
493
+ const _DataType displacement = _DataType (0.0 );
494
+ const _DataType scalefactor = _DataType (1.0 );
487
495
488
- mkl_rng::lognormal<_DataType> distribution (mean, stddev, displacement, scalefactor);
489
- // perform generation
490
- auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, size, result1);
491
- event_out.wait ();
496
+ mkl_rng::lognormal<_DataType> distribution (mean, stddev, displacement, scalefactor);
497
+ auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, size, result1);
498
+ event_out.wait ();
499
+ }
500
+ return ;
492
501
}
493
502
494
503
template <typename _DataType>
495
504
void dpnp_rng_multinomial_c (
496
505
void * result, const int ntrial, const double * p_vector, const size_t p_vector_size, const size_t size)
497
506
{
498
- if (!size)
507
+ if (!size || !result )
499
508
{
500
509
return ;
501
510
}
502
- std::int32_t * result1 = reinterpret_cast <std::int32_t *>(result);
503
- std::vector<double > p (p_vector, p_vector + p_vector_size);
504
- // size = size
505
- // `result` is a array for random numbers
506
- // `size` is a `result`'s len. `size = n * p.size()`
507
- // `n` is a number of random values to be generated.
508
- size_t n = size / p.size ();
509
511
510
- if (dpnp_queue_is_cpu_c () )
512
+ if (ntrial == 0 )
511
513
{
512
- mkl_rng::multinomial<std::int32_t > distribution (ntrial, p);
513
- // perform generation
514
- auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, n, result1);
515
- event_out.wait ();
514
+ dpnp_zeros_c<_DataType>(result, size);
516
515
}
517
516
else
518
517
{
519
- int errcode = viRngMultinomial (
520
- VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, get_rng_stream (), n, result1, ntrial, p_vector_size, p_vector);
521
- if (errcode != VSL_STATUS_OK)
518
+ std::int32_t * result1 = reinterpret_cast <std::int32_t *>(result);
519
+ std::vector<double > p (p_vector, p_vector + p_vector_size);
520
+ // size = size
521
+ // `result` is a array for random numbers
522
+ // `size` is a `result`'s len. `size = n * p.size()`
523
+ // `n` is a number of random values to be generated.
524
+ size_t n = size / p.size ();
525
+
526
+ if (dpnp_queue_is_cpu_c ())
522
527
{
523
- throw std::runtime_error (" DPNP RNG Error: dpnp_rng_multinomial_c() failed." );
528
+ mkl_rng::multinomial<std::int32_t > distribution (ntrial, p);
529
+ // perform generation
530
+ auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, n, result1);
531
+ event_out.wait ();
532
+ }
533
+ else
534
+ {
535
+ int errcode = viRngMultinomial (
536
+ VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, get_rng_stream (), n, result1, ntrial, p_vector_size, p_vector);
537
+ if (errcode != VSL_STATUS_OK)
538
+ {
539
+ throw std::runtime_error (" DPNP RNG Error: dpnp_rng_multinomial_c() failed." );
540
+ }
524
541
}
525
542
}
543
+ return ;
526
544
}
527
545
528
546
template <typename _DataType>
@@ -946,17 +964,20 @@ template <typename _DataType>
946
964
void dpnp_rng_shuffle_c (
947
965
void * result, const size_t itemsize, const size_t ndim, const size_t high_dim_size, const size_t size)
948
966
{
949
- if (!(size) || !(high_dim_size > 1 ) )
967
+ if (!result )
950
968
{
951
969
return ;
952
970
}
953
971
954
- char * result1 = reinterpret_cast <char *>(result);
972
+ if (!size || !ndim || !(high_dim_size > 1 ))
973
+ {
974
+ return ;
975
+ }
955
976
956
- double * Uvec = nullptr ;
977
+ char * result1 = reinterpret_cast < char *>(result) ;
957
978
958
979
size_t uvec_size = high_dim_size - 1 ;
959
- Uvec = reinterpret_cast <double *>(dpnp_memory_alloc_c (uvec_size * sizeof (double )));
980
+ double * Uvec = reinterpret_cast <double *>(dpnp_memory_alloc_c (uvec_size * sizeof (double )));
960
981
mkl_rng::uniform<double > uniform_distribution (0.0 , 1.0 );
961
982
auto uniform_event = mkl_rng::generate (uniform_distribution, DPNP_RNG_ENGINE, uvec_size, Uvec);
962
983
uniform_event.wait ();
@@ -966,42 +987,52 @@ void dpnp_rng_shuffle_c(
966
987
// Fast, statically typed path: shuffle the underlying buffer.
967
988
// Only for non-empty, 1d objects of class ndarray (subclasses such
968
989
// as MaskedArrays may not support this approach).
969
- // TODO
970
- // kernel
971
- char * buf = nullptr ;
972
- buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (itemsize * sizeof (char )));
990
+ char * buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (itemsize * sizeof (char )));
973
991
for (size_t i = uvec_size; i > 0 ; i--)
974
992
{
975
993
size_t j = (size_t )(floor ((i + 1 ) * Uvec[i - 1 ]));
976
- memcpy (buf, result1 + j * itemsize, itemsize);
977
- memcpy (result1 + j * itemsize, result1 + i * itemsize, itemsize);
978
- memcpy (result1 + i * itemsize, buf, itemsize);
994
+ if (i != j)
995
+ {
996
+ auto memcpy1 =
997
+ DPNP_QUEUE.submit ([&](cl::sycl::handler& h) { h.memcpy (buf, result1 + j * itemsize, itemsize); });
998
+ auto memcpy2 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
999
+ h.depends_on ({memcpy1});
1000
+ h.memcpy (result1 + j * itemsize, result1 + i * itemsize, itemsize);
1001
+ });
1002
+ auto memcpy3 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
1003
+ h.depends_on ({memcpy2});
1004
+ h.memcpy (result1 + i * itemsize, buf, itemsize);
1005
+ });
1006
+ memcpy3.wait ();
1007
+ }
979
1008
}
980
-
981
1009
dpnp_memory_free_c (buf);
982
1010
}
983
1011
else
984
1012
{
985
1013
// Multidimensional ndarrays require a bounce buffer.
986
- // TODO
987
- // kernel
988
- char * buf = nullptr ;
989
1014
size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element
990
- buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (step_size * sizeof (char )));
1015
+ char * buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (step_size * sizeof (char )));
991
1016
for (size_t i = uvec_size; i > 0 ; i--)
992
1017
{
993
1018
size_t j = (size_t )(floor ((i + 1 ) * Uvec[i - 1 ]));
994
1019
if (j < i)
995
1020
{
996
- memcpy (buf, result1 + j * step_size, step_size);
997
- memcpy (result1 + j * step_size, result1 + i * step_size, step_size);
998
- memcpy (result1 + i * step_size, buf, step_size);
1021
+ auto memcpy1 =
1022
+ DPNP_QUEUE.submit ([&](cl::sycl::handler& h) { h.memcpy (buf, result1 + j * step_size, step_size); });
1023
+ auto memcpy2 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
1024
+ h.depends_on ({memcpy1});
1025
+ h.memcpy (result1 + j * step_size, result1 + i * step_size, step_size);
1026
+ });
1027
+ auto memcpy3 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
1028
+ h.depends_on ({memcpy2});
1029
+ h.memcpy (result1 + i * step_size, buf, step_size);
1030
+ });
1031
+ memcpy3.wait ();
999
1032
}
1000
1033
}
1001
-
1002
1034
dpnp_memory_free_c (buf);
1003
1035
}
1004
-
1005
1036
dpnp_memory_free_c (Uvec);
1006
1037
}
1007
1038
0 commit comments