@@ -33,7 +33,6 @@ static const auto n_samples = 120'000'000;
33
33
34
34
double estimate_pi (sycl::queue& q, size_t n_points) {
35
35
double estimated_pi; // Estimated value of Pi
36
- size_t n_under_curve = 0 ; // Number of points fallen under the curve
37
36
38
37
// Step 1. Generate n_points * 2 random numbers
39
38
// 1.1. Generator initialization
@@ -42,15 +41,17 @@ double estimate_pi(sycl::queue& q, size_t n_points) {
42
41
// Create an object of distribution (by default float, a = 0.0f, b = 1.0f)
43
42
mkl::rng::uniform distr;
44
43
45
- float * rng_ptr = sycl::malloc_device <float >(n_points * 2 , q);
44
+ float * rng_ptr = sycl::malloc_shared <float >(n_points * 2 , q);
46
45
47
46
// 1.2. Random number generation
48
47
auto event = mkl::rng::generate (distr, engine, n_points * 2 , rng_ptr);
49
48
50
49
// Step 2. Count points under curve (x ^ 2 + y ^ 2 < 1.0f)
51
- size_t count_per_thread = 32 ;
50
+ constexpr size_t count_per_thread = 32 ;
51
+ size_t *n_under_curve = sycl::malloc_host<size_t >(1 , q); // Number of points fallen under the curve
52
+ *n_under_curve = 0 ;
53
+ auto reductor = sycl::reduction (n_under_curve, size_t (0 ), std::plus<size_t >{});
52
54
53
- auto reductor = sycl::reduction (&n_under_curve, size_t (0 ), std::plus<size_t >{});
54
55
q.parallel_for (sycl::range<1 >(n_points / count_per_thread), event, reductor,
55
56
[=](sycl::item<1 > item, auto & sum) {
56
57
sycl::vec<float , 2 > r;
@@ -65,9 +66,10 @@ double estimate_pi(sycl::queue& q, size_t n_points) {
65
66
}).wait_and_throw ();
66
67
67
68
// Step 3. Calculate approximated value of Pi
68
- estimated_pi = n_under_curve / ((double )n_points) * 4.0 ;
69
+ estimated_pi = * n_under_curve / ((double )n_points) * 4.0 ;
69
70
70
71
sycl::free (rng_ptr, q);
72
+ sycl::free (n_under_curve, q);
71
73
72
74
return estimated_pi;
73
75
0 commit comments