Skip to content

Commit 525d211

Browse files
committed
Merge branch 'master' of https://github.com/kimwalisch/primesum
2 parents 0da29e9 + c6a9c9a commit 525d211

File tree

1 file changed

+28
-42
lines changed

1 file changed

+28
-42
lines changed

src/deleglise-rivat/S2_hard.cpp

Lines changed: 28 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,7 @@ T S2_hard_OpenMP_thread(uint128_t x,
133133

134134
BitSieve sieve(segment_size);
135135
Wheel wheel(primes, max_b + 1, low);
136-
phi.clear();
137136
phi.resize(max_b + 1, 0);
138-
mu_sum.clear();
139137
mu_sum.resize(max_b + 1, 0);
140138
BinaryIndexedTree tree;
141139

@@ -341,58 +339,46 @@ S2_hard_OpenMP_master(X x,
341339
int64_t min_segment_size = loadBalancer.get_min_segment_size();
342340
int64_t segment_size = min_segment_size;
343341
int64_t segments_per_thread = 1;
344-
int64_t segments = 0;
345-
346-
aligned_vector<vector<int128_t>> phi(threads);
347-
aligned_vector<vector<res_t>> mu_sum(threads);
348-
aligned_vector<double> timings(threads);
349342

350343
PiTable pi(max_prime);
351344
vector<int128_t> phi_total(pi[isqrt(z)] + 1, 0);
352345
double alpha = get_alpha(x, y);
353346

354-
#pragma omp parallel num_threads(threads) reduction(+: s2_hard)
347+
while (low < limit)
355348
{
356-
int t = omp_get_thread_num();
349+
int64_t segments = ceil_div(limit - low, segment_size);
350+
threads = in_between(1, threads, segments);
351+
segments_per_thread = in_between(1, segments_per_thread, ceil_div(segments, threads));
357352

358-
while (low < limit)
359-
{
360-
#pragma omp single
361-
{
362-
segments = ceil_div(limit - low, segment_size);
363-
threads = in_between(1, threads, segments);
364-
segments_per_thread = in_between(1, segments_per_thread, ceil_div(segments, threads));
365-
}
353+
aligned_vector<vector<int128_t>> phi(threads);
354+
aligned_vector<vector<res_t>> mu_sum(threads);
355+
aligned_vector<double> timings(threads);
366356

367-
if (t < threads)
368-
{
369-
timings[t] = get_wtime();
370-
s2_hard += S2_hard_OpenMP_thread(x, y, z, c, segment_size, segments_per_thread,
371-
t, low, limit, alpha, factors, pi, primes, mu_sum[t], phi[t]);
372-
timings[t] = get_wtime() - timings[t];
373-
}
357+
#pragma omp parallel for num_threads(threads) reduction(+: s2_hard)
358+
for (int i = 0; i < threads; i++)
359+
{
360+
timings[i] = get_wtime();
361+
s2_hard += S2_hard_OpenMP_thread(x, y, z, c, segment_size, segments_per_thread,
362+
i, low, limit, alpha, factors, pi, primes, mu_sum[i], phi[i]);
363+
timings[i] = get_wtime() - timings[i];
364+
}
374365

375-
#pragma omp barrier
376-
#pragma omp single
366+
// Once all threads have finished reconstruct and add the
367+
// missing contribution of all special leaves. This must
368+
// be done in order as each thread (i) requires the sum of
369+
// the phi values from the previous threads.
370+
//
371+
for (int i = 0; i < threads; i++)
372+
{
373+
for (size_t j = 1; j < phi[i].size(); j++)
377374
{
378-
// Once all threads have finished reconstruct and add the
379-
// missing contribution of all special leaves. This must
380-
// be done in order as each thread requires the sum of the
381-
// phi values from the previous threads.
382-
//
383-
for (int i = 0; i < threads; i++)
384-
{
385-
for (size_t j = 1; j < phi[i].size(); j++)
386-
{
387-
s2_hard += mu_sum[i][j] * phi_total[j];
388-
phi_total[j] += phi[i][j];
389-
}
390-
}
391-
392-
low += segments_per_thread * threads * segment_size;
393-
loadBalancer.update(low, threads, &segment_size, &segments_per_thread, timings);
375+
s2_hard += mu_sum[i][j] * phi_total[j];
376+
phi_total[j] += phi[i][j];
394377
}
395378
}
379+
380+
low += segments_per_thread * threads * segment_size;
381+
loadBalancer.update(low, threads, &segment_size, &segments_per_thread, timings);
396382
}
397383

398384
return s2_hard;

0 commit comments

Comments
 (0)