Skip to content

Commit 51fda93

Browse files
jiebinn authored and ZhiguoZh committed
blosc/blosc2.c: optimize the mutex lock with spin lock
The mutex lock costs a lot of cycles in t_blosc_do_job when the number of threads is large, even though the protected section only updates a few global values and is not heavy. Replacing the mutex lock with a spin lock saves many of those cycles.
To verify, apply the patch and run the c-blosc2 benchmark:
$ ./bench/b2bench blosclz noshuffle single 160 83886080 4
Score gain: 7.4%
CPU: ICX 8380 x 2 sockets
Core number: 160 threads
1 parent 0d9eac4 commit 51fda93

File tree

2 files changed

+15
-14
lines changed

2 files changed

+15
-14
lines changed

blosc/blosc2.c

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sys/types.h>
1717
#include <assert.h>
1818
#include <math.h>
19+
#include <pthread.h>
1920

2021
#include "blosc2.h"
2122
#include "blosc-private.h"
@@ -3055,10 +3056,10 @@ static void t_blosc_do_job(void *ctxt)
30553056
}
30563057
else {
30573058
// Use dynamic schedule via a queue. Get the next block.
3058-
pthread_mutex_lock(&context->count_mutex);
3059+
pthread_spin_lock(&context->count_spin);
30593060
context->thread_nblock++;
30603061
nblock_ = context->thread_nblock;
3061-
pthread_mutex_unlock(&context->count_mutex);
3062+
pthread_spin_unlock(&context->count_spin);
30623063
tblock = nblocks;
30633064
}
30643065

@@ -3119,15 +3120,15 @@ static void t_blosc_do_job(void *ctxt)
31193120
/* Check results for the compressed/decompressed block */
31203121
if (cbytes < 0) { /* compr/decompr failure */
31213122
/* Set giveup_code error */
3122-
pthread_mutex_lock(&context->count_mutex);
3123+
pthread_spin_lock(&context->count_spin);
31233124
context->thread_giveup_code = cbytes;
3124-
pthread_mutex_unlock(&context->count_mutex);
3125+
pthread_spin_unlock(&context->count_spin);
31253126
break;
31263127
}
31273128

31283129
if (compress && !memcpyed) {
31293130
/* Start critical section */
3130-
pthread_mutex_lock(&context->count_mutex);
3131+
pthread_spin_lock(&context->count_spin);
31313132
ntdest = context->output_bytes;
31323133
// Note: do not use a typical local dict_training variable here
31333134
// because it is probably cached from previous calls if the number of
@@ -3138,13 +3139,13 @@ static void t_blosc_do_job(void *ctxt)
31383139

31393140
if ((cbytes == 0) || (ntdest + cbytes > maxbytes)) {
31403141
context->thread_giveup_code = 0; /* incompressible buf */
3141-
pthread_mutex_unlock(&context->count_mutex);
3142+
pthread_spin_unlock(&context->count_spin);
31423143
break;
31433144
}
31443145
context->thread_nblock++;
31453146
nblock_ = context->thread_nblock;
31463147
context->output_bytes += cbytes;
3147-
pthread_mutex_unlock(&context->count_mutex);
3148+
pthread_spin_unlock(&context->count_spin);
31483149
/* End of critical section */
31493150

31503151
/* Copy the compressed buffer to destination */
@@ -3154,22 +3155,22 @@ static void t_blosc_do_job(void *ctxt)
31543155
nblock_++;
31553156
}
31563157
else {
3157-
pthread_mutex_lock(&context->count_mutex);
3158+
pthread_spin_lock(&context->count_spin);
31583159
context->thread_nblock++;
31593160
nblock_ = context->thread_nblock;
31603161
context->output_bytes += cbytes;
3161-
pthread_mutex_unlock(&context->count_mutex);
3162+
pthread_spin_unlock(&context->count_spin);
31623163
}
31633164

31643165
} /* closes while (nblock_) */
31653166

31663167
if (static_schedule) {
3167-
pthread_mutex_lock(&context->count_mutex);
3168+
pthread_spin_lock(&context->count_spin);
31683169
context->output_bytes = context->sourcesize;
31693170
if (compress) {
31703171
context->output_bytes += context->header_overhead;
31713172
}
3172-
pthread_mutex_unlock(&context->count_mutex);
3173+
pthread_spin_unlock(&context->count_spin);
31733174
}
31743175

31753176
}
@@ -3208,7 +3209,7 @@ int init_threadpool(blosc2_context *context) {
32083209
int rc2;
32093210

32103211
/* Initialize mutex and condition variable objects */
3211-
pthread_mutex_init(&context->count_mutex, NULL);
3212+
pthread_spin_init(&context->count_spin, PTHREAD_PROCESS_SHARED);
32123213
pthread_mutex_init(&context->delta_mutex, NULL);
32133214
pthread_mutex_init(&context->nchunk_mutex, NULL);
32143215
pthread_cond_init(&context->delta_cv, NULL);
@@ -3613,7 +3614,7 @@ int release_threadpool(blosc2_context *context) {
36133614
}
36143615

36153616
/* Release mutex and condition variable objects */
3616-
pthread_mutex_destroy(&context->count_mutex);
3617+
pthread_spin_destroy(&context->count_spin);
36173618
pthread_mutex_destroy(&context->delta_mutex);
36183619
pthread_mutex_destroy(&context->nchunk_mutex);
36193620
pthread_cond_destroy(&context->delta_cv);

blosc/context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ struct blosc2_context_s {
121121
int16_t end_threads;
122122
pthread_t *threads;
123123
struct thread_context *thread_contexts; /* only for user-managed threads */
124-
pthread_mutex_t count_mutex;
124+
pthread_spinlock_t count_spin;
125125
pthread_mutex_t nchunk_mutex;
126126
#ifdef BLOSC_POSIX_BARRIERS
127127
pthread_barrier_t barr_init;

0 commit comments

Comments
 (0)