Skip to content

Commit bff61e0

Browse files
committed
blosc/blosc2.c: replace the mutex lock with a spin lock
The mutex lock costs a lot of cycles in t_blosc_do_job when the number of threads is large. The updates to the shared values inside the protected section are not heavy, so replacing the mutex lock with a spin lock saves many cycles. Apply the patch and test with c-blosc2/bench: $ ./bench/b2bench blosclz noshuffle single 160 83886080 4 Score gain: 7.4% CPU: ICX 8380 x 2 sockets Core number: 160 threads
1 parent 0d9eac4 commit bff61e0

File tree

2 files changed

+15
-11
lines changed

2 files changed

+15
-11
lines changed

blosc/blosc2.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <sys/types.h>
1717
#include <assert.h>
1818
#include <math.h>
19+
#include <pthread.h>
1920

2021
#include "blosc2.h"
2122
#include "blosc-private.h"
@@ -3055,10 +3056,10 @@ static void t_blosc_do_job(void *ctxt)
30553056
}
30563057
else {
30573058
// Use dynamic schedule via a queue. Get the next block.
3058-
pthread_mutex_lock(&context->count_mutex);
3059+
pthread_spin_lock(&context->count_spin);
30593060
context->thread_nblock++;
30603061
nblock_ = context->thread_nblock;
3061-
pthread_mutex_unlock(&context->count_mutex);
3062+
pthread_spin_unlock(&context->count_spin);
30623063
tblock = nblocks;
30633064
}
30643065

@@ -3119,15 +3120,15 @@ static void t_blosc_do_job(void *ctxt)
31193120
/* Check results for the compressed/decompressed block */
31203121
if (cbytes < 0) { /* compr/decompr failure */
31213122
/* Set giveup_code error */
3122-
pthread_mutex_lock(&context->count_mutex);
3123+
pthread_spin_lock(&context->count_spin);
31233124
context->thread_giveup_code = cbytes;
3124-
pthread_mutex_unlock(&context->count_mutex);
3125+
pthread_spin_unlock(&context->count_spin);
31253126
break;
31263127
}
31273128

31283129
if (compress && !memcpyed) {
31293130
/* Start critical section */
3130-
pthread_mutex_lock(&context->count_mutex);
3131+
pthread_spin_lock(&context->count_spin);
31313132
ntdest = context->output_bytes;
31323133
// Note: do not use a typical local dict_training variable here
31333134
// because it is probably cached from previous calls if the number of
@@ -3138,13 +3139,13 @@ static void t_blosc_do_job(void *ctxt)
31383139

31393140
if ((cbytes == 0) || (ntdest + cbytes > maxbytes)) {
31403141
context->thread_giveup_code = 0; /* incompressible buf */
3141-
pthread_mutex_unlock(&context->count_mutex);
3142+
pthread_spin_unlock(&context->count_spin);
31423143
break;
31433144
}
31443145
context->thread_nblock++;
31453146
nblock_ = context->thread_nblock;
31463147
context->output_bytes += cbytes;
3147-
pthread_mutex_unlock(&context->count_mutex);
3148+
pthread_spin_unlock(&context->count_spin);
31483149
/* End of critical section */
31493150

31503151
/* Copy the compressed buffer to destination */
@@ -3154,22 +3155,22 @@ static void t_blosc_do_job(void *ctxt)
31543155
nblock_++;
31553156
}
31563157
else {
3157-
pthread_mutex_lock(&context->count_mutex);
3158+
pthread_spin_lock(&context->count_spin);
31583159
context->thread_nblock++;
31593160
nblock_ = context->thread_nblock;
31603161
context->output_bytes += cbytes;
3161-
pthread_mutex_unlock(&context->count_mutex);
3162+
pthread_spin_unlock(&context->count_spin);
31623163
}
31633164

31643165
} /* closes while (nblock_) */
31653166

31663167
if (static_schedule) {
3167-
pthread_mutex_lock(&context->count_mutex);
3168+
pthread_spin_lock(&context->count_spin);
31683169
context->output_bytes = context->sourcesize;
31693170
if (compress) {
31703171
context->output_bytes += context->header_overhead;
31713172
}
3172-
pthread_mutex_unlock(&context->count_mutex);
3173+
pthread_spin_unlock(&context->count_spin);
31733174
}
31743175

31753176
}
@@ -3208,6 +3209,7 @@ int init_threadpool(blosc2_context *context) {
32083209
int rc2;
32093210

32103211
/* Initialize mutex and condition variable objects */
3212+
pthread_spin_init(&context->count_spin, PTHREAD_PROCESS_SHARED);
32113213
pthread_mutex_init(&context->count_mutex, NULL);
32123214
pthread_mutex_init(&context->delta_mutex, NULL);
32133215
pthread_mutex_init(&context->nchunk_mutex, NULL);
@@ -3613,6 +3615,7 @@ int release_threadpool(blosc2_context *context) {
36133615
}
36143616

36153617
/* Release mutex and condition variable objects */
3618+
pthread_spin_destroy(&context->count_spin);
36163619
pthread_mutex_destroy(&context->count_mutex);
36173620
pthread_mutex_destroy(&context->delta_mutex);
36183621
pthread_mutex_destroy(&context->nchunk_mutex);

blosc/context.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ struct blosc2_context_s {
121121
int16_t end_threads;
122122
pthread_t *threads;
123123
struct thread_context *thread_contexts; /* only for user-managed threads */
124+
pthread_spinlock_t count_spin;
124125
pthread_mutex_t count_mutex;
125126
pthread_mutex_t nchunk_mutex;
126127
#ifdef BLOSC_POSIX_BARRIERS

0 commit comments

Comments
 (0)