Skip to content

Commit 9ba2b21

Browse files
committed
osq_lock - ensure spin node and spin queue alignment
calloc() does not return memory aligned to 128-byte cache lines. Use aligned_alloc() to guarantee the alignment, and make the alignment value parameterized, at least via a #define.
1 parent c6fd724 commit 9ba2b21

File tree

1 file changed

+21
-7
lines changed

1 file changed

+21
-7
lines changed

ext/linux/osq_lock.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,20 +116,24 @@
116116
* Using 128-byte alignment to eliminate false sharing for various Armv8 core
117117
* cache line sizes
118118
*/
119+
120+
#define SPIN_NODE_ALIGNMENT 128UL
121+
#define SPIN_TAIL_ALIGNMENT 128UL
122+
119123
struct optimistic_spin_node {
120124
struct optimistic_spin_node *next, *prev;
121125
int locked; /* 1 if lock acquired */
122126
int cpu; /* encoded CPU # + 1 value */
123127
int random_sleep; /* random sleep in us */
124-
} __attribute__ ((aligned (128)));
128+
} __attribute__ ((aligned (SPIN_NODE_ALIGNMENT)));
125129

126130
struct optimistic_spin_queue {
127131
/*
128132
* Stores an encoded value of the CPU # of the tail node in the queue.
129133
* If the queue is empty, then it's set to OSQ_UNLOCKED_VAL.
130134
*/
131135
atomic_t tail;
132-
};
136+
} __attribute__ ((aligned (SPIN_TAIL_ALIGNMENT)));
133137

134138
/* 0 means thread unlocked, 1~N represents each individual thread on core 1~N */
135139
#define OSQ_UNLOCKED_VAL (0)
@@ -216,14 +220,24 @@ static void osq_parse_args(test_args unused, int argc, char** argv) {
216220
static inline void osq_lock_init(uint64_t *lock, unsigned long cores)
217221
{
218222
/*
219-
* Allocate optimistic_spin_node from heap during main thread initialization.
220-
* Each cpu core will have its own spinning node, aligned to 128 bytes maximum
221-
* cache line, calloc will set memory to zero automatically, therefore no need
222-
* to bzero the nodepool.
223+
* Allocate optimistic_spin_node from the heap during main thread
224+
* initialization. Each cpu core will have its own spinning node,
225+
* aligned to a 128-byte cache line.
223226
*/
224-
global_osq_nodepool_ptr = calloc(cores + 1, sizeof(struct optimistic_spin_node));
227+
228+
size_t size = (cores + 1) * sizeof(struct optimistic_spin_node);
229+
230+
if (size % SPIN_NODE_ALIGNMENT) {
231+
printf("size = %zu, is not a multiple of %zu\n", size, SPIN_NODE_ALIGNMENT);
232+
exit(-1);
233+
}
234+
235+
global_osq_nodepool_ptr = aligned_alloc(SPIN_NODE_ALIGNMENT, size);
236+
225237
if (global_osq_nodepool_ptr == NULL) exit(errno);
226238

239+
memset(global_osq_nodepool_ptr, 0, size);
240+
227241
/*
228242
* If osq spins more than unqueue_retry times, the spinning cpu may backoff
229243
* and sleep for 1 ~ 10 microseconds (on average 5 microseconds). Each spinning

0 commit comments

Comments
 (0)