116 | 116 | * Using 128-byte alignment to eliminate false sharing across various Armv8 core
117 | 117 | * cache line sizes
118 | 118 | */ |
| 119 | + |
| 120 | +#define SPIN_NODE_ALIGNMENT 128UL |
| 121 | +#define SPIN_TAIL_ALIGNMENT 128UL |
| 122 | + |
119 | 123 | struct optimistic_spin_node { |
120 | 124 | struct optimistic_spin_node *next, *prev; |
121 | 125 | int locked; /* 1 if lock acquired */ |
122 | 126 | int cpu; /* encoded CPU # + 1 value */ |
123 | 127 | int random_sleep; /* random sleep in us */ |
124 | | -} __attribute__ ((aligned (128))); |
| 128 | +} __attribute__ ((aligned (SPIN_NODE_ALIGNMENT))); |
125 | 129 |
126 | 130 | struct optimistic_spin_queue { |
127 | 131 | /* |
128 | 132 | * Stores an encoded value of the CPU # of the tail node in the queue. |
129 | 133 | * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL. |
130 | 134 | */ |
131 | 135 | atomic_t tail; |
132 | | -}; |
| 136 | +} __attribute__ ((aligned (SPIN_TAIL_ALIGNMENT))); |
133 | 137 |
134 | 138 | /* 0 means thread unlocked, 1~N represents each individual thread on core 1~N */ |
135 | 139 | #define OSQ_UNLOCKED_VAL (0) |
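
A note on the alignment change above: `__attribute__ ((aligned (128)))` both raises the struct's alignment and pads its `sizeof` up to a multiple of 128, so consecutive per-CPU nodes in the pool cannot share a line on cores whose cache lines are up to 128 bytes. A minimal standalone sketch (struct copied from the diff; the checks themselves are only illustrative and not part of the change) that verifies this at compile time:

```c
/*
 * Illustrative compile-time checks: the aligned attribute guarantees each
 * node starts on its own 128-byte boundary and that sizeof() is padded to
 * a multiple of it, so neighbouring nodes cannot false-share a cache line.
 */
#include <assert.h>
#include <stdalign.h>

#define SPIN_NODE_ALIGNMENT 128UL

struct optimistic_spin_node {
    struct optimistic_spin_node *next, *prev;
    int locked;         /* 1 if lock acquired */
    int cpu;            /* encoded CPU # + 1 value */
    int random_sleep;   /* random sleep in us */
} __attribute__ ((aligned (SPIN_NODE_ALIGNMENT)));

static_assert(alignof(struct optimistic_spin_node) == SPIN_NODE_ALIGNMENT,
              "node must start on a 128-byte boundary");
static_assert(sizeof(struct optimistic_spin_node) % SPIN_NODE_ALIGNMENT == 0,
              "node size must pad out to whole cache lines");
```
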
@@ -216,14 +220,24 @@ static void osq_parse_args(test_args unused, int argc, char** argv) { |
216 | 220 | static inline void osq_lock_init(uint64_t *lock, unsigned long cores) |
217 | 221 | { |
218 | 222 | /* |
219 | | - * Allocate optimistic_spin_node from heap during main thread initialization. |
220 | | - * Each cpu core will have its own spinning node, aligned to 128 bytes maximum |
221 | | - * cache line, calloc will set memory to zero automatically, therefore no need |
222 | | - * to bzero the nodepool. |
| 223 | + * Allocate the optimistic_spin_node pool from the heap during main thread
| 224 | + * initialization. Each CPU core gets its own spinning node, aligned to the
| 225 | + * 128-byte worst-case cache line size (SPIN_NODE_ALIGNMENT).
223 | 226 | */ |
224 | | - global_osq_nodepool_ptr = calloc(cores + 1, sizeof(struct optimistic_spin_node)); |
| 227 | + |
| 228 | + size_t size = (cores + 1) * sizeof(struct optimistic_spin_node); |
| 229 | + |
| 230 | + if (size % SPIN_NODE_ALIGNMENT) { |
| 231 | + printf("size = %zu is not a multiple of %lu\n", size, SPIN_NODE_ALIGNMENT);
| 232 | + exit(-1); |
| 233 | + } |
| 234 | + |
| 235 | + global_osq_nodepool_ptr = aligned_alloc(SPIN_NODE_ALIGNMENT, size); |
| 236 | + |
225 | 237 | if (global_osq_nodepool_ptr == NULL) exit(errno); |
226 | 238 |
| 239 | + memset(global_osq_nodepool_ptr, 0, size); |
| 240 | + |
227 | 241 | /* |
228 | 242 | * If osq spins more than unqueue_retry times, the spinning cpu may back off
229 | 243 | * and sleep for 1 ~ 10 microseconds (on average 5 microseconds). Each spinning |
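
One note on the `aligned_alloc()` call introduced above: C11 requires the requested size to be an integral multiple of the alignment, which is why the size check exists, and unlike the `calloc()` it replaces, `aligned_alloc()` returns uninitialized memory, hence the explicit `memset()`. A minimal standalone sketch of the same pattern (helper name and node size are illustrative), rounding the size up instead of exiting:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NODE_ALIGN 128UL    /* same worst-case cache line size as the diff */

/* Allocate a zeroed, cache-line-aligned pool. The byte count is rounded up
 * to a multiple of NODE_ALIGN (valid because NODE_ALIGN is a power of two),
 * so the aligned_alloc() size requirement always holds. */
static void *alloc_node_pool(size_t nodes, size_t node_size)
{
    size_t size = nodes * node_size;

    size = (size + NODE_ALIGN - 1) & ~(NODE_ALIGN - 1);

    void *pool = aligned_alloc(NODE_ALIGN, size);
    if (pool == NULL)
        return NULL;

    memset(pool, 0, size);  /* aligned_alloc() does not zero; calloc() did */
    return pool;
}

int main(void)
{
    void *pool = alloc_node_pool(4 + 1, 128);
    if (pool == NULL) {
        perror("aligned_alloc");
        return EXIT_FAILURE;
    }
    printf("node pool at %p\n", pool);
    free(pool);
    return EXIT_SUCCESS;
}
```
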