Skip to content

Commit 12f0ff6

Browse files
committed
atomic aggregate _Py_atomic_source_memcpy_relaxed()
1 parent fff827e commit 12f0ff6

File tree

1 file changed

+122
-27
lines changed

1 file changed

+122
-27
lines changed

Modules/arraymodule.c

Lines changed: 122 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -217,38 +217,136 @@ arraydata_realloc(arraydata *data, Py_ssize_t size, int itemsize)
217217

218218
#ifdef Py_GIL_DISABLED
219219

220+
// NOTE: this helper does not really belong in this file; it is placed here for demonstration for now.
220221
static void
221-
atomic_itemcpy(void *dest, const void *src, size_t n, int itemsize)
222+
_Py_atomic_source_memcpy_relaxed(void *dest, void *src, size_t n)
222223
{
223-
if (itemsize == 1) {
224-
for (char *d = (char *) dest, *end = d + n, *s = (char *) src;
225-
d < end; d++, s++) {
226-
*d = _Py_atomic_load_char_relaxed(s);
224+
int diff = (int)((uintptr_t)dest ^ (uintptr_t)src);
225+
226+
// the first half deals with misalignment: relative (dest vs. src) and absolute (in memory)
227+
228+
if (diff & 1) { // dest and src differ in address bit 0, so they can never both be 2-byte aligned: copy byte by byte
229+
for (void *end = (char *)dest + n; dest < end;
230+
dest = (char *)dest + 1, src = (char *)src + 1) {
231+
*((char *)dest) = _Py_atomic_load_char_relaxed((char *)src);
227232
}
233+
234+
return;
228235
}
229-
else if (itemsize == 2) {
230-
for (short *d = (short *) dest, *end = d + n, *s = (short *) src;
231-
d < end; d++, s++) {
232-
*d = _Py_atomic_load_short_relaxed(s);
236+
237+
if ((uintptr_t)dest & 1) { // dest and src are both at odd addresses: copy one byte to reach 2-byte alignment
238+
if (n) {
239+
*(char *)dest = _Py_atomic_load_char_relaxed((char *)src);
240+
dest = (char *)dest + 1;
241+
src = (char *)src + 1;
242+
n -= 1;
243+
}
244+
245+
if (!n) {
246+
return;
233247
}
234248
}
235-
else if (itemsize == 4) {
236-
for (PY_UINT32_T *d = (PY_UINT32_T *) dest, *end = d + n, *s = (PY_UINT32_T *) src;
237-
d < end; d++, s++) {
238-
*d = (PY_UINT32_T) _Py_atomic_load_uint32_relaxed(s);
249+
250+
if (diff & 2) { // dest and src differ in address bit 1, so they can never both be 4-byte aligned: copy in 2-byte units
251+
size_t n2 = n / 2;
252+
253+
for (void *end = (short *)dest + n2; dest < end;
254+
dest = (short *)dest + 1, src = (short *)src + 1) {
255+
*((short *)dest) = _Py_atomic_load_short_relaxed((short *)src);
256+
}
257+
258+
if (n & 1) {
259+
*((char *)dest) = _Py_atomic_load_char_relaxed((char *)src);
239260
}
261+
262+
return;
240263
}
241-
else if (itemsize == 8) {
242-
for (PY_UINT64_T *d = (PY_UINT64_T *) dest, *end = d + n, *s = (PY_UINT64_T *) src;
243-
d < end; d++, s++) {
244-
*d = (PY_UINT64_T) _Py_atomic_load_uint64_relaxed(s);
264+
265+
if ((uintptr_t)dest & 2) { // dest and src both have address bit 1 set (not 4-byte aligned): copy one 2-byte unit first
266+
if (n >= 2) {
267+
*(short *)dest = _Py_atomic_load_short_relaxed((short *)src);
268+
dest = (short *)dest + 1;
269+
src = (short *)src + 1;
270+
n -= 2;
271+
}
272+
273+
if (!n) {
274+
return;
245275
}
246276
}
247-
else {
248-
assert(false);
277+
278+
if (diff & 4) { // dest and src differ in address bit 2, so they can never both be 8-byte aligned: copy in 4-byte units
279+
size_t n4 = n / 4;
280+
281+
for (void *end = (PY_UINT32_T *)dest + n4; dest < end;
282+
dest = (PY_UINT32_T *)dest + 1, src = (PY_UINT32_T *)src + 1) {
283+
*((PY_UINT32_T *)dest) = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed((PY_UINT32_T *)src);
284+
}
285+
286+
if (n & 2) {
287+
*((short *)dest) = _Py_atomic_load_short_relaxed((short *)src);
288+
dest = (short *)dest + 1;
289+
src = (short *)src + 1;
290+
}
291+
292+
if (n & 1) {
293+
*((char *)dest) = _Py_atomic_load_char_relaxed((char *)src);
294+
}
295+
296+
return;
297+
}
298+
299+
if ((uintptr_t)dest & 4) { // dest and src both have address bit 2 set (not 8-byte aligned): copy one 4-byte unit first
300+
if (n >= 4) {
301+
*(PY_UINT32_T *)dest = _Py_atomic_load_uint32_relaxed((PY_UINT32_T *)src);
302+
dest = (PY_UINT32_T *)dest + 1;
303+
src = (PY_UINT32_T *)src + 1;
304+
n -= 4;
305+
}
306+
307+
if (!n) {
308+
return;
309+
}
310+
}
311+
312+
// the second half is the aligned copy: 8-byte units first, then the remaining 4-, 2- and 1-byte tail
313+
314+
size_t n8 = n / 8;
315+
316+
if (n8) {
317+
for (void *end = (PY_UINT64_T *)dest + n8; dest < end;
318+
dest = (PY_UINT64_T *)dest + 1, src = (PY_UINT64_T *)src + 1) {
319+
*((PY_UINT64_T *)dest) = (PY_UINT64_T)_Py_atomic_load_uint64_relaxed((PY_UINT64_T *)src);
320+
}
321+
322+
n -= n8 * 8;
323+
}
324+
325+
if (n & 4) {
326+
*((PY_UINT32_T *)dest) = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed((PY_UINT32_T *)src);
327+
dest = (PY_UINT32_T *)dest + 1;
328+
src = (PY_UINT32_T *)src + 1;
329+
}
330+
331+
if (n & 2) {
332+
*((short *)dest) = _Py_atomic_load_short_relaxed((short *)src);
333+
dest = (short *)dest + 1;
334+
src = (short *)src + 1;
335+
}
336+
337+
if (n & 1) {
338+
*((char *)dest) = _Py_atomic_load_char_relaxed((char *)src);
249339
}
250340
}
251341

342+
#define FT_ATOMIC_SOURCE_MEMCPY_RELAXED(dest, src, n) \
343+
_Py_atomic_source_memcpy_relaxed((dest), (src), (n))
344+
345+
#else
346+
347+
#define FT_ATOMIC_SOURCE_MEMCPY_RELAXED(dest, src, n) \
348+
memcpy((dest), (src), (n))
349+
252350
#endif
253351

254352
static int
@@ -327,11 +425,7 @@ array_resize(arrayobject *self, Py_ssize_t newsize)
327425
}
328426
if (data != NULL) {
329427
Py_ssize_t size = Py_SIZE(self);
330-
#ifdef Py_GIL_DISABLED
331-
atomic_itemcpy(newdata->items, data->items, Py_MIN(size, newsize), itemsize);
332-
#else
333-
memcpy(newdata->items, data->items, Py_MIN(size, newsize) * itemsize);
334-
#endif
428+
FT_ATOMIC_SOURCE_MEMCPY_RELAXED(newdata->items, data->items, Py_MIN(size, newsize) * itemsize);
335429
arraydata_free(data, _PyObject_GC_IS_SHARED(self));
336430
}
337431
_Py_atomic_store_ptr_release(&self->data, newdata);
@@ -1243,8 +1337,9 @@ array_slice(arrayobject *a, Py_ssize_t ilow, Py_ssize_t ihigh)
12431337
if (np == NULL)
12441338
return NULL;
12451339
if (ihigh > ilow) {
1246-
memcpy(np->data->items, a->data->items + ilow * a->ob_descr->itemsize,
1247-
(ihigh-ilow) * a->ob_descr->itemsize);
1340+
FT_ATOMIC_SOURCE_MEMCPY_RELAXED(
1341+
np->data->items, a->data->items + ilow * a->ob_descr->itemsize,
1342+
(ihigh-ilow) * a->ob_descr->itemsize);
12481343
}
12491344
return (PyObject *)np;
12501345
}
@@ -2895,7 +2990,7 @@ array_subscr_slice_lock_held(PyObject *op, PyObject *item)
28952990
slicelength, self->ob_descr);
28962991
if (result == NULL)
28972992
return NULL;
2898-
memcpy(((arrayobject *)result)->data->items,
2993+
FT_ATOMIC_SOURCE_MEMCPY_RELAXED(((arrayobject *)result)->data->items,
28992994
self->data->items + start * itemsize,
29002995
slicelength * itemsize);
29012996
return result;

0 commit comments

Comments
 (0)