@@ -217,38 +217,136 @@ arraydata_realloc(arraydata *data, Py_ssize_t size, int itemsize)
217217
#ifdef Py_GIL_DISABLED

/* Copy `n` bytes from `src` to `dest`, reading the source exclusively through
 * relaxed atomic loads and writing the destination with plain stores.
 *
 * NOTE(review): the atomic loads are presumably there so that a concurrent
 * writer to the source buffer is not a data race in free-threaded builds --
 * confirm against the callers' locking.
 *
 * Each access is made as wide as the mutual alignment of the two pointers
 * allows (1, 2, 4 or 8 bytes), so every load and store is naturally aligned.
 * This assumes sizeof(short) == 2.
 *
 * The copy runs forward and does not handle overlapping regions, matching the
 * memcpy() fallback used when Py_GIL_DISABLED is not defined.
 *
 * NOTE: this helper is generic and does not really belong in this file; it
 * lives here provisionally.
 *
 *   dest  destination buffer, at least `n` bytes
 *   src   source buffer, at least `n` bytes; never written
 *   n     number of bytes to copy (0 is a no-op)
 */
static void
_Py_atomic_source_memcpy_relaxed(void *dest, const void *src, size_t n)
{
    // Low address bits in which dest and src disagree.  Advancing both
    // pointers by the same amount never changes the lowest differing bit,
    // so this is computed once up front.
    const uintptr_t diff = (uintptr_t)dest ^ (uintptr_t)src;

    // ---- First half: deal with misalignment. ----

    if (diff & 1) {
        // dest and src can never both be 2-byte aligned: copy bytewise.
        for (void *end = (char *)dest + n; dest < end;
             dest = (char *)dest + 1, src = (const char *)src + 1) {
            *(char *)dest = _Py_atomic_load_char_relaxed((const char *)src);
        }

        return;
    }

    if ((uintptr_t)dest & 1) {
        // Both pointers are odd: one byte brings them to 2-byte alignment.
        if (n) {
            *(char *)dest = _Py_atomic_load_char_relaxed((const char *)src);
            dest = (char *)dest + 1;
            src = (const char *)src + 1;
            n -= 1;
        }

        if (!n) {
            return;
        }
    }

    if (diff & 2) {
        // dest and src can never both be 4-byte aligned: copy 2 bytes at a
        // time, then the odd trailing byte if any.
        size_t n2 = n / 2;

        for (void *end = (short *)dest + n2; dest < end;
             dest = (short *)dest + 1, src = (const short *)src + 1) {
            *(short *)dest = _Py_atomic_load_short_relaxed((const short *)src);
        }

        if (n & 1) {
            *(char *)dest = _Py_atomic_load_char_relaxed((const char *)src);
        }

        return;
    }

    if ((uintptr_t)dest & 2) {
        // Both pointers sit at 2 (mod 4): one short brings them to 4-byte
        // alignment.  With fewer than 2 bytes left nothing is copied here
        // and the tail code below finishes the job.
        if (n >= 2) {
            *(short *)dest = _Py_atomic_load_short_relaxed((const short *)src);
            dest = (short *)dest + 1;
            src = (const short *)src + 1;
            n -= 2;
        }

        if (!n) {
            return;
        }
    }

    if (diff & 4) {
        // dest and src can never both be 8-byte aligned: copy 4 bytes at a
        // time, then the 2- and 1-byte remainders.
        size_t n4 = n / 4;

        for (void *end = (PY_UINT32_T *)dest + n4; dest < end;
             dest = (PY_UINT32_T *)dest + 1,
             src = (const PY_UINT32_T *)src + 1) {
            *(PY_UINT32_T *)dest = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed(
                (const PY_UINT32_T *)src);
        }

        if (n & 2) {
            *(short *)dest = _Py_atomic_load_short_relaxed((const short *)src);
            dest = (short *)dest + 1;
            src = (const short *)src + 1;
        }

        if (n & 1) {
            *(char *)dest = _Py_atomic_load_char_relaxed((const char *)src);
        }

        return;
    }

    if ((uintptr_t)dest & 4) {
        // Both pointers sit at 4 (mod 8): one 32-bit word brings them to
        // 8-byte alignment.  With fewer than 4 bytes left nothing is copied
        // here and the tail code below finishes the job.
        if (n >= 4) {
            *(PY_UINT32_T *)dest = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed(
                (const PY_UINT32_T *)src);
            dest = (PY_UINT32_T *)dest + 1;
            src = (const PY_UINT32_T *)src + 1;
            n -= 4;
        }

        if (!n) {
            return;
        }
    }

    // ---- Second half: aligned copy. ----
    // Whenever at least 8 bytes remain here, both pointers are 8-byte
    // aligned, because every alignment step above was taken.

    size_t n8 = n / 8;

    if (n8) {
        for (void *end = (PY_UINT64_T *)dest + n8; dest < end;
             dest = (PY_UINT64_T *)dest + 1,
             src = (const PY_UINT64_T *)src + 1) {
            *(PY_UINT64_T *)dest = (PY_UINT64_T)_Py_atomic_load_uint64_relaxed(
                (const PY_UINT64_T *)src);
        }

        n -= n8 * 8;
    }

    // Fewer than 8 bytes remain: peel them off widest first so each access
    // stays naturally aligned.
    if (n & 4) {
        *(PY_UINT32_T *)dest = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed(
            (const PY_UINT32_T *)src);
        dest = (PY_UINT32_T *)dest + 1;
        src = (const PY_UINT32_T *)src + 1;
    }

    if (n & 2) {
        *(short *)dest = _Py_atomic_load_short_relaxed((const short *)src);
        dest = (short *)dest + 1;
        src = (const short *)src + 1;
    }

    if (n & 1) {
        *(char *)dest = _Py_atomic_load_char_relaxed((const char *)src);
    }
}

// Free-threaded build: read the source through relaxed atomic loads.
#define FT_ATOMIC_SOURCE_MEMCPY_RELAXED(dest, src, n) \
    _Py_atomic_source_memcpy_relaxed((dest), (src), (n))

#else

// Default build: a plain memcpy() is used.
#define FT_ATOMIC_SOURCE_MEMCPY_RELAXED(dest, src, n) \
    memcpy((dest), (src), (n))

#endif
253351
254352static int
@@ -327,11 +425,7 @@ array_resize(arrayobject *self, Py_ssize_t newsize)
327425 }
328426 if (data != NULL ) {
329427 Py_ssize_t size = Py_SIZE (self );
330- #ifdef Py_GIL_DISABLED
331- atomic_itemcpy (newdata -> items , data -> items , Py_MIN (size , newsize ), itemsize );
332- #else
333- memcpy (newdata -> items , data -> items , Py_MIN (size , newsize ) * itemsize );
334- #endif
428+ FT_ATOMIC_SOURCE_MEMCPY_RELAXED (newdata -> items , data -> items , Py_MIN (size , newsize ) * itemsize );
335429 arraydata_free (data , _PyObject_GC_IS_SHARED (self ));
336430 }
337431 _Py_atomic_store_ptr_release (& self -> data , newdata );
@@ -1243,8 +1337,9 @@ array_slice(arrayobject *a, Py_ssize_t ilow, Py_ssize_t ihigh)
12431337 if (np == NULL )
12441338 return NULL ;
12451339 if (ihigh > ilow ) {
1246- memcpy (np -> data -> items , a -> data -> items + ilow * a -> ob_descr -> itemsize ,
1247- (ihigh - ilow ) * a -> ob_descr -> itemsize );
1340+ FT_ATOMIC_SOURCE_MEMCPY_RELAXED (
1341+ np -> data -> items , a -> data -> items + ilow * a -> ob_descr -> itemsize ,
1342+ (ihigh - ilow ) * a -> ob_descr -> itemsize );
12481343 }
12491344 return (PyObject * )np ;
12501345}
@@ -2895,7 +2990,7 @@ array_subscr_slice_lock_held(PyObject *op, PyObject *item)
28952990 slicelength , self -> ob_descr );
28962991 if (result == NULL )
28972992 return NULL ;
2898- memcpy (((arrayobject * )result )-> data -> items ,
2993+ FT_ATOMIC_SOURCE_MEMCPY_RELAXED (((arrayobject * )result )-> data -> items ,
28992994 self -> data -> items + start * itemsize ,
29002995 slicelength * itemsize );
29012996 return result ;
0 commit comments