@@ -217,38 +217,136 @@ arraydata_realloc(arraydata *data, Py_ssize_t size, int itemsize)
217217
#ifdef Py_GIL_DISABLED

/*
 * Copy `n` bytes from `src` to `dest`, reading the source with relaxed
 * atomic loads and writing the destination with plain stores.
 *
 * The copy always uses the widest load (1, 2, 4 or 8 bytes) at which `dest`
 * and `src` can both be naturally aligned at the same time:
 *
 *   1. width: the lowest address bit in which the two pointers differ caps
 *      the usable access width (pointers that differ in bit 0 can never be
 *      2-byte aligned together, and so on);
 *   2. head:  narrower loads advance both pointers to a `width` boundary;
 *   3. bulk:  as many `width`-sized loads as fit;
 *   4. tail:  the remaining (< width) bytes, widest piece first.
 *
 * The arithmetic assumes sizeof(short) == 2.
 *
 * NOTE(review): the original author remarked that this helper does not
 * really belong in this file; presumably it should live next to the other
 * atomic helpers -- confirm before relying on its location.
 *
 * dest: destination buffer, at least `n` bytes; must not overlap `src`
 *       (same contract as the memcpy() it stands in for).
 * src:  source buffer, at least `n` bytes; only ever read.
 * n:    number of bytes to copy; 0 is allowed and copies nothing.
 */
static void
_Py_atomic_source_memcpy_relaxed(void *dest, const void *src, size_t n)
{
    char *d = (char *)dest;
    const char *s = (const char *)src;

    // Address bits in which the two pointers disagree.  Kept as uintptr_t so
    // no pointer-sized value is squeezed through an int.
    const uintptr_t diff = (uintptr_t)d ^ (uintptr_t)s;
    const size_t width = (diff & 1) ? 1 : (diff & 2) ? 2 : (diff & 4) ? 4 : 8;

    // Head: bring both pointers up to a `width`-aligned address.  A step is
    // skipped when fewer bytes remain than it would copy; in that case the
    // bulk loop below runs zero times and the tail finishes the job.
    if (width > 1 && ((uintptr_t)d & 1) && n >= 1) {
        *d = _Py_atomic_load_char_relaxed(s);
        d += 1;
        s += 1;
        n -= 1;
    }
    if (width > 2 && ((uintptr_t)d & 2) && n >= 2) {
        *(short *)d = _Py_atomic_load_short_relaxed((const short *)s);
        d += 2;
        s += 2;
        n -= 2;
    }
    if (width > 4 && ((uintptr_t)d & 4) && n >= 4) {
        *(PY_UINT32_T *)d = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed(
            (const PY_UINT32_T *)s);
        d += 4;
        s += 4;
        n -= 4;
    }

    // Bulk: whole `width`-sized units.
    size_t units = n / width;
    n -= units * width;

    switch (width) {
    case 8:
        for (; units > 0; units--, d += 8, s += 8) {
            *(PY_UINT64_T *)d = (PY_UINT64_T)_Py_atomic_load_uint64_relaxed(
                (const PY_UINT64_T *)s);
        }
        break;
    case 4:
        for (; units > 0; units--, d += 4, s += 4) {
            *(PY_UINT32_T *)d = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed(
                (const PY_UINT32_T *)s);
        }
        break;
    case 2:
        for (; units > 0; units--, d += 2, s += 2) {
            *(short *)d = _Py_atomic_load_short_relaxed((const short *)s);
        }
        break;
    default:
        for (; units > 0; units--, d += 1, s += 1) {
            *d = _Py_atomic_load_char_relaxed(s);
        }
        break;
    }

    // Tail: n < width here, so only pieces narrower than `width` can remain.
    if (n & 4) {
        *(PY_UINT32_T *)d = (PY_UINT32_T)_Py_atomic_load_uint32_relaxed(
            (const PY_UINT32_T *)s);
        d += 4;
        s += 4;
    }
    if (n & 2) {
        *(short *)d = _Py_atomic_load_short_relaxed((const short *)s);
        d += 2;
        s += 2;
    }
    if (n & 1) {
        *d = _Py_atomic_load_char_relaxed(s);
    }
}

// Free-threaded build: the source may be read while another thread writes
// it, so it is read through the relaxed atomic copy above.
#define FT_ATOMIC_SOURCE_MEMCPY_RELAXED(dest, src, n) \
    _Py_atomic_source_memcpy_relaxed((dest), (src), (n))

#else

// Default build: a plain memcpy() is used.
#define FT_ATOMIC_SOURCE_MEMCPY_RELAXED(dest, src, n) \
    memcpy((dest), (src), (n))

#endif
253351
254352static  int 
@@ -327,11 +425,7 @@ array_resize(arrayobject *self, Py_ssize_t newsize)
327425    }
328426    if  (data  !=  NULL ) {
329427        Py_ssize_t  size  =  Py_SIZE (self );
330- #ifdef  Py_GIL_DISABLED 
331-         atomic_itemcpy (newdata -> items , data -> items , Py_MIN (size , newsize ), itemsize );
332- #else 
333-         memcpy (newdata -> items , data -> items , Py_MIN (size , newsize ) *  itemsize );
334- #endif 
428+         FT_ATOMIC_SOURCE_MEMCPY_RELAXED (newdata -> items , data -> items , Py_MIN (size , newsize ) *  itemsize );
335429        arraydata_free (data , _PyObject_GC_IS_SHARED (self ));
336430    }
337431    _Py_atomic_store_ptr_release (& self -> data , newdata );
@@ -1243,8 +1337,9 @@ array_slice(arrayobject *a, Py_ssize_t ilow, Py_ssize_t ihigh)
12431337    if  (np  ==  NULL )
12441338        return  NULL ;
12451339    if  (ihigh  >  ilow ) {
1246-         memcpy (np -> data -> items , a -> data -> items  +  ilow  *  a -> ob_descr -> itemsize ,
1247-                (ihigh - ilow ) *  a -> ob_descr -> itemsize );
1340+         FT_ATOMIC_SOURCE_MEMCPY_RELAXED (
1341+             np -> data -> items , a -> data -> items  +  ilow  *  a -> ob_descr -> itemsize ,
1342+             (ihigh - ilow ) *  a -> ob_descr -> itemsize );
12481343    }
12491344    return  (PyObject  * )np ;
12501345}
@@ -2895,7 +2990,7 @@ array_subscr_slice_lock_held(PyObject *op, PyObject *item)
28952990                                    slicelength , self -> ob_descr );
28962991            if  (result  ==  NULL )
28972992                return  NULL ;
2898-             memcpy (((arrayobject  * )result )-> data -> items ,
2993+             FT_ATOMIC_SOURCE_MEMCPY_RELAXED (((arrayobject  * )result )-> data -> items ,
28992994                   self -> data -> items  +  start  *  itemsize ,
29002995                   slicelength  *  itemsize );
29012996            return  result ;
0 commit comments