@@ -15,68 +15,107 @@ terms of the MIT license. A copy of the license can be found in the file
 // Aligned Allocation
 // ------------------------------------------------------
 
-// Fallback primitive aligned allocation -- split out for better codegen
-static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
-{
-  mi_assert_internal(size <= PTRDIFF_MAX);
-  mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
+static bool mi_malloc_is_naturally_aligned(size_t size, size_t alignment) {
+  // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`).
+  mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
+  if (alignment > size) return false;
+  if (alignment <= MI_MAX_ALIGN_SIZE) return true;
+  const size_t bsize = mi_good_size(size);
+  return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0);
+}
 
-  const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
-  const size_t padsize = size + MI_PADDING_SIZE;
+#if MI_GUARDED
+static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, size_t size, size_t alignment, bool zero) mi_attr_noexcept {
+  // use over-allocation for guarded blocks
+  mi_assert_internal(alignment > 0 && alignment < MI_BLOCK_ALIGNMENT_MAX);
+  const size_t oversize = size + alignment - 1;
+  void* base = _mi_heap_malloc_guarded(heap, oversize, zero);
+  void* p = mi_align_up_ptr(base, alignment);
+  mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size);
+  mi_assert_internal(mi_usable_size(p) >= size);
+  mi_assert_internal(_mi_is_aligned(p, alignment));
+  return p;
+}
 
-  // use regular allocation if it is guaranteed to fit the alignment constraints
-  if (offset == 0 && alignment <= padsize && padsize <= MI_MAX_ALIGN_GUARANTEE && (padsize & align_mask) == 0) {
-    void* p = _mi_heap_malloc_zero(heap, size, zero);
-    mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
-    return p;
-  }
+static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
+  const size_t rate = heap->guarded_sample_rate;
+  // only write if `rate!=0` so we don't write to the constant `_mi_heap_empty`
+  if (rate != 0) { heap->guarded_sample_rate = 0; }
+  void* p = _mi_heap_malloc_zero(heap, size, zero);
+  if (rate != 0) { heap->guarded_sample_rate = rate; }
+  return p;
+}
+#else
+static void* mi_heap_malloc_zero_no_guarded(mi_heap_t* heap, size_t size, bool zero) {
+  return _mi_heap_malloc_zero(heap, size, zero);
+}
+#endif
+
+// Fallback aligned allocation that over-allocates -- split out for better codegen
+static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
+{
+  mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE));
+  mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
 
   void* p;
   size_t oversize;
-  if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
+  if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) {
     // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
     // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
     // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
     if mi_unlikely(offset != 0) {
       // todo: cannot support offset alignment for very large alignments yet
-      #if MI_DEBUG > 0
+      #if MI_DEBUG > 0
       _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
-      #endif
+      #endif
       return NULL;
     }
     oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
+    // note: no guarded as alignment > 0
     p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment);  // the page block size should be large enough to align in the single huge page block
     // zero afterwards as only the area from the aligned_p may be committed!
     if (p == NULL) return NULL;
   }
   else {
     // otherwise over-allocate
-    oversize = size + alignment - 1;
-    p = _mi_heap_malloc_zero(heap, oversize, zero);
+    oversize = (size < MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : size) + alignment - 1;  // adjust for size <= 16; with size 0 and alignment 64k we would otherwise allocate a 64k block and point just beyond it.
+    p = mi_heap_malloc_zero_no_guarded(heap, oversize, zero);
     if (p == NULL) return NULL;
   }
+  mi_page_t* page = _mi_ptr_page(p);
 
   // .. and align within the allocation
+  const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
   const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
   const uintptr_t adjust  = (poffset == 0 ? 0 : alignment - poffset);
   mi_assert_internal(adjust < alignment);
   void* aligned_p = (void*)((uintptr_t)p + adjust);
   if (aligned_p != p) {
-    mi_page_t* page = _mi_ptr_page(p);
     mi_page_set_has_aligned(page, true);
+    #if MI_GUARDED
+    // set tag to aligned so mi_usable_size works with guard pages
+    if (adjust >= sizeof(mi_block_t)) {
+      mi_block_t* const block = (mi_block_t*)p;
+      block->next = MI_BLOCK_TAG_ALIGNED;
+    }
+    #endif
     _mi_padding_shrink(page, (mi_block_t*)p, adjust + size);
   }
   // todo: expand padding if overallocated ?
 
-  mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
-  mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
+  mi_assert_internal(mi_page_usable_block_size(page) >= adjust + size);
   mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
   mi_assert_internal(mi_usable_size(aligned_p) >= size);
   mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p) + adjust);
+  #if MI_DEBUG > 1
+  mi_page_t* const apage = _mi_ptr_page(aligned_p);
+  void* unalign_p = _mi_page_ptr_unalign(apage, aligned_p);
+  mi_assert_internal(p == unalign_p);
+  #endif
 
   // now zero the block if needed
-  if (alignment > MI_ALIGNMENT_MAX) {
-    // for the tracker, on huge aligned allocations only from the start of the large block is defined
+  if (alignment > MI_BLOCK_ALIGNMENT_MAX) {
+    // for the tracker, on huge aligned allocations only the memory from the start of the large block is defined
     mi_track_mem_undefined(aligned_p, size);
     if (zero) {
       _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
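(Aside: the `poffset`/`adjust` computation in the over-allocation path above is plain modular arithmetic. The standalone sketch below, using a made-up pointer value and no mimalloc internals, just replays that math for one case.)

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      // suppose the over-allocated block starts at 0x10008 and we want 64-byte alignment at offset 0
      const uintptr_t p = 0x10008, offset = 0, alignment = 64;
      const uintptr_t align_mask = alignment - 1;
      const uintptr_t poffset   = (p + offset) & align_mask;                 // 0x08
      const uintptr_t adjust    = (poffset == 0 ? 0 : alignment - poffset);  // 0x38
      const uintptr_t aligned_p = p + adjust;                                // 0x10040, 64-byte aligned
      assert(((aligned_p + offset) % alignment) == 0);
      return 0;
    }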
@@ -85,10 +124,47 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
 
   if (p != aligned_p) {
     mi_track_align(p, aligned_p, adjust, mi_usable_size(aligned_p));
+    #if MI_GUARDED
+    mi_track_mem_defined(p, sizeof(mi_block_t));
+    #endif
   }
   return aligned_p;
 }
 
+// Generic primitive aligned allocation -- split out for better codegen
+static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
+{
+  mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
+  // we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
+  if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) {
+    #if MI_DEBUG > 0
+    _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
+    #endif
+    return NULL;
+  }
+
+  // use regular allocation if it is guaranteed to fit the alignment constraints.
+  // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exists
+  // a page with the right block size, and if we always used the over-alloc fallback that would never happen.
+  if (offset == 0 && mi_malloc_is_naturally_aligned(size, alignment)) {
+    void* p = mi_heap_malloc_zero_no_guarded(heap, size, zero);
+    mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
+    const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1)) == 0;
+    if mi_likely(is_aligned_or_null) {
+      return p;
+    }
+    else {
+      // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct..
+      mi_assert(false);
+      mi_free(p);
+    }
+  }
+
+  // fall back to over-allocation
+  return mi_heap_malloc_zero_aligned_at_overalloc(heap, size, alignment, offset, zero);
+}
+
+
 // Primitive aligned allocation
 static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
 {
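(Note: `mi_malloc_is_naturally_aligned`, used by the generic path above, is a pure predicate and is easy to probe outside the allocator. A minimal sketch follows; the constants 16 and 128 and the 8-byte rounding are only stand-ins for MI_MAX_ALIGN_SIZE, MI_MAX_ALIGN_GUARANTEE and mi_good_size, whose real values come from mimalloc's internal headers.)

    #include <stdbool.h>
    #include <stddef.h>

    #define ASSUMED_MAX_ALIGN_SIZE       16    // stand-in for MI_MAX_ALIGN_SIZE
    #define ASSUMED_MAX_ALIGN_GUARANTEE  128   // stand-in for MI_MAX_ALIGN_GUARANTEE

    static size_t assumed_good_size(size_t size) {
      return (size + 7) & ~(size_t)7;          // simplified size-class rounding; the real code uses mi_good_size
    }

    static bool is_naturally_aligned(size_t size, size_t alignment) {
      if (alignment > size) return false;
      if (alignment <= ASSUMED_MAX_ALIGN_SIZE) return true;
      const size_t bsize = assumed_good_size(size);
      return (bsize <= ASSUMED_MAX_ALIGN_GUARANTEE && (bsize & (alignment - 1)) == 0);
    }
    // e.g. is_naturally_aligned(64, 32) -> true, is_naturally_aligned(48, 32) -> false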
@@ -100,33 +176,32 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
     return NULL;
   }
 
-  if mi_unlikely(size > PTRDIFF_MAX) {   // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
-    #if MI_DEBUG > 0
-    _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
-    #endif
-    return NULL;
+  #if MI_GUARDED
+  if (offset == 0 && alignment < MI_BLOCK_ALIGNMENT_MAX && mi_heap_malloc_use_guarded(heap, size)) {
+    return mi_heap_malloc_guarded_aligned(heap, size, alignment, zero);
   }
-  const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
-  const size_t padsize = size + MI_PADDING_SIZE;  // note: cannot overflow due to earlier size > PTRDIFF_MAX check
+  #endif
 
   // try first if there happens to be a small block available with just the right alignment
-  if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) {
+  if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
+    const uintptr_t align_mask = alignment - 1;  // for any x, `(x & align_mask) == (x % alignment)`
+    const size_t padsize = size + MI_PADDING_SIZE;
     mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
-    const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask) == 0;
-    if mi_likely(page->free != NULL && is_aligned)
-    {
-      #if MI_STAT > 1
-      mi_heap_stat_increase(heap, malloc, size);
-      #endif
-      void* p = _mi_page_malloc(heap, page, padsize, zero);  // TODO: inline _mi_page_malloc
-      mi_assert_internal(p != NULL);
-      mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
-      mi_track_malloc(p, size, zero);
-      return p;
+    if mi_likely(page->free != NULL) {
+      const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask) == 0;
+      if mi_likely(is_aligned)
+      {
+        void* p = (zero ? _mi_page_malloc_zeroed(heap, page, padsize) : _mi_page_malloc(heap, page, padsize));  // call specific page malloc for better codegen
+        mi_assert_internal(p != NULL);
+        mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
+        mi_track_malloc(p, size, zero);
+        return p;
+      }
     }
   }
-  // fallback
-  return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero);
+
+  // fallback to generic aligned allocation
+  return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero);
 }
 
 
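(For reference, the `offset` parameter threaded through these primitives is exposed by the public `mi_malloc_aligned_at` API; the contract, matching the assertions above, is that `(uintptr_t)p + offset` is a multiple of the alignment. A small usage sketch:)

    #include <mimalloc.h>
    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      // 200-byte object whose byte at offset 16 must be 64-byte aligned
      void* p = mi_malloc_aligned_at(200, 64, 16);
      assert(p != NULL && (((uintptr_t)p + 16) % 64) == 0);
      mi_free(p);

      // plain aligned allocation (offset 0)
      void* q = mi_malloc_aligned(200, 64);
      assert(q != NULL && ((uintptr_t)q % 64) == 0);
      mi_free(q);
      return 0;
    }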
@@ -139,27 +214,12 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he
 }
 
 mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
-  if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL;
-  #if !MI_PADDING
-  // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
-  if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
-  #else
-  // with padding, we can only guarantee this for fixed alignments
-  if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)))
-               && size <= MI_SMALL_SIZE_MAX)
-  #endif
-  {
-    // fast path for common alignment and size
-    return mi_heap_malloc_small(heap, size);
-  }
-  else {
-    return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
-  }
+  return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
 }
 
 // ensure a definition is emitted
 #if defined(__cplusplus)
-static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned;
+void* _mi_extern_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned;
 #endif
 
 // ------------------------------------------------------
@@ -227,9 +287,9 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne
   void* newp = mi_heap_malloc_aligned_at(heap, newsize, alignment, offset);
   if (newp != NULL) {
     if (zero && newsize > size) {
-      // also set last word in the previous allocation to zero to ensure any padding is zero-initialized
-      size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
-      _mi_memzero((uint8_t*)newp + start, newsize - start);
+      // also set last word in the previous allocation to zero to ensure any padding is zero-initialized
+      size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
+      _mi_memzero((uint8_t*)newp + start, newsize - start);
     }
     _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
     mi_free(p);  // only free if successful
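(The zero-and-copy logic above backs the public zero-initializing aligned reallocation calls; a small usage sketch with the documented public API:)

    #include <mimalloc.h>
    #include <string.h>

    int main(void) {
      void* p = mi_zalloc_aligned(100, 32);   // zero-initialized, 32-byte aligned
      if (p == NULL) return 1;
      memset(p, 0xAB, 100);
      p = mi_rezalloc_aligned(p, 240, 32);    // grow; keeps 32-byte alignment and zero-initializes the new tail
      mi_free(p);
      return 0;
    }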
@@ -296,3 +356,5 @@ mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t
 mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
   return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment);
 }
+
+