
Commit 9d6c59c

Merge branch 'for-5.17/struct-slab' into for-linus
Series "Separate struct slab from struct page" v4 This is originally an offshoot of the folio work by Matthew. One of the more complex parts of the struct page definition are the parts used by the slab allocators. It would be good for the MM in general if struct slab were its own data type, and it also helps to prevent tail pages from slipping in anywhere. As Matthew requested in his proof of concept series, I have taken over the development of this series, so it's a mix of patches from him (often modified by me) and my own. One big difference is the use of coccinelle to perform the relatively trivial parts of the conversions automatically and at once, instead of a larger number of smaller incremental reviewable steps. Thanks to Julia Lawall and Luis Chamberlain for all their help! Another notable difference is (based also on review feedback) I don't represent with a struct slab the large kmalloc allocations which are not really a slab, but use page allocator directly. When going from an object address to a struct slab, the code tests first folio slab flag, and only if it's set it converts to struct slab. This makes the struct slab type stronger. Finally, although Matthew's version didn't use any of the folio work, the initial support has been merged meanwhile so my version builds on top of it where appropriate. This eliminates some of the redundant compound_head() being performed e.g. when testing the slab flag. To sum up, after this series, struct page fields used by slab allocators are moved from struct page to a new struct slab, that uses the same physical storage. The availability of the fields is further distinguished by the selected slab allocator implementation. The advantages include: - Similar to folios, if the slab is of order > 0, struct slab always is guaranteed to be the head page. Additionally it's guaranteed to be an actual slab page, not a large kmalloc. This removes uncertainty and potential for bugs. - It's not possible to accidentally use fields of the slab implementation that's not configured. - Other subsystems cannot use slab's fields in struct page anymore (some existing non-slab usages had to be adjusted in this series), so slab implementations have more freedom in rearranging them in the struct slab. Link: https://lore.kernel.org/all/[email protected]/
2 parents: eb52c0f + b01af5c

27 files changed (+1264, -1062 lines)

arch/x86/mm/init_64.c

Lines changed: 1 addition & 1 deletion

@@ -981,7 +981,7 @@ static void __meminit free_pagetable(struct page *page, int order)
 	if (PageReserved(page)) {
 		__ClearPageReserved(page);
 
-		magic = (unsigned long)page->freelist;
+		magic = page->index;
 		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 			while (nr_pages--)
 				put_page_bootmem(page++);

include/linux/bootmem_info.h

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ void put_page_bootmem(struct page *page);
  */
 static inline void free_bootmem_page(struct page *page)
 {
-	unsigned long magic = (unsigned long)page->freelist;
+	unsigned long magic = page->index;
 
 	/*
 	 * The reserve_bootmem_region sets the reserved flag on bootmem

include/linux/kasan.h

Lines changed: 5 additions & 4 deletions

@@ -9,6 +9,7 @@
 
 struct kmem_cache;
 struct page;
+struct slab;
 struct vm_struct;
 struct task_struct;
 
@@ -193,11 +194,11 @@ static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
 	return 0;
 }
 
-void __kasan_poison_slab(struct page *page);
-static __always_inline void kasan_poison_slab(struct page *page)
+void __kasan_poison_slab(struct slab *slab);
+static __always_inline void kasan_poison_slab(struct slab *slab)
 {
 	if (kasan_enabled())
-		__kasan_poison_slab(page);
+		__kasan_poison_slab(slab);
 }
 
 void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object);
@@ -322,7 +323,7 @@ static inline void kasan_cache_create(struct kmem_cache *cache,
 				      slab_flags_t *flags) {}
 static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
 static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
-static inline void kasan_poison_slab(struct page *page) {}
+static inline void kasan_poison_slab(struct slab *slab) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
 					void *object) {}
 static inline void kasan_poison_object_data(struct kmem_cache *cache,

include/linux/memcontrol.h

Lines changed: 0 additions & 48 deletions

@@ -536,61 +536,13 @@ static inline bool folio_memcg_kmem(struct folio *folio)
 	return folio->memcg_data & MEMCG_DATA_KMEM;
 }
 
-/*
- * page_objcgs - get the object cgroups vector associated with a page
- * @page: a pointer to the page struct
- *
- * Returns a pointer to the object cgroups vector associated with the page,
- * or NULL. This function assumes that the page is known to have an
- * associated object cgroups vector. It's not safe to call this function
- * against pages, which might have an associated memory cgroup: e.g.
- * kernel stack pages.
- */
-static inline struct obj_cgroup **page_objcgs(struct page *page)
-{
-	unsigned long memcg_data = READ_ONCE(page->memcg_data);
-
-	VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page);
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
-
-	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
-}
-
-/*
- * page_objcgs_check - get the object cgroups vector associated with a page
- * @page: a pointer to the page struct
- *
- * Returns a pointer to the object cgroups vector associated with the page,
- * or NULL. This function is safe to use if the page can be directly associated
- * with a memory cgroup.
- */
-static inline struct obj_cgroup **page_objcgs_check(struct page *page)
-{
-	unsigned long memcg_data = READ_ONCE(page->memcg_data);
-
-	if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS))
-		return NULL;
-
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
-
-	return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
-}
 
 #else
 static inline bool folio_memcg_kmem(struct folio *folio)
 {
 	return false;
 }
 
-static inline struct obj_cgroup **page_objcgs(struct page *page)
-{
-	return NULL;
-}
-
-static inline struct obj_cgroup **page_objcgs_check(struct page *page)
-{
-	return NULL;
-}
 #endif
 
 static inline bool PageMemcgKmem(struct page *page)

include/linux/mm.h

Lines changed: 12 additions & 0 deletions

@@ -863,6 +863,13 @@ static inline struct page *virt_to_head_page(const void *x)
 	return compound_head(page);
 }
 
+static inline struct folio *virt_to_folio(const void *x)
+{
+	struct page *page = virt_to_page(x);
+
+	return page_folio(page);
+}
+
 void __put_page(struct page *page);
 
 void put_pages_list(struct list_head *pages);
@@ -1753,6 +1760,11 @@ void page_address_init(void);
 #define page_address_init() do { } while(0)
 #endif
 
+static inline void *folio_address(const struct folio *folio)
+{
+	return page_address(&folio->page);
+}
+
 extern void *page_rmapping(struct page *page);
 extern struct anon_vma *page_anon_vma(struct page *page);
 extern pgoff_t __page_file_index(struct page *page);
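The two helpers added above only compose existing primitives: virt_to_folio() is virt_to_page() followed by page_folio(), and folio_address() is page_address() on the folio's head page. A short usage sketch; the kmalloc() caller is a hypothetical example, not part of this hunk:

	void *buf = kmalloc(64, GFP_KERNEL);		/* any directly-mapped kernel address */
	struct folio *folio = virt_to_folio(buf);	/* folio containing that address */
	void *base = folio_address(folio);		/* virtual address of the folio's first page */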

include/linux/mm_types.h

Lines changed: 5 additions & 5 deletions

@@ -56,11 +56,11 @@ struct mem_cgroup;
  * in each subpage, but you may need to restore some of their values
  * afterwards.
  *
- * SLUB uses cmpxchg_double() to atomically update its freelist and
- * counters. That requires that freelist & counters be adjacent and
- * double-word aligned. We align all struct pages to double-word
- * boundaries, and ensure that 'freelist' is aligned within the
- * struct.
+ * SLUB uses cmpxchg_double() to atomically update its freelist and counters.
+ * That requires that freelist & counters in struct slab be adjacent and
+ * double-word aligned. Because struct slab currently just reinterprets the
+ * bits of struct page, we align all struct pages to double-word boundaries,
+ * and ensure that 'freelist' is aligned within struct slab.
  */
 #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
 #define _struct_page_alignment	__aligned(2 * sizeof(unsigned long))
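Because struct slab reinterprets the bits of struct page, the constraint spelled out in the comment above lends itself to compile-time checking. A sketch of the kind of assertions that can back it up, assuming a struct slab with a 'freelist' member as introduced by this series (illustrative, not the exact checks used in mm/slab.h):

	/* struct slab must fit inside, and stay aligned with, struct page. */
	static_assert(sizeof(struct slab) <= sizeof(struct page));
	/* cmpxchg_double() needs 'freelist' (and the adjacent counters) double-word aligned. */
	static_assert(offsetof(struct slab, freelist) % (2 * sizeof(unsigned long)) == 0);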

include/linux/slab.h

Lines changed: 0 additions & 8 deletions

@@ -189,14 +189,6 @@ bool kmem_valid_obj(void *object);
 void kmem_dump_obj(void *object);
 #endif
 
-#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
-void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
-			 bool to_user);
-#else
-static inline void __check_heap_object(const void *ptr, unsigned long n,
-				       struct page *page, bool to_user) { }
-#endif
-
 /*
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than the alignment of a 64-bit integer.

include/linux/slab_def.h

Lines changed: 8 additions & 8 deletions

@@ -87,11 +87,11 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
-static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
 				 void *x)
 {
-	void *object = x - (x - page->s_mem) % cache->size;
-	void *last_object = page->s_mem + (cache->num - 1) * cache->size;
+	void *object = x - (x - slab->s_mem) % cache->size;
+	void *last_object = slab->s_mem + (cache->num - 1) * cache->size;
 
 	if (unlikely(object > last_object))
 		return last_object;
@@ -106,16 +106,16 @@ static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
  * reciprocal_divide(offset, cache->reciprocal_buffer_size)
  */
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-					const struct page *page, void *obj)
+					const struct slab *slab, void *obj)
 {
-	u32 offset = (obj - page->s_mem);
+	u32 offset = (obj - slab->s_mem);
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
-static inline int objs_per_slab_page(const struct kmem_cache *cache,
-				     const struct page *page)
+static inline int objs_per_slab(const struct kmem_cache *cache,
+				const struct slab *slab)
 {
-	if (is_kfence_address(page_address(page)))
+	if (is_kfence_address(slab_address(slab)))
 		return 1;
 	return cache->num;
 }
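The SLAB helpers above are plain base-plus-stride arithmetic over slab->s_mem; the conversion only changes the argument type from struct page to struct slab. A worked sketch with made-up numbers (s_mem, size and x are hypothetical values, not taken from this patch):

	void *s_mem = (void *)0x1000;			/* slab->s_mem: address of the first object */
	unsigned int size = 64;				/* cache->size */
	void *x = (void *)0x10a8;			/* 168 bytes in, i.e. somewhere inside object 2 */

	void *object = x - (x - s_mem) % size;		/* 0x10a8 - (168 % 64) = 0x1080, start of object 2 */
	unsigned long index = (x - s_mem) / size;	/* 2; obj_to_index() gets this via reciprocal_divide() */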

include/linux/slub_def.h

Lines changed: 13 additions & 16 deletions

@@ -48,9 +48,9 @@ enum stat_item {
 struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to next available object */
 	unsigned long tid;	/* Globally unique transaction id */
-	struct page *page;	/* The slab from which we are allocating */
+	struct slab *slab;	/* The slab from which we are allocating */
 #ifdef CONFIG_SLUB_CPU_PARTIAL
-	struct page *partial;	/* Partially allocated frozen slabs */
+	struct slab *partial;	/* Partially allocated frozen slabs */
 #endif
 	local_lock_t lock;	/* Protects the fields above */
 #ifdef CONFIG_SLUB_STATS
@@ -99,8 +99,8 @@ struct kmem_cache {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 	/* Number of per cpu partial objects to keep around */
 	unsigned int cpu_partial;
-	/* Number of per cpu partial pages to keep around */
-	unsigned int cpu_partial_pages;
+	/* Number of per cpu partial slabs to keep around */
+	unsigned int cpu_partial_slabs;
 #endif
 	struct kmem_cache_order_objects oo;
 
@@ -156,16 +156,13 @@ static inline void sysfs_slab_release(struct kmem_cache *s)
 }
 #endif
 
-void object_err(struct kmem_cache *s, struct page *page,
-		u8 *object, char *reason);
-
 void *fixup_red_left(struct kmem_cache *s, void *p);
 
-static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
+static inline void *nearest_obj(struct kmem_cache *cache, const struct slab *slab,
 			void *x) {
-	void *object = x - (x - page_address(page)) % cache->size;
-	void *last_object = page_address(page) +
-		(page->objects - 1) * cache->size;
+	void *object = x - (x - slab_address(slab)) % cache->size;
+	void *last_object = slab_address(slab) +
+		(slab->objects - 1) * cache->size;
 	void *result = (unlikely(object > last_object)) ? last_object : object;
 
 	result = fixup_red_left(cache, result);
@@ -181,16 +178,16 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
 }
 
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
-					const struct page *page, void *obj)
+					const struct slab *slab, void *obj)
 {
 	if (is_kfence_address(obj))
 		return 0;
-	return __obj_to_index(cache, page_address(page), obj);
+	return __obj_to_index(cache, slab_address(slab), obj);
 }
 
-static inline int objs_per_slab_page(const struct kmem_cache *cache,
-				     const struct page *page)
+static inline int objs_per_slab(const struct kmem_cache *cache,
+				const struct slab *slab)
 {
-	return page->objects;
+	return slab->objects;
 }
 #endif /* _LINUX_SLUB_DEF_H */

mm/bootmem_info.c

Lines changed: 3 additions & 4 deletions

@@ -15,22 +15,21 @@
 
 void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
 {
-	page->freelist = (void *)type;
+	page->index = type;
 	SetPagePrivate(page);
 	set_page_private(page, info);
 	page_ref_inc(page);
 }
 
 void put_page_bootmem(struct page *page)
 {
-	unsigned long type;
+	unsigned long type = page->index;
 
-	type = (unsigned long) page->freelist;
 	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
 	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
 
 	if (page_ref_dec_return(page) == 1) {
-		page->freelist = NULL;
+		page->index = 0;
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
 		INIT_LIST_HEAD(&page->lru);
