Skip to content

Commit 45ff8b4

Browse files
Dave Chinnerdchinner
authored andcommitted
xfs: can't use kmem_zalloc() for attribute buffers
Because heap allocation of 64kB buffers will fail: .... XFS: fs_mark(8414) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8417) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8409) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8428) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8430) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8437) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8433) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8406) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8412) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8432) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) XFS: fs_mark(8424) possible memory allocation deadlock size 65768 in kmem_alloc (mode:0x2d40) .... I'd use kvmalloc() instead, but.... - 48.19% xfs_attr_create_intent - 46.89% xfs_attri_init - kvmalloc_node - 46.04% __kmalloc_node - kmalloc_large_node - 45.99% __alloc_pages - 39.39% __alloc_pages_slowpath.constprop.0 - 38.89% __alloc_pages_direct_compact - 38.71% try_to_compact_pages - compact_zone_order - compact_zone - 21.09% isolate_migratepages_block 10.31% PageHuge 5.82% set_pfnblock_flags_mask 0.86% get_pfnblock_flags_mask - 4.48% __reset_isolation_suitable 4.44% __reset_isolation_pfn - 3.56% __pageblock_pfn_to_page 1.33% pfn_to_online_page 2.83% get_pfnblock_flags_mask - 0.87% migrate_pages 0.86% compaction_alloc 0.84% find_suitable_fallback - 6.60% get_page_from_freelist 4.99% clear_page_erms - 1.19% _raw_spin_lock_irqsave - do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.86% __vmalloc_node_range 0.65% __alloc_pages_bulk .... this is just yet another reminder of how much kvmalloc() sucks. So lift xlog_cil_kvmalloc(), rename it to xlog_kvmalloc() and use that instead.... We also clean up the attribute name and value lengths as they no longer need to be rounded out to sizes compatible with log vectors. Signed-off-by: Dave Chinner <[email protected]> Reviewed-by: Allison Henderson <[email protected]> Reviewed-by: Darrick J. Wong <[email protected]> Signed-off-by: Dave Chinner <[email protected]>
1 parent 51e6104 commit 45ff8b4

File tree

3 files changed

+50
-54
lines changed

3 files changed

+50
-54
lines changed

fs/xfs/xfs_attr_item.c

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ xfs_attri_item_free(
4444
struct xfs_attri_log_item *attrip)
4545
{
4646
kmem_free(attrip->attri_item.li_lv_shadow);
47-
kmem_free(attrip);
47+
kvfree(attrip);
4848
}
4949

5050
/*
@@ -119,11 +119,11 @@ xfs_attri_item_format(
119119
sizeof(struct xfs_attri_log_format));
120120
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME,
121121
attrip->attri_name,
122-
xlog_calc_iovec_len(attrip->attri_name_len));
122+
attrip->attri_name_len);
123123
if (attrip->attri_value_len > 0)
124124
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
125125
attrip->attri_value,
126-
xlog_calc_iovec_len(attrip->attri_value_len));
126+
attrip->attri_value_len);
127127
}
128128

129129
/*
@@ -163,26 +163,21 @@ xfs_attri_init(
163163

164164
{
165165
struct xfs_attri_log_item *attrip;
166-
uint32_t name_vec_len = 0;
167-
uint32_t value_vec_len = 0;
168-
uint32_t buffer_size;
169-
170-
if (name_len)
171-
name_vec_len = xlog_calc_iovec_len(name_len);
172-
if (value_len)
173-
value_vec_len = xlog_calc_iovec_len(value_len);
174-
175-
buffer_size = name_vec_len + value_vec_len;
166+
uint32_t buffer_size = name_len + value_len;
176167

177168
if (buffer_size) {
178-
attrip = kmem_zalloc(sizeof(struct xfs_attri_log_item) +
179-
buffer_size, KM_NOFS);
180-
if (attrip == NULL)
181-
return NULL;
169+
/*
170+
* This could be over 64kB in length, so we have to use
171+
* kvmalloc() for this. But kvmalloc() utterly sucks, so we
172+
* use own version.
173+
*/
174+
attrip = xlog_kvmalloc(sizeof(struct xfs_attri_log_item) +
175+
buffer_size);
182176
} else {
183-
attrip = kmem_cache_zalloc(xfs_attri_cache,
184-
GFP_NOFS | __GFP_NOFAIL);
177+
attrip = kmem_cache_alloc(xfs_attri_cache,
178+
GFP_NOFS | __GFP_NOFAIL);
185179
}
180+
memset(attrip, 0, sizeof(struct xfs_attri_log_item));
186181

187182
attrip->attri_name_len = name_len;
188183
if (name_len)
@@ -195,7 +190,7 @@ xfs_attri_init(
195190
if (value_len)
196191
attrip->attri_value = ((char *)attrip) +
197192
sizeof(struct xfs_attri_log_item) +
198-
name_vec_len;
193+
name_len;
199194
else
200195
attrip->attri_value = NULL;
201196

fs/xfs/xfs_log_cil.c

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -134,39 +134,6 @@ xlog_cil_iovec_space(
134134
sizeof(uint64_t));
135135
}
136136

137-
/*
138-
* shadow buffers can be large, so we need to use kvmalloc() here to ensure
139-
* success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts to fall
140-
* back to vmalloc, so we can't actually do anything useful with gfp flags to
141-
* control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() will do
142-
* direct reclaim and compaction in the slow path, both of which are
143-
* horrendously expensive. We just want kmalloc to fail fast and fall back to
144-
* vmalloc if it can't get somethign straight away from the free lists or buddy
145-
* allocator. Hence we have to open code kvmalloc outselves here.
146-
*
147-
* Also, we are in memalloc_nofs_save task context here, so despite the use of
148-
* GFP_KERNEL here, we are actually going to be doing GFP_NOFS allocations. This
149-
* is actually the only way to make vmalloc() do GFP_NOFS allocations, so lets
150-
* just all pretend this is a GFP_KERNEL context operation....
151-
*/
152-
static inline void *
153-
xlog_cil_kvmalloc(
154-
size_t buf_size)
155-
{
156-
gfp_t flags = GFP_KERNEL;
157-
void *p;
158-
159-
flags &= ~__GFP_DIRECT_RECLAIM;
160-
flags |= __GFP_NOWARN | __GFP_NORETRY;
161-
do {
162-
p = kmalloc(buf_size, flags);
163-
if (!p)
164-
p = vmalloc(buf_size);
165-
} while (!p);
166-
167-
return p;
168-
}
169-
170137
/*
171138
* Allocate or pin log vector buffers for CIL insertion.
172139
*
@@ -283,7 +250,7 @@ xlog_cil_alloc_shadow_bufs(
283250
* storage.
284251
*/
285252
kmem_free(lip->li_lv_shadow);
286-
lv = xlog_cil_kvmalloc(buf_size);
253+
lv = xlog_kvmalloc(buf_size);
287254

288255
memset(lv, 0, xlog_cil_iovec_space(niovecs));
289256

fs/xfs/xfs_log_priv.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,4 +651,38 @@ xlog_valid_lsn(
651651
return valid;
652652
}
653653

654+
/*
655+
* Log vector and shadow buffers can be large, so we need to use kvmalloc() here
656+
* to ensure success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts
657+
* to fall back to vmalloc, so we can't actually do anything useful with gfp
658+
* flags to control the kmalloc() behaviour within kvmalloc(). Hence kmalloc()
659+
* will do direct reclaim and compaction in the slow path, both of which are
660+
* horrendously expensive. We just want kmalloc to fail fast and fall back to
661+
* vmalloc if it can't get somethign straight away from the free lists or
662+
* buddy allocator. Hence we have to open code kvmalloc outselves here.
663+
*
664+
* This assumes that the caller uses memalloc_nofs_save task context here, so
665+
* despite the use of GFP_KERNEL here, we are going to be doing GFP_NOFS
666+
* allocations. This is actually the only way to make vmalloc() do GFP_NOFS
667+
* allocations, so lets just all pretend this is a GFP_KERNEL context
668+
* operation....
669+
*/
670+
static inline void *
671+
xlog_kvmalloc(
672+
size_t buf_size)
673+
{
674+
gfp_t flags = GFP_KERNEL;
675+
void *p;
676+
677+
flags &= ~__GFP_DIRECT_RECLAIM;
678+
flags |= __GFP_NOWARN | __GFP_NORETRY;
679+
do {
680+
p = kmalloc(buf_size, flags);
681+
if (!p)
682+
p = vmalloc(buf_size);
683+
} while (!p);
684+
685+
return p;
686+
}
687+
654688
#endif /* __XFS_LOG_PRIV_H__ */

0 commit comments

Comments
 (0)