Commit 3e302db

maorgottlieb authored and jgunthorpe committed
lib/scatterlist: Fix wrong update of orig_nents
orig_nents should represent the number of entries with pages, but __sg_alloc_table_from_pages sets orig_nents to the total number of entries in the table. This is wrong when the API is used for dynamic allocation, where not all of the table entries are mapped with pages. It wasn't observed until now, since RDMA umem, which uses this API in its dynamic form, doesn't use orig_nents implicitly or explicitly through the scatterlist APIs.

Fix it by changing the append API to track the SG append table state, and by adding an API to free the append table according to the total number of entries in the table. Now all APIs set orig_nents to the number of entries with pages.

Fixes: 07da122 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages")
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Maor Gottlieb <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
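To make the new calling convention concrete, here is a minimal sketch (not part of this commit) of how a caller might drive the reworked append API. The helper name build_sg_table() and its parameters are hypothetical; struct sg_append_table, sg_alloc_append_table_from_pages() and sg_free_append_table() are the interfaces changed by the diffs below.

/* Hypothetical caller of the append API; illustrative sketch only. */
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/minmax.h>
#include <linux/gfp.h>

static int build_sg_table(struct sg_append_table *append, struct page **pages,
                          unsigned int total_pages, unsigned int chunk,
                          unsigned int max_seg)
{
        unsigned int done = 0;
        int ret;

        /* The append table tracks its own state and must start zeroed. */
        memset(append, 0, sizeof(*append));

        while (done < total_pages) {
                unsigned int n = min(chunk, total_pages - done);

                /* left_pages > 0 signals that more entries will be appended. */
                ret = sg_alloc_append_table_from_pages(append, pages + done, n,
                                0, (unsigned long)n << PAGE_SHIFT, max_seg,
                                total_pages - done - n, GFP_KERNEL);
                if (ret) {
                        /* Frees every chained chunk via append->total_nents. */
                        sg_free_append_table(append);
                        return ret;
                }
                done += n;
        }

        /* append->sgt.orig_nents now counts only the entries with pages. */
        return 0;
}

On failure the append table may hold partially chained chunks, which is why cleanup goes through sg_free_append_table() and its total_nents count rather than orig_nents.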
1 parent 90e7a6d commit 3e302db

File tree

6 files changed: +136, -93 lines changed

drivers/infiniband/core/umem.c

Lines changed: 19 additions & 15 deletions
@@ -59,7 +59,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 		unpin_user_page_range_dirty_lock(sg_page(sg),
 				DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
 
-	sg_free_table(&umem->sg_head);
+	sg_free_append_table(&umem->sgt_append);
 }
 
 /**
@@ -155,8 +155,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	unsigned long dma_attr = 0;
 	struct mm_struct *mm;
 	unsigned long npages;
-	int ret;
-	struct scatterlist *sg = NULL;
+	int pinned, ret;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	/*
@@ -216,28 +215,33 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 	while (npages) {
 		cond_resched();
-		ret = pin_user_pages_fast(cur_base,
+		pinned = pin_user_pages_fast(cur_base,
 					  min_t(unsigned long, npages,
 						PAGE_SIZE /
 						sizeof(struct page *)),
 					  gup_flags | FOLL_LONGTERM, page_list);
-		if (ret < 0)
+		if (pinned < 0) {
+			ret = pinned;
 			goto umem_release;
+		}
 
-		cur_base += ret * PAGE_SIZE;
-		npages -= ret;
-		sg = sg_alloc_append_table_from_pages(&umem->sg_head, page_list,
-				ret, 0, ret << PAGE_SHIFT,
-				ib_dma_max_seg_size(device), sg, npages,
-				GFP_KERNEL);
-		umem->sg_nents = umem->sg_head.nents;
-		if (IS_ERR(sg)) {
-			unpin_user_pages_dirty_lock(page_list, ret, 0);
-			ret = PTR_ERR(sg);
+		cur_base += pinned * PAGE_SIZE;
+		npages -= pinned;
+		ret = sg_alloc_append_table_from_pages(
+			&umem->sgt_append, page_list, pinned, 0,
+			pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
+			npages, GFP_KERNEL);
+		umem->sg_nents = umem->sgt_append.sgt.nents;
+		if (ret) {
+			memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt,
+			       sizeof(umem->sgt_append.sgt));
+			unpin_user_pages_dirty_lock(page_list, pinned, 0);
 			goto umem_release;
 		}
 	}
 
+	memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt,
+	       sizeof(umem->sgt_append.sgt));
 	if (access & IB_ACCESS_RELAXED_ORDERING)
 		dma_attr |= DMA_ATTR_WEAK_ORDERING;
 

include/linux/scatterlist.h

Lines changed: 13 additions & 6 deletions
@@ -39,6 +39,12 @@ struct sg_table {
 	unsigned int orig_nents;	/* original size of list */
 };
 
+struct sg_append_table {
+	struct sg_table sgt;		/* The scatter list table */
+	struct scatterlist *prv;	/* last populated sge in the table */
+	unsigned int total_nents;	/* Total entries in the table */
+};
+
 /*
  * Notes on SG table design.
  *
@@ -280,16 +286,17 @@ typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
 typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
 
 void __sg_free_table(struct sg_table *, unsigned int, unsigned int,
-		     sg_free_fn *);
+		     sg_free_fn *, unsigned int);
 void sg_free_table(struct sg_table *);
+void sg_free_append_table(struct sg_append_table *sgt);
 int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
 		     struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
-struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt,
-		struct page **pages, unsigned int n_pages, unsigned int offset,
-		unsigned long size, unsigned int max_segment,
-		struct scatterlist *prv, unsigned int left_pages,
-		gfp_t gfp_mask);
+int sg_alloc_append_table_from_pages(struct sg_append_table *sgt,
+				     struct page **pages, unsigned int n_pages,
+				     unsigned int offset, unsigned long size,
+				     unsigned int max_segment,
+				     unsigned int left_pages, gfp_t gfp_mask);
 int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
 				      unsigned int n_pages, unsigned int offset,
 				      unsigned long size,

include/rdma/ib_umem.h

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ struct ib_umem {
 	u32 is_odp : 1;
 	u32 is_dmabuf : 1;
 	struct work_struct work;
+	struct sg_append_table sgt_append;
 	struct sg_table sg_head;
 	int nmap;
 	unsigned int sg_nents;

lib/scatterlist.c

Lines changed: 77 additions & 50 deletions
@@ -182,6 +182,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  * @nents_first_chunk: Number of entries int the (preallocated) first
  *	scatterlist chunk, 0 means no such preallocated first chunk
  * @free_fn:	Free function
+ * @num_ents:	Number of entries in the table
  *
  * Description:
  *    Free an sg table previously allocated and setup with
@@ -190,7 +191,8 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
  *
  **/
 void __sg_free_table(struct sg_table *table, unsigned int max_ents,
-		     unsigned int nents_first_chunk, sg_free_fn *free_fn)
+		     unsigned int nents_first_chunk, sg_free_fn *free_fn,
+		     unsigned int num_ents)
 {
 	struct scatterlist *sgl, *next;
 	unsigned curr_max_ents = nents_first_chunk ?: max_ents;
@@ -199,8 +201,8 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 		return;
 
 	sgl = table->sgl;
-	while (table->orig_nents) {
-		unsigned int alloc_size = table->orig_nents;
+	while (num_ents) {
+		unsigned int alloc_size = num_ents;
 		unsigned int sg_size;
 
 		/*
@@ -218,7 +220,7 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 			next = NULL;
 		}
 
-		table->orig_nents -= sg_size;
+		num_ents -= sg_size;
 		if (nents_first_chunk)
 			nents_first_chunk = 0;
 		else
@@ -231,14 +233,28 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
 }
 EXPORT_SYMBOL(__sg_free_table);
 
+/**
+ * sg_free_append_table - Free a previously allocated append sg table.
+ * @table:	 The mapped sg append table header
+ *
+ **/
+void sg_free_append_table(struct sg_append_table *table)
+{
+	__sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree,
+			table->total_nents);
+}
+EXPORT_SYMBOL(sg_free_append_table);
+
+
 /**
  * sg_free_table - Free a previously allocated sg table
  * @table:	The mapped sg table header
  *
  **/
 void sg_free_table(struct sg_table *table)
 {
-	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
+	__sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree,
+			table->orig_nents);
 }
 EXPORT_SYMBOL(sg_free_table);
 
@@ -359,13 +375,12 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 	ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
 			       NULL, 0, gfp_mask, sg_kmalloc);
 	if (unlikely(ret))
-		__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree);
-
+		sg_free_table(table);
 	return ret;
 }
 EXPORT_SYMBOL(sg_alloc_table);
 
-static struct scatterlist *get_next_sg(struct sg_table *table,
+static struct scatterlist *get_next_sg(struct sg_append_table *table,
 				       struct scatterlist *cur,
 				       unsigned long needed_sges,
 				       gfp_t gfp_mask)
@@ -386,80 +401,79 @@ static struct scatterlist *get_next_sg(struct sg_table *table,
 		return ERR_PTR(-ENOMEM);
 	sg_init_table(new_sg, alloc_size);
 	if (cur) {
+		table->total_nents += alloc_size - 1;
 		__sg_chain(next_sg, new_sg);
-		table->orig_nents += alloc_size - 1;
 	} else {
-		table->sgl = new_sg;
-		table->orig_nents = alloc_size;
-		table->nents = 0;
+		table->sgt.sgl = new_sg;
+		table->total_nents = alloc_size;
 	}
 	return new_sg;
 }
 
 /**
- * sg_alloc_append_table_from_pages - Allocate and initialize an sg table from
- *				      an array of pages
- * @sgt:	 The sg table header to use
- * @pages:	 Pointer to an array of page pointers
- * @n_pages:	 Number of pages in the pages array
+ * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
+ *				      table from an array of pages
+ * @sgt_append:  The sg append table to use
+ * @pages:	 Pointer to an array of page pointers
+ * @n_pages:	 Number of pages in the pages array
  * @offset:      Offset from start of the first page to the start of a buffer
  * @size:        Number of valid bytes in the buffer (after offset)
  * @max_segment: Maximum size of a scatterlist element in bytes
- * @prv:	 Last populated sge in sgt
 * @left_pages:  Left pages caller have to set after this call
 * @gfp_mask:	 GFP allocation mask
 *
 * Description:
-*    If @prv is NULL, allocate and initialize an sg table from a list of pages,
-*    else reuse the scatterlist passed in at @prv.
-*    Contiguous ranges of the pages are squashed into a single scatterlist
-*    entry up to the maximum size specified in @max_segment.  A user may
-*    provide an offset at a start and a size of valid data in a buffer
-*    specified by the page array.
+*    In the first call it allocate and initialize an sg table from a list of
+*    pages, else reuse the scatterlist from sgt_append. Contiguous ranges of
+*    the pages are squashed into a single scatterlist entry up to the maximum
+*    size specified in @max_segment. A user may provide an offset at a start
+*    and a size of valid data in a buffer specified by the page array. The
+*    returned sg table is released by sg_free_append_table
 *
 * Returns:
-*   Last SGE in sgt on success, PTR_ERR on otherwise.
-*   The allocation in @sgt must be released by sg_free_table.
+*   0 on success, negative error on failure
 *
 * Notes:
 *   If this function returns non-0 (eg failure), the caller must call
-*   sg_free_table() to cleanup any leftover allocations.
+*   sg_free_append_table() to cleanup any leftover allocations.
+*
+*   In the fist call, sgt_append must by initialized.
 */
-struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt,
+int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
 		struct page **pages, unsigned int n_pages, unsigned int offset,
 		unsigned long size, unsigned int max_segment,
-		struct scatterlist *prv, unsigned int left_pages,
-		gfp_t gfp_mask)
+		unsigned int left_pages, gfp_t gfp_mask)
 {
 	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
 	unsigned int added_nents = 0;
-	struct scatterlist *s = prv;
+	struct scatterlist *s = sgt_append->prv;
 
 	/*
 	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
 	 * otherwise it can overshoot.
 	 */
 	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
 	if (WARN_ON(max_segment < PAGE_SIZE))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
-	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
-		return ERR_PTR(-EOPNOTSUPP);
+	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
+		return -EOPNOTSUPP;
 
-	if (prv) {
-		unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
-				       prv->offset + prv->length) /
-				      PAGE_SIZE;
+	if (sgt_append->prv) {
+		unsigned long paddr =
+			(page_to_pfn(sg_page(sgt_append->prv)) * PAGE_SIZE +
+			 sgt_append->prv->offset + sgt_append->prv->length) /
+			PAGE_SIZE;
 
 		if (WARN_ON(offset))
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 
 		/* Merge contiguous pages into the last SG */
-		prv_len = prv->length;
+		prv_len = sgt_append->prv->length;
 		while (n_pages && page_to_pfn(pages[0]) == paddr) {
-			if (prv->length + PAGE_SIZE > max_segment)
+			if (sgt_append->prv->length + PAGE_SIZE > max_segment)
 				break;
-			prv->length += PAGE_SIZE;
+			sgt_append->prv->length += PAGE_SIZE;
 			paddr++;
 			pages++;
 			n_pages--;
@@ -496,15 +510,16 @@ struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt,
 		}
 
 		/* Pass how many chunks might be left */
-		s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+		s = get_next_sg(sgt_append, s, chunks - i + left_pages,
+				gfp_mask);
 		if (IS_ERR(s)) {
 			/*
 			 * Adjust entry length to be as before function was
 			 * called.
 			 */
-			if (prv)
-				prv->length = prv_len;
-			return s;
+			if (sgt_append->prv)
+				sgt_append->prv->length = prv_len;
+			return PTR_ERR(s);
 		}
 		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
 		sg_set_page(s, pages[cur_page],
@@ -514,11 +529,13 @@ struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt,
 		offset = 0;
 		cur_page = j;
 	}
-	sgt->nents += added_nents;
+	sgt_append->sgt.nents += added_nents;
+	sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
+	sgt_append->prv = s;
 out:
 	if (!left_pages)
 		sg_mark_end(s);
-	return s;
+	return 0;
 }
 EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
 
@@ -550,8 +567,18 @@ int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
 				      unsigned long size, unsigned int max_segment,
 				      gfp_t gfp_mask)
 {
-	return PTR_ERR_OR_ZERO(sg_alloc_append_table_from_pages(sgt, pages,
-			n_pages, offset, size, max_segment, NULL, 0, gfp_mask));
+	struct sg_append_table append = {};
+	int err;
+
+	err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
+					       size, max_segment, 0, gfp_mask);
+	if (err) {
+		sg_free_append_table(&append);
+		return err;
+	}
+	memcpy(sgt, &append.sgt, sizeof(*sgt));
+	WARN_ON(append.total_nents != sgt->orig_nents);
+	return 0;
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
 

lib/sg_pool.c

Lines changed: 2 additions & 1 deletion
@@ -90,7 +90,8 @@ void sg_free_table_chained(struct sg_table *table,
 	if (nents_first_chunk == 1)
 		nents_first_chunk = 0;
 
-	__sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free);
+	__sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free,
+			table->orig_nents);
 }
 EXPORT_SYMBOL_GPL(sg_free_table_chained);
 
