Skip to content

Commit fd9f2a9

Browse files
committed
Merge branch 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu into iommufd/for-next
Jason Gunthorpe says: ==================== iommufd follows the same design as KVM and uses memory cgroups to limit the amount of kernel memory an iommufd file descriptor can pin down. The various internal data structures already use GFP_KERNEL_ACCOUNT to charge their own memory. However, one of the biggest consumers of kernel memory is the IOPTEs stored under the iommu_domain and these allocations are not tracked. This series is the first step in fixing it. The iommu driver contract already includes a 'gfp' argument to the map_pages op, allowing iommufd to specify GFP_KERNEL_ACCOUNT and then having the driver allocate the IOPTE tables with that flag will capture a significant amount of the allocations. Update the iommu_map() API to pass in the GFP argument, and fix all call sites. Replace iommu_map_atomic(). Audit the "enterprise" iommu drivers to make sure they do the right thing. Intel and S390 ignore the GFP argument and always use GFP_ATOMIC. This is problematic for iommufd anyhow, so fix it. AMD and ARM SMMUv2/3 are already correct. A follow-up series will be needed to capture the allocations made when the iommu_domain itself is allocated, which will complete the job. ==================== * 'iommu-memory-accounting' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/joro/iommu: iommu/s390: Use GFP_KERNEL in sleepable contexts iommu/s390: Push the gfp parameter to the kmem_cache_alloc()'s iommu/intel: Use GFP_KERNEL in sleepable contexts iommu/intel: Support the gfp argument to the map_pages op iommu/intel: Add a gfp parameter to alloc_pgtable_page() iommufd: Use GFP_KERNEL_ACCOUNT for iommu_map() iommu/dma: Use the gfp parameter in __iommu_dma_alloc_noncontiguous() iommu: Add a gfp parameter to iommu_map_sg() iommu: Remove iommu_map_atomic() iommu: Add a gfp parameter to iommu_map() Link: https://lore.kernel.org/linux-iommu/[email protected] Signed-off-by: Jason Gunthorpe <[email protected]>
2 parents 84798f2 + 429f27e commit fd9f2a9

File tree

22 files changed

+126
-125
lines changed

22 files changed

+126
-125
lines changed

arch/arm/mm/dma-mapping.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -984,7 +984,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
984984

985985
len = (j - i) << PAGE_SHIFT;
986986
ret = iommu_map(mapping->domain, iova, phys, len,
987-
__dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
987+
__dma_info_to_prot(DMA_BIDIRECTIONAL, attrs),
988+
GFP_KERNEL);
988989
if (ret < 0)
989990
goto fail;
990991
iova += len;
@@ -1207,7 +1208,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
12071208

12081209
prot = __dma_info_to_prot(dir, attrs);
12091210

1210-
ret = iommu_map(mapping->domain, iova, phys, len, prot);
1211+
ret = iommu_map(mapping->domain, iova, phys, len, prot,
1212+
GFP_KERNEL);
12111213
if (ret < 0)
12121214
goto fail;
12131215
count += len >> PAGE_SHIFT;
@@ -1379,7 +1381,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
13791381

13801382
prot = __dma_info_to_prot(dir, attrs);
13811383

1382-
ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
1384+
ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len,
1385+
prot, GFP_KERNEL);
13831386
if (ret < 0)
13841387
goto fail;
13851388

@@ -1443,7 +1446,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev,
14431446

14441447
prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
14451448

1446-
ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
1449+
ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL);
14471450
if (ret < 0)
14481451
goto fail;
14491452

arch/s390/include/asm/pci_dma.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry)
186186

187187
/* Prototypes */
188188
void dma_free_seg_table(unsigned long);
189-
unsigned long *dma_alloc_cpu_table(void);
189+
unsigned long *dma_alloc_cpu_table(gfp_t gfp);
190190
void dma_cleanup_tables(unsigned long *);
191-
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
191+
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
192+
gfp_t gfp);
192193
void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
193194

194195
extern const struct dma_map_ops s390_pci_dma_ops;

arch/s390/pci/pci_dma.c

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
2727
zdev->iommu_pages * PAGE_SIZE);
2828
}
2929

30-
unsigned long *dma_alloc_cpu_table(void)
30+
unsigned long *dma_alloc_cpu_table(gfp_t gfp)
3131
{
3232
unsigned long *table, *entry;
3333

34-
table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
34+
table = kmem_cache_alloc(dma_region_table_cache, gfp);
3535
if (!table)
3636
return NULL;
3737

@@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table)
4545
kmem_cache_free(dma_region_table_cache, table);
4646
}
4747

48-
static unsigned long *dma_alloc_page_table(void)
48+
static unsigned long *dma_alloc_page_table(gfp_t gfp)
4949
{
5050
unsigned long *table, *entry;
5151

52-
table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
52+
table = kmem_cache_alloc(dma_page_table_cache, gfp);
5353
if (!table)
5454
return NULL;
5555

@@ -63,7 +63,7 @@ static void dma_free_page_table(void *table)
6363
kmem_cache_free(dma_page_table_cache, table);
6464
}
6565

66-
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
66+
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
6767
{
6868
unsigned long old_rte, rte;
6969
unsigned long *sto;
@@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
7272
if (reg_entry_isvalid(rte)) {
7373
sto = get_rt_sto(rte);
7474
} else {
75-
sto = dma_alloc_cpu_table();
75+
sto = dma_alloc_cpu_table(gfp);
7676
if (!sto)
7777
return NULL;
7878

@@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
9090
return sto;
9191
}
9292

93-
static unsigned long *dma_get_page_table_origin(unsigned long *step)
93+
static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
9494
{
9595
unsigned long old_ste, ste;
9696
unsigned long *pto;
@@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
9999
if (reg_entry_isvalid(ste)) {
100100
pto = get_st_pto(ste);
101101
} else {
102-
pto = dma_alloc_page_table();
102+
pto = dma_alloc_page_table(gfp);
103103
if (!pto)
104104
return NULL;
105105
set_st_pto(&ste, virt_to_phys(pto));
@@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step)
116116
return pto;
117117
}
118118

119-
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
119+
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
120+
gfp_t gfp)
120121
{
121122
unsigned long *sto, *pto;
122123
unsigned int rtx, sx, px;
123124

124125
rtx = calc_rtx(dma_addr);
125-
sto = dma_get_seg_table_origin(&rto[rtx]);
126+
sto = dma_get_seg_table_origin(&rto[rtx], gfp);
126127
if (!sto)
127128
return NULL;
128129

129130
sx = calc_sx(dma_addr);
130-
pto = dma_get_page_table_origin(&sto[sx]);
131+
pto = dma_get_page_table_origin(&sto[sx], gfp);
131132
if (!pto)
132133
return NULL;
133134

@@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
170171
return -EINVAL;
171172

172173
for (i = 0; i < nr_pages; i++) {
173-
entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
174+
entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
175+
GFP_ATOMIC);
174176
if (!entry) {
175177
rc = -ENOMEM;
176178
goto undo_cpu_trans;
@@ -186,7 +188,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
186188
while (i-- > 0) {
187189
page_addr -= PAGE_SIZE;
188190
dma_addr -= PAGE_SIZE;
189-
entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
191+
entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
192+
GFP_ATOMIC);
190193
if (!entry)
191194
break;
192195
dma_update_cpu_trans(entry, page_addr, flags);
@@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
576579

577580
spin_lock_init(&zdev->iommu_bitmap_lock);
578581

579-
zdev->dma_table = dma_alloc_cpu_table();
582+
zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
580583
if (!zdev->dma_table) {
581584
rc = -ENOMEM;
582585
goto out;

drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
475475
u32 offset = (r->offset + i) << imem->iommu_pgshift;
476476

477477
ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
478-
PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
478+
PAGE_SIZE, IOMMU_READ | IOMMU_WRITE,
479+
GFP_KERNEL);
479480
if (ret < 0) {
480481
nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret);
481482

drivers/gpu/drm/tegra/drm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1057,7 +1057,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
10571057

10581058
*dma = iova_dma_addr(&tegra->carveout.domain, alloc);
10591059
err = iommu_map(tegra->domain, *dma, virt_to_phys(virt),
1060-
size, IOMMU_READ | IOMMU_WRITE);
1060+
size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
10611061
if (err < 0)
10621062
goto free_iova;
10631063

drivers/gpu/host1x/cdma.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
105105

106106
pb->dma = iova_dma_addr(&host1x->iova, alloc);
107107
err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
108-
IOMMU_READ);
108+
IOMMU_READ, GFP_KERNEL);
109109
if (err)
110110
goto iommu_free_iova;
111111
} else {

drivers/infiniband/hw/usnic/usnic_uiom.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
277277
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
278278
va_start, &pa_start, size, flags);
279279
err = iommu_map(pd->domain, va_start, pa_start,
280-
size, flags);
280+
size, flags, GFP_KERNEL);
281281
if (err) {
282282
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
283283
va_start, &pa_start, size, err);
@@ -294,7 +294,7 @@ static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
294294
usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
295295
va_start, &pa_start, size, flags);
296296
err = iommu_map(pd->domain, va_start, pa_start,
297-
size, flags);
297+
size, flags, GFP_KERNEL);
298298
if (err) {
299299
usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
300300
va_start, &pa_start, size, err);

drivers/iommu/dma-iommu.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
713713
if (!iova)
714714
return DMA_MAPPING_ERROR;
715715

716-
if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) {
716+
if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
717717
iommu_dma_free_iova(cookie, iova, size, NULL);
718718
return DMA_MAPPING_ERROR;
719719
}
@@ -822,7 +822,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
822822
if (!iova)
823823
goto out_free_pages;
824824

825-
if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
825+
/*
826+
* Remove the zone/policy flags from the GFP - these are applied to the
827+
* __iommu_dma_alloc_pages() but are not used for the supporting
828+
* internal allocations that follow.
829+
*/
830+
gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP);
831+
832+
if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp))
826833
goto out_free_iova;
827834

828835
if (!(ioprot & IOMMU_CACHE)) {
@@ -833,7 +840,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
833840
arch_dma_prep_coherent(sg_page(sg), sg->length);
834841
}
835842

836-
ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot);
843+
ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot,
844+
gfp);
837845
if (ret < 0 || ret < size)
838846
goto out_free_sg;
839847

@@ -1281,7 +1289,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
12811289
* We'll leave any physical concatenation to the IOMMU driver's
12821290
* implementation - it knows better than we do.
12831291
*/
1284-
ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot);
1292+
ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
12851293
if (ret < 0 || ret < iova_len)
12861294
goto out_free_iova;
12871295

@@ -1615,7 +1623,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
16151623
if (!iova)
16161624
goto out_free_page;
16171625

1618-
if (iommu_map(domain, iova, msi_addr, size, prot))
1626+
if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL))
16191627
goto out_free_iova;
16201628

16211629
INIT_LIST_HEAD(&msi_page->list);

0 commit comments

Comments
 (0)