Commit afd81d9

Merge tag 'dma-mapping-6.11-2024-07-19' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig:

 - reduce duplicate swiotlb pool lookups (Michael Kelley)

 - minor small fixes (Yicong Yang, Yang Li)

* tag 'dma-mapping-6.11-2024-07-19' of git://git.infradead.org/users/hch/dma-mapping:
  swiotlb: fix kernel-doc description for swiotlb_del_transient
  swiotlb: reduce swiotlb pool lookups
  dma-mapping: benchmark: Don't starve others when doing the test

2 parents ebcfbf0 + b69bdba; commit afd81d9
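The theme of the Michael Kelley series: is_swiotlb_buffer() only answered yes/no, so the unmap and sync paths paid for a second pool lookup inside the swiotlb helper. swiotlb_find_pool() returns the pool itself, and new double-underscore helpers accept it, so hot paths resolve the pool exactly once. A minimal before/after sketch of the caller pattern (the helper names are from the diffs below; the surrounding locals are illustrative):

        /* Before: two pool lookups, one in the check and one in the helper. */
        if (is_swiotlb_buffer(dev, paddr))
                swiotlb_sync_single_for_cpu(dev, paddr, size, dir);

        /* After: a single lookup whose result is handed to the helper. */
        struct io_tlb_pool *pool = swiotlb_find_pool(dev, paddr);

        if (pool)
                __swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool);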

File tree: 8 files changed, +146 and -106 lines

drivers/iommu/dma-iommu.c

Lines changed: 4 additions & 7 deletions

@@ -1078,8 +1078,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_cpu(phys, size, dir);
 
-	if (is_swiotlb_buffer(dev, phys))
-		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
+	swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -1091,8 +1090,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
 		return;
 
 	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-	if (is_swiotlb_buffer(dev, phys))
-		swiotlb_sync_single_for_device(dev, phys, size, dir);
+	swiotlb_sync_single_for_device(dev, phys, size, dir);
 
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_device(phys, size, dir);
@@ -1186,7 +1184,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 		arch_sync_dma_for_device(phys, size, dir);
 
 	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
-	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
+	if (iova == DMA_MAPPING_ERROR)
 		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 	return iova;
 }
@@ -1206,8 +1204,7 @@ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 
 	__iommu_dma_unmap(dev, dma_handle, size);
 
-	if (unlikely(is_swiotlb_buffer(dev, phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+	swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
 /*
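Dropping the is_swiotlb_buffer() guards here is safe because swiotlb_tbl_unmap_single() and the sync helpers become inline wrappers (see the include/linux/swiotlb.h hunks below) that do the lookup themselves and simply return for addresses outside any bounce buffer. Condensed from that hunk, the wrapper shape is:

        static inline void swiotlb_sync_single_for_cpu(struct device *dev,
                        phys_addr_t addr, size_t size, enum dma_data_direction dir)
        {
                struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);

                if (unlikely(pool))     /* no-op unless addr is bounced */
                        __swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool);
        }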

drivers/xen/swiotlb-xen.c

Lines changed: 20 additions & 11 deletions

@@ -88,7 +88,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
 	return 0;
 }
 
-static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
+static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev,
+		dma_addr_t dma_addr)
 {
 	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
 	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
@@ -99,8 +100,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	 * in our domain. Therefore _only_ check address within our domain.
 	 */
 	if (pfn_valid(PFN_DOWN(paddr)))
-		return is_swiotlb_buffer(dev, paddr);
-	return 0;
+		return swiotlb_find_pool(dev, paddr);
+	return NULL;
 }
 
 #ifdef CONFIG_X86
@@ -227,8 +228,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * Ensure that the address returned is DMA'ble
 	 */
 	if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir,
-				attrs | DMA_ATTR_SKIP_CPU_SYNC);
+		__swiotlb_tbl_unmap_single(dev, map, size, dir,
+				attrs | DMA_ATTR_SKIP_CPU_SYNC,
+				swiotlb_find_pool(dev, map));
 		return DMA_MAPPING_ERROR;
 	}
 
@@ -254,6 +256,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
+	struct io_tlb_pool *pool;
 
 	BUG_ON(dir == DMA_NONE);
 
@@ -265,15 +268,18 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 	}
 
 	/* NOTE: We use dev_addr here, not paddr! */
-	if (is_xen_swiotlb_buffer(hwdev, dev_addr))
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
+	pool = xen_swiotlb_find_pool(hwdev, dev_addr);
+	if (pool)
+		__swiotlb_tbl_unmap_single(hwdev, paddr, size, dir,
+					   attrs, pool);
 }
 
 static void
 xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 		size_t size, enum dma_data_direction dir)
 {
 	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
+	struct io_tlb_pool *pool;
 
 	if (!dev_is_dma_coherent(dev)) {
 		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
@@ -282,18 +288,21 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
 	}
 
-	if (is_xen_swiotlb_buffer(dev, dma_addr))
-		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+	pool = xen_swiotlb_find_pool(dev, dma_addr);
+	if (pool)
+		__swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool);
 }
 
 static void
 xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
 		size_t size, enum dma_data_direction dir)
 {
 	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
+	struct io_tlb_pool *pool;
 
-	if (is_xen_swiotlb_buffer(dev, dma_addr))
-		swiotlb_sync_single_for_device(dev, paddr, size, dir);
+	pool = xen_swiotlb_find_pool(dev, dma_addr);
+	if (pool)
+		__swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
 
 	if (!dev_is_dma_coherent(dev)) {
 		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))

include/linux/scatterlist.h

Lines changed: 1 addition & 1 deletion

@@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg)
  * Description:
  *   Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all
  *   elements may have been bounced, so the caller would have to check
- *   individual SG entries with is_swiotlb_buffer().
+ *   individual SG entries with swiotlb_find_pool().
  */
 static inline bool sg_dma_is_swiotlb(struct scatterlist *sg)
 {

include/linux/swiotlb.h

Lines changed: 62 additions & 43 deletions

@@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 		int (*remap)(void *tlb, unsigned long nslabs));
 extern void __init swiotlb_update_mem_attributes(void);
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
-		size_t mapping_size,
-		unsigned int alloc_aligned_mask, enum dma_data_direction dir,
-		unsigned long attrs);
-
-extern void swiotlb_tbl_unmap_single(struct device *hwdev,
-		phys_addr_t tlb_addr,
-		size_t mapping_size,
-		enum dma_data_direction dir,
-		unsigned long attrs);
-
-void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir);
-void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
-		size_t size, enum dma_data_direction dir);
-dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
-		size_t size, enum dma_data_direction dir, unsigned long attrs);
-
 #ifdef CONFIG_SWIOTLB
 
 /**
@@ -143,37 +125,27 @@ struct io_tlb_mem {
 #endif
 };
 
-#ifdef CONFIG_SWIOTLB_DYNAMIC
-
-struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
-
-#else
-
-static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
-		phys_addr_t paddr)
-{
-	return &dev->dma_io_tlb_mem->defpool;
-}
-
-#endif
+struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
 
 /**
- * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb
+ * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs
  * @dev: Device which has mapped the buffer.
  * @paddr: Physical address within the DMA buffer.
  *
- * Check if @paddr points into a bounce buffer.
+ * Find the swiotlb pool that @paddr points into.
 *
 * Return:
- * * %true if @paddr points into a bounce buffer
- * * %false otherwise
+ * * pool address if @paddr points into a bounce buffer
+ * * NULL if @paddr does not point into a bounce buffer. As such, this function
+ *   can be used to determine if @paddr denotes a swiotlb bounce buffer.
 */
-static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+		phys_addr_t paddr)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
 	if (!mem)
-		return false;
+		return NULL;
 
 #ifdef CONFIG_SWIOTLB_DYNAMIC
 	/*
@@ -182,16 +154,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 	 * If a SWIOTLB address is checked on another CPU, then it was
 	 * presumably loaded by the device driver from an unspecified private
 	 * data structure. Make sure that this load is ordered before reading
-	 * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool().
+	 * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool().
 	 *
 	 * This barrier pairs with smp_mb() in swiotlb_find_slots().
 	 */
 	smp_rmb();
-	return READ_ONCE(dev->dma_uses_io_tlb) &&
-		swiotlb_find_pool(dev, paddr);
+	if (READ_ONCE(dev->dma_uses_io_tlb))
+		return __swiotlb_find_pool(dev, paddr);
 #else
-	return paddr >= mem->defpool.start && paddr < mem->defpool.end;
+	if (paddr >= mem->defpool.start && paddr < mem->defpool.end)
+		return &mem->defpool;
 #endif
+
+	return NULL;
 }
 
 static inline bool is_swiotlb_force_bounce(struct device *dev)
@@ -219,9 +194,10 @@ static inline void swiotlb_dev_init(struct device *dev)
 {
 }
 
-static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+		phys_addr_t paddr)
 {
-	return false;
+	return NULL;
 }
 static inline bool is_swiotlb_force_bounce(struct device *dev)
 {
@@ -260,6 +236,49 @@ static inline phys_addr_t default_swiotlb_limit(void)
 }
 #endif /* CONFIG_SWIOTLB */
 
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
+		size_t mapping_size, unsigned int alloc_aligned_mask,
+		enum dma_data_direction dir, unsigned long attrs);
+dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
+		size_t size, enum dma_data_direction dir, unsigned long attrs);
+
+void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+		size_t mapping_size, enum dma_data_direction dir,
+		unsigned long attrs, struct io_tlb_pool *pool);
+static inline void swiotlb_tbl_unmap_single(struct device *dev,
+		phys_addr_t addr, size_t size, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+	if (unlikely(pool))
+		__swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool);
+}
+
+void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+		size_t size, enum dma_data_direction dir,
+		struct io_tlb_pool *pool);
+static inline void swiotlb_sync_single_for_device(struct device *dev,
+		phys_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+	if (unlikely(pool))
+		__swiotlb_sync_single_for_device(dev, addr, size, dir, pool);
+}
+
+void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+		size_t size, enum dma_data_direction dir,
+		struct io_tlb_pool *pool);
+static inline void swiotlb_sync_single_for_cpu(struct device *dev,
+		phys_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+	if (unlikely(pool))
+		__swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool);
+}
+
 extern void swiotlb_print_info(void);
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
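Both calling styles coexist after this change. A hedged sketch of how a caller might use them (dev, tlb_addr, size, and attrs are illustrative locals, not from the patch):

        /* Unchanged callers: the wrapper performs the lookup and NULL check. */
        swiotlb_tbl_unmap_single(dev, tlb_addr, size, DMA_FROM_DEVICE, attrs);

        /* Hot paths that already resolved the pool skip the second lookup. */
        pool = swiotlb_find_pool(dev, tlb_addr);
        if (pool)
                __swiotlb_tbl_unmap_single(dev, tlb_addr, size, DMA_FROM_DEVICE,
                                           attrs, pool);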

kernel/dma/direct.c

Lines changed: 3 additions & 7 deletions

@@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
 	for_each_sg(sgl, sg, nents, i) {
 		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-		if (unlikely(is_swiotlb_buffer(dev, paddr)))
-			swiotlb_sync_single_for_device(dev, paddr, sg->length,
-						       dir);
+		swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
 
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_device(paddr, sg->length,
@@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
 		if (!dev_is_dma_coherent(dev))
 			arch_sync_dma_for_cpu(paddr, sg->length, dir);
 
-		if (unlikely(is_swiotlb_buffer(dev, paddr)))
-			swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
-						    dir);
+		swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
 
 		if (dir == DMA_FROM_DEVICE)
 			arch_dma_mark_clean(paddr, sg->length);
@@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
 bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
 	return !dev_is_dma_coherent(dev) ||
-	       is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
+	       swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr));
 }
 
 /**

kernel/dma/direct.h

Lines changed: 3 additions & 6 deletions

@@ -58,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 {
 	phys_addr_t paddr = dma_to_phys(dev, addr);
 
-	if (unlikely(is_swiotlb_buffer(dev, paddr)))
-		swiotlb_sync_single_for_device(dev, paddr, size, dir);
+	swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
 	if (!dev_is_dma_coherent(dev))
 		arch_sync_dma_for_device(paddr, size, dir);
@@ -75,8 +74,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
 		arch_sync_dma_for_cpu_all();
 	}
 
-	if (unlikely(is_swiotlb_buffer(dev, paddr)))
-		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+	swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
 
 	if (dir == DMA_FROM_DEVICE)
 		arch_dma_mark_clean(paddr, size);
@@ -121,8 +119,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-	if (unlikely(is_swiotlb_buffer(dev, phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, dir,
+	swiotlb_tbl_unmap_single(dev, phys, size, dir,
 			attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */

kernel/dma/map_benchmark.c

Lines changed: 16 additions & 0 deletions

@@ -89,6 +89,22 @@ static int map_benchmark_thread(void *data)
 		atomic64_add(map_sq, &map->sum_sq_map);
 		atomic64_add(unmap_sq, &map->sum_sq_unmap);
 		atomic64_inc(&map->loops);
+
+		/*
+		 * We may test for a long time, so periodically check whether
+		 * we need to schedule to avoid starving the others. Otherwise
+		 * we may hang the kernel on a non-preemptible kernel: when
+		 * the number of test kthreads is >= the number of CPUs, the
+		 * test kthreads will run endlessly on every CPU since the
+		 * thread responsible for telling them to stop (in
+		 * do_map_benchmark()) could never be scheduled.
+		 *
+		 * Note this may degrade test concurrency, since the test
+		 * threads may need to share CPU time with other load
+		 * in the system. It is therefore recommended to run this
+		 * benchmark on an idle system.
+		 */
+		cond_resched();
 	}
 
 out:
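The fix is the standard pattern for long-running kthreads: explicitly yield so that, on a non-preemptible kernel, the thread calling kthread_stop() can actually get a CPU. A minimal sketch of the pattern (worker_fn and do_one_iteration are illustrative names; kthread_should_stop() and cond_resched() are the real APIs the patch relies on):

        static int worker_fn(void *data)
        {
                while (!kthread_should_stop()) {
                        do_one_iteration(data); /* illustrative workload */
                        cond_resched();         /* let the stopping thread run */
                }
                return 0;
        }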
