Commit 7296f23

Browse files
mhklinux authored and Christoph Hellwig committed
swiotlb: reduce swiotlb pool lookups
With CONFIG_SWIOTLB_DYNAMIC enabled, each round-trip map/unmap pair in the
swiotlb results in 6 calls to swiotlb_find_pool(). In multiple places, the
pool is found and used in one function, and then must be found again in the
next function that is called because only the tlb_addr is passed as an
argument. These are the six call sites:

dma_direct_map_page:
 1. swiotlb_map -> swiotlb_tbl_map_single -> swiotlb_bounce

dma_direct_unmap_page:
 2. dma_direct_sync_single_for_cpu -> is_swiotlb_buffer
 3. dma_direct_sync_single_for_cpu -> swiotlb_sync_single_for_cpu ->
    swiotlb_bounce
 4. is_swiotlb_buffer
 5. swiotlb_tbl_unmap_single -> swiotlb_del_transient
 6. swiotlb_tbl_unmap_single -> swiotlb_release_slots

Reduce the number of calls by finding the pool at a higher level, and
passing it as an argument instead of searching again. A key change is for
is_swiotlb_buffer() to return a pool pointer instead of a boolean, and then
pass this pool pointer to subsequent swiotlb functions.

There are 9 occurrences of is_swiotlb_buffer() used to test if a buffer is
a swiotlb buffer before calling a swiotlb function. To reduce code
duplication in getting the pool pointer and passing it as an argument,
introduce inline wrappers for this pattern. The generated code is
essentially unchanged.

Since is_swiotlb_buffer() no longer returns a boolean, rename some
functions to reflect the change:

 * swiotlb_find_pool() becomes __swiotlb_find_pool()
 * is_swiotlb_buffer() becomes swiotlb_find_pool()
 * is_xen_swiotlb_buffer() becomes xen_swiotlb_find_pool()

With these changes, a round-trip map/unmap pair requires only 2 pool
lookups (listed using the new names and wrappers):

dma_direct_unmap_page:
 1. dma_direct_sync_single_for_cpu -> swiotlb_find_pool
 2. swiotlb_tbl_unmap_single -> swiotlb_find_pool

These changes come from noticing the inefficiencies in a code review, not
from performance measurements. With CONFIG_SWIOTLB_DYNAMIC,
__swiotlb_find_pool() is not trivial, and it uses an RCU read lock, so
avoiding the redundant calls helps performance in a hot path. When
CONFIG_SWIOTLB_DYNAMIC is *not* set, the code size reduction is minimal and
the perf benefits are likely negligible, but no harm is done.

No functional change is intended.

Signed-off-by: Michael Kelley <[email protected]>
Reviewed-by: Petr Tesarik <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 54624ac commit 7296f23
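
For orientation before the per-file hunks: a condensed sketch of the
lookup-plus-wrapper pattern the message describes, abbreviated from the
include/linux/swiotlb.h changes in this commit (only one of the three
sync/unmap wrappers is shown; bodies are otherwise verbatim from the diff):

        /* The lookup now returns the pool, or NULL for a non-swiotlb address,
         * so the "is this a bounce buffer?" test and the pool lookup are a
         * single operation. */
        struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr);

        /* The __-prefixed worker takes the already-found pool and never has
         * to search again. */
        void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
                        size_t size, enum dma_data_direction dir,
                        struct io_tlb_pool *pool);

        /* The inline wrapper keeps the old signature for the existing
         * "test, then call" sites: one lookup, then conditionally invoke
         * the worker. */
        static inline void swiotlb_sync_single_for_cpu(struct device *dev,
                        phys_addr_t addr, size_t size, enum dma_data_direction dir)
        {
                struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);

                if (unlikely(pool))
                        __swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool);
        }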

File tree

7 files changed, +129 −106 lines changed


drivers/iommu/dma-iommu.c

Lines changed: 4 additions & 7 deletions
@@ -1081,8 +1081,7 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev,
         if (!dev_is_dma_coherent(dev))
                 arch_sync_dma_for_cpu(phys, size, dir);
 
-        if (is_swiotlb_buffer(dev, phys))
-                swiotlb_sync_single_for_cpu(dev, phys, size, dir);
+        swiotlb_sync_single_for_cpu(dev, phys, size, dir);
 }
 
 static void iommu_dma_sync_single_for_device(struct device *dev,
@@ -1094,8 +1093,7 @@ static void iommu_dma_sync_single_for_device(struct device *dev,
                 return;
 
         phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
-        if (is_swiotlb_buffer(dev, phys))
-                swiotlb_sync_single_for_device(dev, phys, size, dir);
+        swiotlb_sync_single_for_device(dev, phys, size, dir);
 
         if (!dev_is_dma_coherent(dev))
                 arch_sync_dma_for_device(phys, size, dir);
@@ -1189,7 +1187,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
                 arch_sync_dma_for_device(phys, size, dir);
 
         iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
-        if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
+        if (iova == DMA_MAPPING_ERROR)
                 swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
         return iova;
 }
@@ -1209,8 +1207,7 @@ static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
 
         __iommu_dma_unmap(dev, dma_handle, size);
 
-        if (unlikely(is_swiotlb_buffer(dev, phys)))
-                swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+        swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
 }
 
 /*
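
All four hunks above follow the same shape: the explicit is_swiotlb_buffer()
guard is dropped because swiotlb_tbl_unmap_single() and the sync helpers are
now inline wrappers (see include/linux/swiotlb.h below) that perform the
single lookup themselves and do nothing for a non-swiotlb address. Condensed
before/after from the first hunk:

        /* Before: lookup in the guard, then again inside the callee. */
        if (is_swiotlb_buffer(dev, phys))
                swiotlb_sync_single_for_cpu(dev, phys, size, dir);

        /* After: one lookup, inside the inline wrapper. */
        swiotlb_sync_single_for_cpu(dev, phys, size, dir);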

drivers/xen/swiotlb-xen.c

Lines changed: 20 additions & 11 deletions
@@ -88,7 +88,8 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
         return 0;
 }
 
-static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
+static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev,
+                dma_addr_t dma_addr)
 {
         unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
         unsigned long xen_pfn = bfn_to_local_pfn(bfn);
@@ -99,8 +100,8 @@ static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
          * in our domain. Therefore _only_ check address within our domain.
          */
         if (pfn_valid(PFN_DOWN(paddr)))
-                return is_swiotlb_buffer(dev, paddr);
-        return 0;
+                return swiotlb_find_pool(dev, paddr);
+        return NULL;
 }
 
 #ifdef CONFIG_X86
@@ -227,8 +228,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
          * Ensure that the address returned is DMA'ble
          */
         if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
-                swiotlb_tbl_unmap_single(dev, map, size, dir,
-                                attrs | DMA_ATTR_SKIP_CPU_SYNC);
+                __swiotlb_tbl_unmap_single(dev, map, size, dir,
+                                attrs | DMA_ATTR_SKIP_CPU_SYNC,
+                                swiotlb_find_pool(dev, map));
                 return DMA_MAPPING_ERROR;
         }
 
@@ -254,6 +256,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                 size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
         phys_addr_t paddr = xen_dma_to_phys(hwdev, dev_addr);
+        struct io_tlb_pool *pool;
 
         BUG_ON(dir == DMA_NONE);
 
@@ -265,15 +268,18 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
         }
 
         /* NOTE: We use dev_addr here, not paddr! */
-        if (is_xen_swiotlb_buffer(hwdev, dev_addr))
-                swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
+        pool = xen_swiotlb_find_pool(hwdev, dev_addr);
+        if (pool)
+                __swiotlb_tbl_unmap_single(hwdev, paddr, size, dir,
+                                           attrs, pool);
 }
 
 static void
 xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
                 size_t size, enum dma_data_direction dir)
 {
         phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
+        struct io_tlb_pool *pool;
 
         if (!dev_is_dma_coherent(dev)) {
                 if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
@@ -282,18 +288,21 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
                         xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
         }
 
-        if (is_xen_swiotlb_buffer(dev, dma_addr))
-                swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+        pool = xen_swiotlb_find_pool(dev, dma_addr);
+        if (pool)
+                __swiotlb_sync_single_for_cpu(dev, paddr, size, dir, pool);
 }
 
 static void
 xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
                 size_t size, enum dma_data_direction dir)
 {
         phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
+        struct io_tlb_pool *pool;
 
-        if (is_xen_swiotlb_buffer(dev, dma_addr))
-                swiotlb_sync_single_for_device(dev, paddr, size, dir);
+        pool = xen_swiotlb_find_pool(dev, dma_addr);
+        if (pool)
+                __swiotlb_sync_single_for_device(dev, paddr, size, dir, pool);
 
         if (!dev_is_dma_coherent(dev)) {
                 if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
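
As the "NOTE" comment in xen_swiotlb_unmap_page() hints, these callers
cannot use the generic inline wrappers: the swiotlb test keys off dev_addr
while the unmap/sync workers take paddr, so swiotlb-xen open-codes the
find-then-call pattern. A sketch of the shape the three callers share (the
helper name here is hypothetical, not part of this commit):

        static void xen_swiotlb_do_unmap(struct device *hwdev, dma_addr_t dev_addr,
                        phys_addr_t paddr, size_t size, enum dma_data_direction dir,
                        unsigned long attrs)
        {
                struct io_tlb_pool *pool;

                /* Lookup uses dev_addr; the actual unmap/bounce uses paddr. */
                pool = xen_swiotlb_find_pool(hwdev, dev_addr);
                if (pool)
                        __swiotlb_tbl_unmap_single(hwdev, paddr, size, dir,
                                                   attrs, pool);
        }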

include/linux/scatterlist.h

Lines changed: 1 addition & 1 deletion
@@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg)
  * Description:
  *   Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all
  *   elements may have been bounced, so the caller would have to check
- *   individual SG entries with is_swiotlb_buffer().
+ *   individual SG entries with swiotlb_find_pool().
  */
 static inline bool sg_dma_is_swiotlb(struct scatterlist *sg)
 {

include/linux/swiotlb.h

Lines changed: 62 additions & 43 deletions
@@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
                 int (*remap)(void *tlb, unsigned long nslabs));
 extern void __init swiotlb_update_mem_attributes(void);
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
-                size_t mapping_size,
-                unsigned int alloc_aligned_mask, enum dma_data_direction dir,
-                unsigned long attrs);
-
-extern void swiotlb_tbl_unmap_single(struct device *hwdev,
-                                     phys_addr_t tlb_addr,
-                                     size_t mapping_size,
-                                     enum dma_data_direction dir,
-                                     unsigned long attrs);
-
-void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
-                size_t size, enum dma_data_direction dir);
-void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
-                size_t size, enum dma_data_direction dir);
-dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
-                size_t size, enum dma_data_direction dir, unsigned long attrs);
-
 #ifdef CONFIG_SWIOTLB
 
 /**
@@ -143,37 +125,27 @@ struct io_tlb_mem {
 #endif
 };
 
-#ifdef CONFIG_SWIOTLB_DYNAMIC
-
-struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
-
-#else
-
-static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
-                phys_addr_t paddr)
-{
-        return &dev->dma_io_tlb_mem->defpool;
-}
-
-#endif
+struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr);
 
 /**
- * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb
+ * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs
  * @dev: Device which has mapped the buffer.
  * @paddr: Physical address within the DMA buffer.
  *
- * Check if @paddr points into a bounce buffer.
+ * Find the swiotlb pool that @paddr points into.
  *
  * Return:
- * * %true if @paddr points into a bounce buffer
- * * %false otherwise
+ * * pool address if @paddr points into a bounce buffer
+ * * NULL if @paddr does not point into a bounce buffer. As such, this function
+ *   can be used to determine if @paddr denotes a swiotlb bounce buffer.
  */
-static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+                phys_addr_t paddr)
 {
         struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 
         if (!mem)
-                return false;
+                return NULL;
 
 #ifdef CONFIG_SWIOTLB_DYNAMIC
         /*
@@ -182,16 +154,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
          * If a SWIOTLB address is checked on another CPU, then it was
          * presumably loaded by the device driver from an unspecified private
          * data structure. Make sure that this load is ordered before reading
-         * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool().
+         * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool().
          *
          * This barrier pairs with smp_mb() in swiotlb_find_slots().
          */
         smp_rmb();
-        return READ_ONCE(dev->dma_uses_io_tlb) &&
-                swiotlb_find_pool(dev, paddr);
+        if (READ_ONCE(dev->dma_uses_io_tlb))
+                return __swiotlb_find_pool(dev, paddr);
 #else
-        return paddr >= mem->defpool.start && paddr < mem->defpool.end;
+        if (paddr >= mem->defpool.start && paddr < mem->defpool.end)
+                return &mem->defpool;
 #endif
+
+        return NULL;
 }
 
 static inline bool is_swiotlb_force_bounce(struct device *dev)
@@ -219,9 +194,10 @@ static inline void swiotlb_dev_init(struct device *dev)
 {
 }
 
-static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
+static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
+                phys_addr_t paddr)
 {
-        return false;
+        return NULL;
 }
 static inline bool is_swiotlb_force_bounce(struct device *dev)
 {
@@ -260,6 +236,49 @@ static inline phys_addr_t default_swiotlb_limit(void)
 }
 #endif /* CONFIG_SWIOTLB */
 
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
+                size_t mapping_size, unsigned int alloc_aligned_mask,
+                enum dma_data_direction dir, unsigned long attrs);
+dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
+                size_t size, enum dma_data_direction dir, unsigned long attrs);
+
+void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+                size_t mapping_size, enum dma_data_direction dir,
+                unsigned long attrs, struct io_tlb_pool *pool);
+static inline void swiotlb_tbl_unmap_single(struct device *dev,
+                phys_addr_t addr, size_t size, enum dma_data_direction dir,
+                unsigned long attrs)
+{
+        struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+        if (unlikely(pool))
+                __swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool);
+}
+
+void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+                size_t size, enum dma_data_direction dir,
+                struct io_tlb_pool *pool);
+static inline void swiotlb_sync_single_for_device(struct device *dev,
+                phys_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+        struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+        if (unlikely(pool))
+                __swiotlb_sync_single_for_device(dev, addr, size, dir, pool);
+}
+
+void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+                size_t size, enum dma_data_direction dir,
+                struct io_tlb_pool *pool);
+static inline void swiotlb_sync_single_for_cpu(struct device *dev,
+                phys_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+        struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr);
+
+        if (unlikely(pool))
+                __swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool);
+}
+
 extern void swiotlb_print_info(void);
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
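
One detail worth noting from the hunks above: with CONFIG_SWIOTLB_DYNAMIC
unset, the renamed swiotlb_find_pool() is still just the old range check,
now returning the default pool instead of true, which is why the commit
message can say the generated code is essentially unchanged in that
configuration. Condensed from the non-dynamic path above:

        static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev,
                        phys_addr_t paddr)
        {
                struct io_tlb_mem *mem = dev->dma_io_tlb_mem;

                /* Same comparison the old boolean helper made; only the
                 * return value changed from true/false to pool/NULL. */
                if (mem && paddr >= mem->defpool.start && paddr < mem->defpool.end)
                        return &mem->defpool;
                return NULL;
        }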

kernel/dma/direct.c

Lines changed: 3 additions & 7 deletions
@@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
         for_each_sg(sgl, sg, nents, i) {
                 phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
 
-                if (unlikely(is_swiotlb_buffer(dev, paddr)))
-                        swiotlb_sync_single_for_device(dev, paddr, sg->length,
-                                        dir);
+                swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
 
                 if (!dev_is_dma_coherent(dev))
                         arch_sync_dma_for_device(paddr, sg->length,
@@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
                 if (!dev_is_dma_coherent(dev))
                         arch_sync_dma_for_cpu(paddr, sg->length, dir);
 
-                if (unlikely(is_swiotlb_buffer(dev, paddr)))
-                        swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
-                                        dir);
+                swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
 
                 if (dir == DMA_FROM_DEVICE)
                         arch_dma_mark_clean(paddr, sg->length);
@@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
 bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
 {
         return !dev_is_dma_coherent(dev) ||
-               is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
+               swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr));
 }
 
 /**

kernel/dma/direct.h

Lines changed: 3 additions & 6 deletions
@@ -58,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
 {
         phys_addr_t paddr = dma_to_phys(dev, addr);
 
-        if (unlikely(is_swiotlb_buffer(dev, paddr)))
-                swiotlb_sync_single_for_device(dev, paddr, size, dir);
+        swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
         if (!dev_is_dma_coherent(dev))
                 arch_sync_dma_for_device(paddr, size, dir);
@@ -75,8 +74,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
                 arch_sync_dma_for_cpu_all();
         }
 
-        if (unlikely(is_swiotlb_buffer(dev, paddr)))
-                swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+        swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
 
         if (dir == DMA_FROM_DEVICE)
                 arch_dma_mark_clean(paddr, size);
@@ -121,8 +119,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
         if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
                 dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-        if (unlikely(is_swiotlb_buffer(dev, phys)))
-                swiotlb_tbl_unmap_single(dev, phys, size, dir,
+        swiotlb_tbl_unmap_single(dev, phys, size, dir,
                         attrs | DMA_ATTR_SKIP_CPU_SYNC);
 }
 #endif /* _KERNEL_DMA_DIRECT_H */
