
Commit af13356

Petr Tesarik authored and Christoph Hellwig committed
swiotlb: extend buffer pre-padding to alloc_align_mask if necessary
Allow a buffer pre-padding of up to alloc_align_mask, even if it requires allocating additional IO TLB slots.

If the allocation alignment is bigger than IO_TLB_SIZE and min_align_mask covers any non-zero bits in the original address between IO_TLB_SIZE and alloc_align_mask, these bits are not preserved in the swiotlb buffer address.

To fix this case, increase the allocation size and use a larger offset within the allocated buffer. As a result, extra padding slots may be allocated before the mapping start address.

Leave orig_addr in these padding slots initialized to INVALID_PHYS_ADDR. These slots do not correspond to any CPU buffer, so attempts to sync the data should be ignored.

The padding slots should be automatically released when the buffer is unmapped. However, swiotlb_tbl_unmap_single() takes only the address of the DMA buffer slot, not the first padding slot. Save the number of padding slots in struct io_tlb_slot and use it to adjust the slot index in swiotlb_release_slots(), so all allocated slots are properly freed.

Fixes: 2fd4fa5 ("swiotlb: Fix alignment checks when both allocation and DMA masks are present")
Link: https://lore.kernel.org/linux-iommu/[email protected]/
Signed-off-by: Petr Tesarik <[email protected]>
Reviewed-by: Michael Kelley <[email protected]>
Tested-by: Michael Kelley <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
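
The arithmetic described above is easy to see in isolation. The following standalone sketch (not kernel code; the 2 KiB slot size matches IO_TLB_SIZE, but the device address and masks are made-up examples) mirrors the extended swiotlb_align_offset() computation and the offset split that swiotlb_tbl_map_single() now performs:

/*
 * Minimal sketch, assuming a 2 KiB IO TLB slot (IO_TLB_SHIFT = 11) and
 * made-up device constants; not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define IO_TLB_SHIFT	11
#define IO_TLB_SIZE	(1u << IO_TLB_SHIFT)

/* Mirrors the patched swiotlb_align_offset(): the min_align_mask bits of
 * the original address must reappear in the bounce buffer address, now up
 * to alloc_align_mask instead of only within a single slot. */
static unsigned int align_offset(uint64_t min_align_mask,
				 unsigned int alloc_align_mask, uint64_t addr)
{
	return (unsigned int)(addr & min_align_mask &
			      (alloc_align_mask | (IO_TLB_SIZE - 1)));
}

int main(void)
{
	uint64_t orig_addr = 0x12345a00;	/* example original DMA address */
	uint64_t min_align_mask = 0xffff;	/* device preserves the low 16 bits */
	unsigned int alloc_align_mask = 0x3fff;	/* 16 KiB allocation alignment */

	unsigned int offset = align_offset(min_align_mask, alloc_align_mask,
					   orig_addr);

	/* Split as swiotlb_tbl_map_single() now does: whole padding slots go
	 * in front of the mapping, the remainder stays inside the first
	 * non-padding slot. */
	unsigned int pad_slots = offset >> IO_TLB_SHIFT;

	offset &= IO_TLB_SIZE - 1;
	printf("pad_slots=%u, in-slot offset=%#x\n", pad_slots, offset);
	return 0;
}

With these example values the offset is 0x1a00: three full padding slots plus 0x200 bytes inside the first non-padding slot. Before this patch only the 0x200 within-slot part was applied, so the address bits between IO_TLB_SIZE and alloc_align_mask were lost, which is exactly the case the commit message describes.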
1 parent 026e680 commit af13356


kernel/dma/swiotlb.c

Lines changed: 46 additions & 13 deletions
@@ -69,11 +69,14 @@
  * @alloc_size: Size of the allocated buffer.
  * @list: The free list describing the number of free entries available
  *	from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ *	allocated non-padding slot.
  */
 struct io_tlb_slot {
 	phys_addr_t orig_addr;
 	size_t alloc_size;
-	unsigned int list;
+	unsigned short list;
+	unsigned short pad_slots;
 };
 
 static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
 					 mem->nslabs - i);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
+		mem->slots[i].pad_slots = 0;
 	}
 
 	memset(vaddr, 0, bytes);
@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev)
 #endif
 }
 
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev:         Owning device.
+ * @align_mask:  Allocation alignment mask.
+ * @addr:        DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
  */
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+					 unsigned int align_mask, u64 addr)
 {
-	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+	return addr & dma_get_min_align_mask(dev) &
+		(align_mask | (IO_TLB_SIZE - 1));
 }
 
 /*
@@ -847,7 +869,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 		return;
 
 	tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
-	orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
+	orig_addr_offset = swiotlb_align_offset(dev, 0, orig_addr);
 	if (tlb_offset < orig_addr_offset) {
 		dev_WARN_ONCE(dev, 1,
 			"Access before mapping start detected. orig offset %u, requested offset %u.\n",
@@ -1005,7 +1027,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
 	unsigned long max_slots = get_max_slots(boundary_mask);
 	unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
 	unsigned int nslots = nr_slots(alloc_size), stride;
-	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
 	unsigned int index, slots_checked, count = 0, i;
 	unsigned long flags;
 	unsigned int slot_base;
@@ -1328,11 +1350,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		unsigned long attrs)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	unsigned int offset;
 	struct io_tlb_pool *pool;
 	unsigned int i;
 	int index;
 	phys_addr_t tlb_addr;
+	unsigned short pad_slots;
 
 	if (!mem || !mem->nslabs) {
 		dev_warn_ratelimited(dev,
@@ -1349,6 +1372,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
+	offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
 	index = swiotlb_find_slots(dev, orig_addr,
 				   alloc_size + offset, alloc_align_mask, &pool);
 	if (index == -1) {
@@ -1364,6 +1388,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	 * This is needed when we sync the memory. Then we sync the buffer if
 	 * needed.
 	 */
+	pad_slots = offset >> IO_TLB_SHIFT;
+	offset &= (IO_TLB_SIZE - 1);
+	index += pad_slots;
+	pool->slots[index].pad_slots = pad_slots;
 	for (i = 0; i < nr_slots(alloc_size + offset); i++)
 		pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
 	tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1384,13 +1412,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 {
 	struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
 	unsigned long flags;
-	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
-	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-	int aindex = index / mem->area_nslabs;
-	struct io_tlb_area *area = &mem->areas[aindex];
+	unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+	int index, nslots, aindex;
+	struct io_tlb_area *area;
 	int count, i;
 
+	index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+	index -= mem->slots[index].pad_slots;
+	nslots = nr_slots(mem->slots[index].alloc_size + offset);
+	aindex = index / mem->area_nslabs;
+	area = &mem->areas[aindex];
+
 	/*
 	 * Return the buffer to the free list by setting the corresponding
 	 * entries to indicate the number of contiguous entries available.
@@ -1413,6 +1445,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 		mem->slots[i].list = ++count;
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
+		mem->slots[i].pad_slots = 0;
 	}
 
 	/*
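
For the unmap side touched by the last two hunks, here is a companion sketch under the same toy assumptions (again not kernel code): swiotlb_release_slots() only receives the DMA buffer address, so it derives the first non-padding slot from that address and then steps back by the recorded pad_slots before working out how many slots to free.

/*
 * Companion sketch for the release path, same toy constants as above;
 * slot layout in this example: slots 0-2 are padding, the 4096-byte
 * mapping starts in slot 3 at in-slot offset 0x200.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define IO_TLB_SHIFT	11
#define IO_TLB_SIZE	(1u << IO_TLB_SHIFT)

struct toy_slot {
	size_t alloc_size;		/* as recorded by the slot allocator */
	unsigned short pad_slots;	/* valid only in the first non-padding slot */
};

static unsigned int nr_slots(size_t bytes)
{
	return (unsigned int)((bytes + IO_TLB_SIZE - 1) >> IO_TLB_SHIFT);
}

int main(void)
{
	struct toy_slot slots[16] = { { 0 } };
	uint64_t pool_start = 0x80000000;

	/* State left behind by the map side in this example: the first
	 * allocated slot records the allocation size minus the in-slot
	 * offset (3 padding slots + 4096 mapped bytes), and the first
	 * non-padding slot remembers how many padding slots precede it. */
	slots[0].alloc_size = 3 * IO_TLB_SIZE + 4096;
	slots[3].pad_slots = 3;

	uint64_t tlb_addr = pool_start + (3u << IO_TLB_SHIFT) + 0x200;

	/* Release-side arithmetic mirroring swiotlb_release_slots(); the
	 * min_align_mask term is folded away because the toy mask covers
	 * all low bits of the address. */
	unsigned int offset = (unsigned int)(tlb_addr & (IO_TLB_SIZE - 1));
	int index = (int)((tlb_addr - offset - pool_start) >> IO_TLB_SHIFT);

	index -= slots[index].pad_slots;	/* step back to the first padding slot */
	printf("first slot freed=%d, slots freed=%u\n",
	       index, nr_slots(slots[index].alloc_size + offset));
	return 0;
}

In this example the release path starts freeing at slot 0 and frees six slots: the three padding slots plus the three slots occupied by the 0x200-byte offset and the 4096-byte mapping, matching what was allocated.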
