Skip to content

Commit 84b2baf

Browse files
ashishmhetre8 authored and joergroedel committed
iommu/io-pgtable-arm: Optimise non-coherent unmap
The current __arm_lpae_unmap() function calls dma_sync() on individual PTEs after clearing them. Overall unmap performance can be improved by around 25% for large buffer sizes by combining the syncs for adjacent leaf entries. Optimize the unmap time by clearing all the leaf entries and issuing a single dma_sync() for them.

Below is a detailed analysis of average unmap latency (in us) with and without this optimization, obtained by running dma_map_benchmark for different buffer sizes.

UnMap Latency (us)

Size     Without optimization    With optimization    % gain with optimization
4KB      3                       3                    0
8KB      4                       3.8                  5
16KB     6.1                     5.4                  11.48
32KB     10.2                    8.5                  16.67
64KB     18.5                    14.9                 19.46
128KB    35                      27.5                 21.43
256KB    67.5                    52.2                 22.67
512KB    127.9                   97.2                 24.00
1MB      248.6                   187.4                24.62
2MB      65.5                    65.5                 0
4MB      119.2                   119                  0.17

Reviewed-by: Robin Murphy <[email protected]>
Signed-off-by: Ashish Mhetre <[email protected]>
Acked-by: Will Deacon <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Joerg Roedel <[email protected]>
1 parent 6c17c7d commit 84b2baf

File tree

1 file changed

+17
-14
lines changed

1 file changed

+17
-14
lines changed

drivers/iommu/io-pgtable-arm.c

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -274,13 +274,13 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
274274
sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
275275
}
276276

277-
static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg)
277+
static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
278278
{
279+
for (int i = 0; i < num_entries; i++)
280+
ptep[i] = 0;
279281

280-
*ptep = 0;
281-
282-
if (!cfg->coherent_walk)
283-
__arm_lpae_sync_pte(ptep, 1, cfg);
282+
if (!cfg->coherent_walk && num_entries)
283+
__arm_lpae_sync_pte(ptep, num_entries, cfg);
284284
}
285285

286286
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -654,26 +654,29 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
654654
max_entries = ARM_LPAE_PTES_PER_TABLE(data) - unmap_idx_start;
655655
num_entries = min_t(int, pgcount, max_entries);
656656

657-
while (i < num_entries) {
658-
pte = READ_ONCE(*ptep);
657+
/* Find and handle non-leaf entries */
658+
for (i = 0; i < num_entries; i++) {
659+
pte = READ_ONCE(ptep[i]);
659660
if (WARN_ON(!pte))
660661
break;
661662

662-
__arm_lpae_clear_pte(ptep, &iop->cfg);
663-
664663
if (!iopte_leaf(pte, lvl, iop->fmt)) {
664+
__arm_lpae_clear_pte(&ptep[i], &iop->cfg, 1);
665+
665666
/* Also flush any partial walks */
666667
io_pgtable_tlb_flush_walk(iop, iova + i * size, size,
667668
ARM_LPAE_GRANULE(data));
668669
__arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
669-
} else if (!iommu_iotlb_gather_queued(gather)) {
670-
io_pgtable_tlb_add_page(iop, gather, iova + i * size, size);
671670
}
672-
673-
ptep++;
674-
i++;
675671
}
676672

673+
/* Clear the remaining entries */
674+
__arm_lpae_clear_pte(ptep, &iop->cfg, i);
675+
676+
if (gather && !iommu_iotlb_gather_queued(gather))
677+
for (int j = 0; j < i; j++)
678+
io_pgtable_tlb_add_page(iop, gather, iova + j * size, size);
679+
677680
return i * size;
678681
} else if (iopte_leaf(pte, lvl, iop->fmt)) {
679682
/*

0 commit comments

Comments
 (0)