
Commit 705c1cd

TinaZhangZW authored and joergroedel committed
iommu/vt-d: Introduce batched cache invalidation
Converts IOTLB and Dev-IOTLB invalidation to a batched model. Cache tag invalidation requests for a domain are now accumulated in a qi_batch structure before being flushed in bulk. It replaces the previous per-request qi_flush approach with a more efficient batching mechanism.

Co-developed-by: Lu Baolu <[email protected]>
Signed-off-by: Lu Baolu <[email protected]>
Signed-off-by: Tina Zhang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Joerg Roedel <[email protected]>
1 parent 777cdd8 commit 705c1cd

File tree

1 file changed: +107 -15 lines changed


drivers/iommu/intel/cache.c

Lines changed: 107 additions & 15 deletions
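
The batching model described in the commit message relies on a struct qi_batch that this file only consumes; the structure itself is introduced elsewhere in the same series. As a rough orientation sketch only (field and constant names inferred from the helpers in the diff below, not quoted from this commit), it amounts to a fixed-size array of queued-invalidation descriptors plus a fill index:

	/*
	 * Orientation sketch, not part of this diff: the qi_batch_add_*()
	 * helpers below write a descriptor at descs[index], and
	 * qi_batch_flush_descs() submits the accumulated descriptors in a
	 * single qi_submit_sync() call before resetting the batch with
	 * memset().
	 */
	struct qi_batch {
		struct qi_desc descs[QI_MAX_BATCHED_DESC_COUNT];
		unsigned int index;
	};

In the flush paths below, the batch is flushed whenever the cache-tag loop moves on to a different IOMMU and once more before the domain's cache lock is released, so descriptors queued for one IOMMU are never submitted to another.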
@@ -262,6 +262,79 @@ static unsigned long calculate_psi_aligned_address(unsigned long start,
 	return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
 }
 
+static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+	if (!iommu || !batch->index)
+		return;
+
+	qi_submit_sync(iommu, batch->descs, batch->index, 0);
+
+	/* Reset the index value and clean the whole batch buffer. */
+	memset(batch, 0, sizeof(*batch));
+}
+
+static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
+{
+	if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
+		qi_batch_flush_descs(iommu, batch);
+}
+
+static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+			       unsigned int size_order, u64 type,
+			       struct qi_batch *batch)
+{
+	qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+				   u16 qdep, u64 addr, unsigned int mask,
+				   struct qi_batch *batch)
+{
+	/*
+	 * According to VT-d spec, software is recommended to not submit any Device-TLB
+	 * invalidation requests while address remapping hardware is disabled.
+	 */
+	if (!(iommu->gcmd & DMA_GCMD_TE))
+		return;
+
+	qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
+				u64 addr, unsigned long npages, bool ih,
+				struct qi_batch *batch)
+{
+	/*
+	 * npages == -1 means a PASID-selective invalidation, otherwise,
+	 * a positive value for Page-selective-within-PASID invalidation.
+	 * 0 is not a valid input.
+	 */
+	if (!npages)
+		return;
+
+	qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
+static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+					 u32 pasid, u16 qdep, u64 addr,
+					 unsigned int size_order, struct qi_batch *batch)
+{
+	/*
+	 * According to VT-d spec, software is recommended to not submit any
+	 * Device-TLB invalidation requests while address remapping hardware
+	 * is disabled.
+	 */
+	if (!(iommu->gcmd & DMA_GCMD_TE))
+		return;
+
+	qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
+				&batch->descs[batch->index]);
+	qi_batch_increment_index(iommu, batch);
+}
+
 static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
 				  unsigned long addr, unsigned long pages,
 				  unsigned long mask, int ih)
@@ -270,7 +343,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
 	u64 type = DMA_TLB_PSI_FLUSH;
 
 	if (domain->use_first_level) {
-		qi_flush_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih);
+		qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
+				    pages, ih, domain->qi_batch);
 		return;
 	}
 
@@ -287,7 +361,8 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
 	}
 
 	if (ecap_qis(iommu->ecap))
-		qi_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
+		qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
+				   domain->qi_batch);
 	else
 		__iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
 }
@@ -303,19 +378,20 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_
 	sid = PCI_DEVID(info->bus, info->devfn);
 
 	if (tag->pasid == IOMMU_NO_PASID) {
-		qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
-				   addr, mask);
+		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+				       addr, mask, domain->qi_batch);
 		if (info->dtlb_extra_inval)
-			qi_flush_dev_iotlb(iommu, sid, info->pfsid,
-					   info->ats_qdep, addr, mask);
+			qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
+					       addr, mask, domain->qi_batch);
 		return;
 	}
 
-	qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
-				 info->ats_qdep, addr, mask);
+	qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+				     info->ats_qdep, addr, mask, domain->qi_batch);
 	if (info->dtlb_extra_inval)
-		qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, tag->pasid,
-					 info->ats_qdep, addr, mask);
+		qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
+					     info->ats_qdep, addr, mask,
+					     domain->qi_batch);
 }
 
 static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
@@ -327,11 +403,11 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
 	info = dev_iommu_priv_get(tag->dev);
 	sid = PCI_DEVID(info->bus, info->devfn);
 
-	qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
-			   MAX_AGAW_PFN_WIDTH);
+	qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+			       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
 	if (info->dtlb_extra_inval)
-		qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
-				   MAX_AGAW_PFN_WIDTH);
+		qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
+				       MAX_AGAW_PFN_WIDTH, domain->qi_batch);
 }
 
 /*
@@ -341,6 +417,7 @@ static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_
 void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 			   unsigned long end, int ih)
 {
+	struct intel_iommu *iommu = NULL;
 	unsigned long pages, mask, addr;
 	struct cache_tag *tag;
 	unsigned long flags;
@@ -349,6 +426,10 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 
 	spin_lock_irqsave(&domain->cache_lock, flags);
 	list_for_each_entry(tag, &domain->cache_tags, node) {
+		if (iommu && iommu != tag->iommu)
+			qi_batch_flush_descs(iommu, domain->qi_batch);
+		iommu = tag->iommu;
+
 		switch (tag->type) {
 		case CACHE_TAG_IOTLB:
 		case CACHE_TAG_NESTING_IOTLB:
@@ -372,6 +453,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
 
 		trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
 	}
+	qi_batch_flush_descs(iommu, domain->qi_batch);
 	spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
 
@@ -381,11 +463,16 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
  */
 void cache_tag_flush_all(struct dmar_domain *domain)
 {
+	struct intel_iommu *iommu = NULL;
 	struct cache_tag *tag;
 	unsigned long flags;
 
 	spin_lock_irqsave(&domain->cache_lock, flags);
 	list_for_each_entry(tag, &domain->cache_tags, node) {
+		if (iommu && iommu != tag->iommu)
+			qi_batch_flush_descs(iommu, domain->qi_batch);
+		iommu = tag->iommu;
+
 		switch (tag->type) {
 		case CACHE_TAG_IOTLB:
 		case CACHE_TAG_NESTING_IOTLB:
@@ -399,6 +486,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
 
 		trace_cache_tag_flush_all(tag);
 	}
+	qi_batch_flush_descs(iommu, domain->qi_batch);
 	spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
 
@@ -416,6 +504,7 @@ void cache_tag_flush_all(struct dmar_domain *domain)
 void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 			      unsigned long end)
 {
+	struct intel_iommu *iommu = NULL;
 	unsigned long pages, mask, addr;
 	struct cache_tag *tag;
 	unsigned long flags;
@@ -424,7 +513,9 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 
 	spin_lock_irqsave(&domain->cache_lock, flags);
 	list_for_each_entry(tag, &domain->cache_tags, node) {
-		struct intel_iommu *iommu = tag->iommu;
+		if (iommu && iommu != tag->iommu)
+			qi_batch_flush_descs(iommu, domain->qi_batch);
+		iommu = tag->iommu;
 
 		if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
 			iommu_flush_write_buffer(iommu);
@@ -437,5 +528,6 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
 
 		trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
 	}
+	qi_batch_flush_descs(iommu, domain->qi_batch);
 	spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
