Skip to content

Commit 21c1f90

Browse files
niklas88 and joergroedel
authored and committed
s390/pci: use lock-free I/O translation updates
I/O translation tables on s390 use 8 byte page table entries and tables which are allocated lazily but only freed when the entire I/O translation table is torn down. Also each IOVA can at any time only translate to one physical address. Furthermore, I/O table accesses by the IOMMU hardware are cache coherent. With a bit of care we can thus use atomic updates to manipulate the translation table without having to use a global lock at all. This is done analogously to the existing I/O translation table handling code used on Intel and AMD x86 systems. Signed-off-by: Niklas Schnelle <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Joerg Roedel <[email protected]>
1 parent 08955af commit 21c1f90

File tree

3 files changed

+58
-54
lines changed

3 files changed

+58
-54
lines changed

arch/s390/include/asm/pci.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ struct zpci_dev {
157157

158158
/* DMA stuff */
159159
unsigned long *dma_table;
160-
spinlock_t dma_table_lock;
161160
int tlb_refresh;
162161

163162
spinlock_t iommu_bitmap_lock;

arch/s390/pci/pci_dma.c

Lines changed: 45 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -63,37 +63,55 @@ static void dma_free_page_table(void *table)
6363
kmem_cache_free(dma_page_table_cache, table);
6464
}
6565

66-
static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
66+
static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
6767
{
68+
unsigned long old_rte, rte;
6869
unsigned long *sto;
6970

70-
if (reg_entry_isvalid(*entry))
71-
sto = get_rt_sto(*entry);
72-
else {
71+
rte = READ_ONCE(*rtep);
72+
if (reg_entry_isvalid(rte)) {
73+
sto = get_rt_sto(rte);
74+
} else {
7375
sto = dma_alloc_cpu_table();
7476
if (!sto)
7577
return NULL;
7678

77-
set_rt_sto(entry, virt_to_phys(sto));
78-
validate_rt_entry(entry);
79-
entry_clr_protected(entry);
79+
set_rt_sto(&rte, virt_to_phys(sto));
80+
validate_rt_entry(&rte);
81+
entry_clr_protected(&rte);
82+
83+
old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
84+
if (old_rte != ZPCI_TABLE_INVALID) {
85+
/* Someone else was faster, use theirs */
86+
dma_free_cpu_table(sto);
87+
sto = get_rt_sto(old_rte);
88+
}
8089
}
8190
return sto;
8291
}
8392

84-
static unsigned long *dma_get_page_table_origin(unsigned long *entry)
93+
static unsigned long *dma_get_page_table_origin(unsigned long *step)
8594
{
95+
unsigned long old_ste, ste;
8696
unsigned long *pto;
8797

88-
if (reg_entry_isvalid(*entry))
89-
pto = get_st_pto(*entry);
90-
else {
98+
ste = READ_ONCE(*step);
99+
if (reg_entry_isvalid(ste)) {
100+
pto = get_st_pto(ste);
101+
} else {
91102
pto = dma_alloc_page_table();
92103
if (!pto)
93104
return NULL;
94-
set_st_pto(entry, virt_to_phys(pto));
95-
validate_st_entry(entry);
96-
entry_clr_protected(entry);
105+
set_st_pto(&ste, virt_to_phys(pto));
106+
validate_st_entry(&ste);
107+
entry_clr_protected(&ste);
108+
109+
old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
110+
if (old_ste != ZPCI_TABLE_INVALID) {
111+
/* Someone else was faster, use theirs */
112+
dma_free_page_table(pto);
113+
pto = get_st_pto(old_ste);
114+
}
97115
}
98116
return pto;
99117
}
@@ -117,38 +135,39 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
117135
return &pto[px];
118136
}
119137

120-
void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
138+
void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
121139
{
140+
unsigned long pte;
141+
142+
pte = READ_ONCE(*ptep);
122143
if (flags & ZPCI_PTE_INVALID) {
123-
invalidate_pt_entry(entry);
144+
invalidate_pt_entry(&pte);
124145
} else {
125-
set_pt_pfaa(entry, page_addr);
126-
validate_pt_entry(entry);
146+
set_pt_pfaa(&pte, page_addr);
147+
validate_pt_entry(&pte);
127148
}
128149

129150
if (flags & ZPCI_TABLE_PROTECTED)
130-
entry_set_protected(entry);
151+
entry_set_protected(&pte);
131152
else
132-
entry_clr_protected(entry);
153+
entry_clr_protected(&pte);
154+
155+
xchg(ptep, pte);
133156
}
134157

135158
static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
136159
dma_addr_t dma_addr, size_t size, int flags)
137160
{
138161
unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
139162
phys_addr_t page_addr = (pa & PAGE_MASK);
140-
unsigned long irq_flags;
141163
unsigned long *entry;
142164
int i, rc = 0;
143165

144166
if (!nr_pages)
145167
return -EINVAL;
146168

147-
spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
148-
if (!zdev->dma_table) {
149-
rc = -EINVAL;
150-
goto out_unlock;
151-
}
169+
if (!zdev->dma_table)
170+
return -EINVAL;
152171

153172
for (i = 0; i < nr_pages; i++) {
154173
entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
@@ -173,8 +192,6 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
173192
dma_update_cpu_trans(entry, page_addr, flags);
174193
}
175194
}
176-
out_unlock:
177-
spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
178195
return rc;
179196
}
180197

@@ -558,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
558575
WARN_ON(zdev->s390_domain);
559576

560577
spin_lock_init(&zdev->iommu_bitmap_lock);
561-
spin_lock_init(&zdev->dma_table_lock);
562578

563579
zdev->dma_table = dma_alloc_cpu_table();
564580
if (!zdev->dma_table) {

drivers/iommu/s390-iommu.c

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ struct s390_domain {
2020
struct iommu_domain domain;
2121
struct list_head devices;
2222
unsigned long *dma_table;
23-
spinlock_t dma_table_lock;
2423
spinlock_t list_lock;
2524
struct rcu_head rcu;
2625
};
@@ -62,7 +61,6 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
6261
s390_domain->domain.geometry.aperture_start = 0;
6362
s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
6463

65-
spin_lock_init(&s390_domain->dma_table_lock);
6664
spin_lock_init(&s390_domain->list_lock);
6765
INIT_LIST_HEAD_RCU(&s390_domain->devices);
6866

@@ -265,14 +263,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
265263
unsigned long nr_pages, int flags)
266264
{
267265
phys_addr_t page_addr = pa & PAGE_MASK;
268-
unsigned long irq_flags, i;
269266
unsigned long *entry;
267+
unsigned long i;
270268
int rc;
271269

272-
if (!nr_pages)
273-
return 0;
274-
275-
spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
276270
for (i = 0; i < nr_pages; i++) {
277271
entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
278272
if (unlikely(!entry)) {
@@ -283,7 +277,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
283277
page_addr += PAGE_SIZE;
284278
dma_addr += PAGE_SIZE;
285279
}
286-
spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
287280

288281
return 0;
289282

@@ -296,22 +289,17 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
296289
break;
297290
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
298291
}
299-
spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
300292

301293
return rc;
302294
}
303295

304296
static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
305297
dma_addr_t dma_addr, unsigned long nr_pages)
306298
{
307-
unsigned long irq_flags, i;
308299
unsigned long *entry;
300+
unsigned long i;
309301
int rc = 0;
310302

311-
if (!nr_pages)
312-
return 0;
313-
314-
spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
315303
for (i = 0; i < nr_pages; i++) {
316304
entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
317305
if (unlikely(!entry)) {
@@ -321,7 +309,6 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
321309
dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
322310
dma_addr += PAGE_SIZE;
323311
}
324-
spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
325312

326313
return rc;
327314
}
@@ -363,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
363350
dma_addr_t iova)
364351
{
365352
struct s390_domain *s390_domain = to_s390_domain(domain);
366-
unsigned long *sto, *pto, *rto, flags;
353+
unsigned long *rto, *sto, *pto;
354+
unsigned long ste, pte, rte;
367355
unsigned int rtx, sx, px;
368356
phys_addr_t phys = 0;
369357

@@ -376,16 +364,17 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
376364
px = calc_px(iova);
377365
rto = s390_domain->dma_table;
378366

379-
spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
380-
if (rto && reg_entry_isvalid(rto[rtx])) {
381-
sto = get_rt_sto(rto[rtx]);
382-
if (sto && reg_entry_isvalid(sto[sx])) {
383-
pto = get_st_pto(sto[sx]);
384-
if (pto && pt_entry_isvalid(pto[px]))
385-
phys = pto[px] & ZPCI_PTE_ADDR_MASK;
367+
rte = READ_ONCE(rto[rtx]);
368+
if (reg_entry_isvalid(rte)) {
369+
sto = get_rt_sto(rte);
370+
ste = READ_ONCE(sto[sx]);
371+
if (reg_entry_isvalid(ste)) {
372+
pto = get_st_pto(ste);
373+
pte = READ_ONCE(pto[px]);
374+
if (pt_entry_isvalid(pte))
375+
phys = pte & ZPCI_PTE_ADDR_MASK;
386376
}
387377
}
388-
spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
389378

390379
return phys;
391380
}

0 commit comments

Comments
 (0)