Skip to content

Commit cd6cf06

Browse files
committed
genirq/msi: Convert storage to xarray
The current linked list storage for MSI descriptors is suboptimal in several ways: 1) Looking up a MSI descriptor requires an O(n) list walk in the worst case 2) The upcoming support of runtime expansion of MSI-X vectors would need to do a full list walk to figure out whether a particular index is already associated. 3) Runtime expansion of sparse allocations is even more complex as the current implementation assumes an ordered list (increasing MSI index). Use an xarray which solves all of the above problems nicely. Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Michael Kelley <[email protected]> Tested-by: Nishanth Menon <[email protected]> Reviewed-by: Greg Kroah-Hartman <[email protected]> Reviewed-by: Jason Gunthorpe <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent bf5e758 commit cd6cf06

File tree

2 files changed

+83
-99
lines changed

2 files changed

+83
-99
lines changed

include/linux/msi.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818

1919
#include <linux/cpumask.h>
20+
#include <linux/xarray.h>
2021
#include <linux/mutex.h>
2122
#include <linux/list.h>
2223
#include <asm/msi.h>
@@ -123,7 +124,6 @@ struct pci_msi_desc {
123124

124125
/**
125126
* struct msi_desc - Descriptor structure for MSI based interrupts
126-
* @list: List head for management
127127
* @irq: The base interrupt number
128128
* @nvec_used: The number of vectors used
129129
* @dev: Pointer to the device which uses this descriptor
@@ -140,7 +140,6 @@ struct pci_msi_desc {
140140
*/
141141
struct msi_desc {
142142
/* Shared device/bus type independent data */
143-
struct list_head list;
144143
unsigned int irq;
145144
unsigned int nvec_used;
146145
struct device *dev;
@@ -176,16 +175,16 @@ enum msi_desc_filter {
176175
* msi_device_data - MSI per device data
177176
* @properties: MSI properties which are interesting to drivers
178177
* @platform_data: Platform-MSI specific data
179-
* @list: List of MSI descriptors associated to the device
180-
* @mutex: Mutex protecting the MSI list
181-
* @__next: Cached pointer to the next entry for iterators
178+
* @mutex: Mutex protecting the MSI descriptor store
179+
* @__store: Xarray for storing MSI descriptor pointers
180+
* @__iter_idx: Index to search the next entry for iterators
182181
*/
183182
struct msi_device_data {
184183
unsigned long properties;
185184
struct platform_msi_priv_data *platform_data;
186-
struct list_head list;
187185
struct mutex mutex;
188-
struct msi_desc *__next;
186+
struct xarray __store;
187+
unsigned long __iter_idx;
189188
};
190189

191190
int msi_setup_device_data(struct device *dev);

kernel/irq/msi.c

Lines changed: 77 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "internals.h"
2121

2222
static inline int msi_sysfs_create_group(struct device *dev);
23-
#define dev_to_msi_list(dev) (&(dev)->msi.data->list)
2423

2524
/**
2625
* msi_alloc_desc - Allocate an initialized msi_desc
@@ -41,7 +40,6 @@ static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec,
4140
if (!desc)
4241
return NULL;
4342

44-
INIT_LIST_HEAD(&desc->list);
4543
desc->dev = dev;
4644
desc->nvec_used = nvec;
4745
if (affinity) {
@@ -60,6 +58,17 @@ static void msi_free_desc(struct msi_desc *desc)
6058
kfree(desc);
6159
}
6260

61+
static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int index)
62+
{
63+
int ret;
64+
65+
desc->msi_index = index;
66+
ret = xa_insert(&md->__store, index, desc, GFP_KERNEL);
67+
if (ret)
68+
msi_free_desc(desc);
69+
return ret;
70+
}
71+
6372
/**
6473
* msi_add_msi_desc - Allocate and initialize a MSI descriptor
6574
* @dev: Pointer to the device for which the descriptor is allocated
@@ -77,12 +86,9 @@ int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc)
7786
if (!desc)
7887
return -ENOMEM;
7988

80-
/* Copy the MSI index and type specific data to the new descriptor. */
81-
desc->msi_index = init_desc->msi_index;
89+
/* Copy type specific data to the new descriptor. */
8290
desc->pci = init_desc->pci;
83-
84-
list_add_tail(&desc->list, &dev->msi.data->list);
85-
return 0;
91+
return msi_insert_desc(dev->msi.data, desc, init_desc->msi_index);
8692
}
8793

8894
/**
@@ -95,28 +101,41 @@ int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc)
95101
*/
96102
static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsigned int ndesc)
97103
{
98-
struct msi_desc *desc, *tmp;
99-
LIST_HEAD(list);
100-
unsigned int i;
104+
unsigned int idx, last = index + ndesc - 1;
105+
struct msi_desc *desc;
106+
int ret;
101107

102108
lockdep_assert_held(&dev->msi.data->mutex);
103109

104-
for (i = 0; i < ndesc; i++) {
110+
for (idx = index; idx <= last; idx++) {
105111
desc = msi_alloc_desc(dev, 1, NULL);
106112
if (!desc)
113+
goto fail_mem;
114+
ret = msi_insert_desc(dev->msi.data, desc, idx);
115+
if (ret)
107116
goto fail;
108-
desc->msi_index = index + i;
109-
list_add_tail(&desc->list, &list);
110117
}
111-
list_splice_tail(&list, &dev->msi.data->list);
112118
return 0;
113119

120+
fail_mem:
121+
ret = -ENOMEM;
114122
fail:
115-
list_for_each_entry_safe(desc, tmp, &list, list) {
116-
list_del(&desc->list);
117-
msi_free_desc(desc);
123+
msi_free_msi_descs_range(dev, MSI_DESC_NOTASSOCIATED, index, last);
124+
return ret;
125+
}
126+
127+
static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
128+
{
129+
switch (filter) {
130+
case MSI_DESC_ALL:
131+
return true;
132+
case MSI_DESC_NOTASSOCIATED:
133+
return !desc->irq;
134+
case MSI_DESC_ASSOCIATED:
135+
return !!desc->irq;
118136
}
119-
return -ENOMEM;
137+
WARN_ON_ONCE(1);
138+
return false;
120139
}
121140

122141
/**
@@ -129,19 +148,17 @@ static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsi
129148
void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter,
130149
unsigned int first_index, unsigned int last_index)
131150
{
151+
struct xarray *xa = &dev->msi.data->__store;
132152
struct msi_desc *desc;
153+
unsigned long idx;
133154

134155
lockdep_assert_held(&dev->msi.data->mutex);
135156

136-
msi_for_each_desc(desc, dev, filter) {
137-
/*
138-
* Stupid for now to handle MSI device domain until the
139-
* storage is switched over to an xarray.
140-
*/
141-
if (desc->msi_index < first_index || desc->msi_index > last_index)
142-
continue;
143-
list_del(&desc->list);
144-
msi_free_desc(desc);
157+
xa_for_each_range(xa, idx, desc, first_index, last_index) {
158+
if (msi_desc_match(desc, filter)) {
159+
xa_erase(xa, idx);
160+
msi_free_desc(desc);
161+
}
145162
}
146163
}
147164

@@ -162,7 +179,8 @@ static void msi_device_data_release(struct device *dev, void *res)
162179
{
163180
struct msi_device_data *md = res;
164181

165-
WARN_ON_ONCE(!list_empty(&md->list));
182+
WARN_ON_ONCE(!xa_empty(&md->__store));
183+
xa_destroy(&md->__store);
166184
dev->msi.data = NULL;
167185
}
168186

@@ -194,7 +212,7 @@ int msi_setup_device_data(struct device *dev)
194212
return ret;
195213
}
196214

197-
INIT_LIST_HEAD(&md->list);
215+
xa_init(&md->__store);
198216
mutex_init(&md->mutex);
199217
dev->msi.data = md;
200218
devres_add(dev, md);
@@ -217,34 +235,21 @@ EXPORT_SYMBOL_GPL(msi_lock_descs);
217235
*/
218236
void msi_unlock_descs(struct device *dev)
219237
{
220-
/* Clear the next pointer which was cached by the iterator */
221-
dev->msi.data->__next = NULL;
238+
/* Invalidate the index which was cached by the iterator */
239+
dev->msi.data->__iter_idx = MSI_MAX_INDEX;
222240
mutex_unlock(&dev->msi.data->mutex);
223241
}
224242
EXPORT_SYMBOL_GPL(msi_unlock_descs);
225243

226-
static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter)
227-
{
228-
switch (filter) {
229-
case MSI_DESC_ALL:
230-
return true;
231-
case MSI_DESC_NOTASSOCIATED:
232-
return !desc->irq;
233-
case MSI_DESC_ASSOCIATED:
234-
return !!desc->irq;
235-
}
236-
WARN_ON_ONCE(1);
237-
return false;
238-
}
239-
240-
static struct msi_desc *msi_find_first_desc(struct device *dev, enum msi_desc_filter filter)
244+
static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter)
241245
{
242246
struct msi_desc *desc;
243247

244-
list_for_each_entry(desc, dev_to_msi_list(dev), list) {
248+
xa_for_each_start(&md->__store, md->__iter_idx, desc, md->__iter_idx) {
245249
if (msi_desc_match(desc, filter))
246250
return desc;
247251
}
252+
md->__iter_idx = MSI_MAX_INDEX;
248253
return NULL;
249254
}
250255

@@ -261,37 +266,24 @@ static struct msi_desc *msi_find_first_desc(struct device *dev, enum msi_desc_fi
261266
*/
262267
struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter)
263268
{
264-
struct msi_desc *desc;
269+
struct msi_device_data *md = dev->msi.data;
265270

266-
if (WARN_ON_ONCE(!dev->msi.data))
271+
if (WARN_ON_ONCE(!md))
267272
return NULL;
268273

269-
lockdep_assert_held(&dev->msi.data->mutex);
274+
lockdep_assert_held(&md->mutex);
270275

271-
desc = msi_find_first_desc(dev, filter);
272-
dev->msi.data->__next = desc ? list_next_entry(desc, list) : NULL;
273-
return desc;
276+
md->__iter_idx = 0;
277+
return msi_find_desc(md, filter);
274278
}
275279
EXPORT_SYMBOL_GPL(msi_first_desc);
276280

277-
static struct msi_desc *__msi_next_desc(struct device *dev, enum msi_desc_filter filter,
278-
struct msi_desc *from)
279-
{
280-
struct msi_desc *desc = from;
281-
282-
list_for_each_entry_from(desc, dev_to_msi_list(dev), list) {
283-
if (msi_desc_match(desc, filter))
284-
return desc;
285-
}
286-
return NULL;
287-
}
288-
289281
/**
290282
* msi_next_desc - Get the next MSI descriptor of a device
291283
* @dev: Device to operate on
292284
*
293285
* The first invocation of msi_next_desc() has to be preceded by a
294-
* successful incovation of __msi_first_desc(). Consecutive invocations are
286+
* successful invocation of __msi_first_desc(). Consecutive invocations are
295287
* only valid if the previous one was successful. All these operations have
296288
* to be done within the same MSI mutex held region.
297289
*
@@ -300,20 +292,18 @@ static struct msi_desc *__msi_next_desc(struct device *dev, enum msi_desc_filter
300292
*/
301293
struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter)
302294
{
303-
struct msi_device_data *data = dev->msi.data;
304-
struct msi_desc *desc;
295+
struct msi_device_data *md = dev->msi.data;
305296

306-
if (WARN_ON_ONCE(!data))
297+
if (WARN_ON_ONCE(!md))
307298
return NULL;
308299

309-
lockdep_assert_held(&data->mutex);
300+
lockdep_assert_held(&md->mutex);
310301

311-
if (!data->__next)
302+
if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX)
312303
return NULL;
313304

314-
desc = __msi_next_desc(dev, filter, data->__next);
315-
dev->msi.data->__next = desc ? list_next_entry(desc, list) : NULL;
316-
return desc;
305+
md->__iter_idx++;
306+
return msi_find_desc(md, filter);
317307
}
318308
EXPORT_SYMBOL_GPL(msi_next_desc);
319309

@@ -336,21 +326,18 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index)
336326
pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false;
337327

338328
msi_lock_descs(dev);
339-
msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) {
340-
/* PCI-MSI has only one descriptor for multiple interrupts. */
341-
if (pcimsi) {
342-
if (index < desc->nvec_used)
343-
ret = desc->irq + index;
344-
break;
345-
}
346-
329+
desc = xa_load(&dev->msi.data->__store, pcimsi ? 0 : index);
330+
if (desc && desc->irq) {
347331
/*
332+
* PCI-MSI has only one descriptor for multiple interrupts.
348333
* PCI-MSIX and platform MSI use a descriptor per
349334
* interrupt.
350335
*/
351-
if (desc->msi_index == index) {
336+
if (pcimsi) {
337+
if (index < desc->nvec_used)
338+
ret = desc->irq + index;
339+
} else {
352340
ret = desc->irq;
353-
break;
354341
}
355342
}
356343
msi_unlock_descs(dev);
@@ -731,16 +718,13 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
731718
int ret, virq;
732719

733720
msi_lock_descs(dev);
734-
for (virq = virq_base; virq < virq_base + nvec; virq++) {
735-
desc = msi_alloc_desc(dev, 1, NULL);
736-
if (!desc) {
737-
ret = -ENOMEM;
738-
goto fail;
739-
}
721+
ret = msi_add_simple_msi_descs(dev, virq_base, nvec);
722+
if (ret)
723+
goto unlock;
740724

741-
desc->msi_index = virq;
725+
for (virq = virq_base; virq < virq_base + nvec; virq++) {
726+
desc = xa_load(&dev->msi.data->__store, virq);
742727
desc->irq = virq;
743-
list_add_tail(&desc->list, &dev->msi.data->list);
744728

745729
ops->set_desc(arg, desc);
746730
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
@@ -756,6 +740,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
756740
for (--virq; virq >= virq_base; virq--)
757741
irq_domain_free_irqs_common(domain, virq, 1);
758742
msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1);
743+
unlock:
759744
msi_unlock_descs(dev);
760745
return ret;
761746
}

0 commit comments

Comments
 (0)