Skip to content

Commit 2b0569b

Browse files
lsgunth authored and jonmason committed
NTB: Add MSI interrupt support to ntb_transport
Introduce the module parameter 'use_msi' which, when set, uses MSI interrupts instead of doorbells for each queue pair (QP). The parameter is only available if NTB MSI support is configured into the kernel. We also require there to be more than one memory window (MW) so that an extra one is available to forward the APIC region. To use MSIs, we request one interrupt per QP and forward the MSI address and data to the peer using scratch pad registers (SPADS) above the MW SPADS. (If there are not enough SPADS the MSI interrupt will not be used.) Once registered, we simply use ntb_msi_peer_trigger and the receiving ISR simply queues up the rxc_db_work for the queue. This addition can significantly improve performance of ntb_transport. In a simple, untuned, apples-to-apples comparision using ntb_netdev and iperf with switchtec hardware, I see 3.88Gb/s without MSI interrupts and 14.1Gb/s wit MSI, which is a more than 3x improvement. Signed-off-by: Logan Gunthorpe <[email protected]> Cc: Dave Jiang <[email protected]> Cc: Allen Hubbe <[email protected]> Signed-off-by: Jon Mason <[email protected]>
1 parent 76ab785 commit 2b0569b

File tree

1 file changed

+168
-1
lines changed

1 file changed

+168
-1
lines changed

drivers/ntb/ntb_transport.c

Lines changed: 168 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ static bool use_dma;
9393
module_param(use_dma, bool, 0644);
9494
MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
9595

96+
static bool use_msi;
97+
#ifdef CONFIG_NTB_MSI
98+
module_param(use_msi, bool, 0644);
99+
MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
100+
#endif
101+
96102
static struct dentry *nt_debugfs_dir;
97103

98104
/* Only two-ports NTB devices are supported */
@@ -188,6 +194,11 @@ struct ntb_transport_qp {
188194
u64 tx_err_no_buf;
189195
u64 tx_memcpy;
190196
u64 tx_async;
197+
198+
bool use_msi;
199+
int msi_irq;
200+
struct ntb_msi_desc msi_desc;
201+
struct ntb_msi_desc peer_msi_desc;
191202
};
192203

193204
struct ntb_transport_mw {
@@ -221,6 +232,10 @@ struct ntb_transport_ctx {
221232
u64 qp_bitmap;
222233
u64 qp_bitmap_free;
223234

235+
bool use_msi;
236+
unsigned int msi_spad_offset;
237+
u64 msi_db_mask;
238+
224239
bool link_is_up;
225240
struct delayed_work link_work;
226241
struct work_struct link_cleanup;
@@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
667682
return 0;
668683
}
669684

685+
static irqreturn_t ntb_transport_isr(int irq, void *dev)
686+
{
687+
struct ntb_transport_qp *qp = dev;
688+
689+
tasklet_schedule(&qp->rxc_db_work);
690+
691+
return IRQ_HANDLED;
692+
}
693+
694+
static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
695+
unsigned int qp_num)
696+
{
697+
struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
698+
int spad = qp_num * 2 + nt->msi_spad_offset;
699+
700+
if (!nt->use_msi)
701+
return;
702+
703+
if (spad >= ntb_spad_count(nt->ndev))
704+
return;
705+
706+
qp->peer_msi_desc.addr_offset =
707+
ntb_peer_spad_read(qp->ndev, PIDX, spad);
708+
qp->peer_msi_desc.data =
709+
ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
710+
711+
dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
712+
qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
713+
714+
if (qp->peer_msi_desc.addr_offset) {
715+
qp->use_msi = true;
716+
dev_info(&qp->ndev->pdev->dev,
717+
"Using MSI interrupts for QP%d\n", qp_num);
718+
}
719+
}
720+
721+
static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
722+
unsigned int qp_num)
723+
{
724+
struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
725+
int spad = qp_num * 2 + nt->msi_spad_offset;
726+
int rc;
727+
728+
if (!nt->use_msi)
729+
return;
730+
731+
if (spad >= ntb_spad_count(nt->ndev)) {
732+
dev_warn_once(&qp->ndev->pdev->dev,
733+
"Not enough SPADS to use MSI interrupts\n");
734+
return;
735+
}
736+
737+
ntb_spad_write(qp->ndev, spad, 0);
738+
ntb_spad_write(qp->ndev, spad + 1, 0);
739+
740+
if (!qp->msi_irq) {
741+
qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
742+
KBUILD_MODNAME, qp,
743+
&qp->msi_desc);
744+
if (qp->msi_irq < 0) {
745+
dev_warn(&qp->ndev->pdev->dev,
746+
"Unable to allocate MSI interrupt for qp%d\n",
747+
qp_num);
748+
return;
749+
}
750+
}
751+
752+
rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
753+
if (rc)
754+
goto err_free_interrupt;
755+
756+
rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
757+
if (rc)
758+
goto err_free_interrupt;
759+
760+
dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
761+
qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
762+
qp->msi_desc.data);
763+
764+
return;
765+
766+
err_free_interrupt:
767+
devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
768+
}
769+
770+
static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
771+
{
772+
int i;
773+
774+
dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
775+
776+
for (i = 0; i < nt->qp_count; i++)
777+
ntb_transport_setup_qp_peer_msi(nt, i);
778+
}
779+
780+
static void ntb_transport_msi_desc_changed(void *data)
781+
{
782+
struct ntb_transport_ctx *nt = data;
783+
int i;
784+
785+
dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
786+
787+
for (i = 0; i < nt->qp_count; i++)
788+
ntb_transport_setup_qp_msi(nt, i);
789+
790+
ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
791+
}
792+
670793
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
671794
{
672795
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work)
9051028
int rc = 0, i, spad;
9061029

9071030
/* send the local info, in the opposite order of the way we read it */
1031+
1032+
if (nt->use_msi) {
1033+
rc = ntb_msi_setup_mws(ndev);
1034+
if (rc) {
1035+
dev_warn(&pdev->dev,
1036+
"Failed to register MSI memory window: %d\n",
1037+
rc);
1038+
nt->use_msi = false;
1039+
}
1040+
}
1041+
1042+
for (i = 0; i < nt->qp_count; i++)
1043+
ntb_transport_setup_qp_msi(nt, i);
1044+
9081045
for (i = 0; i < nt->mw_count; i++) {
9091046
size = nt->mw_vec[i].phys_size;
9101047

@@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work)
9621099
struct ntb_transport_qp *qp = &nt->qp_vec[i];
9631100

9641101
ntb_transport_setup_qp_mw(nt, i);
1102+
ntb_transport_setup_qp_peer_msi(nt, i);
9651103

9661104
if (qp->client_ready)
9671105
schedule_delayed_work(&qp->link_work, 0);
@@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
11351273
return -ENOMEM;
11361274

11371275
nt->ndev = ndev;
1276+
1277+
/*
1278+
* If we are using MSI, and have at least one extra memory window,
1279+
* we will reserve the last MW for the MSI window.
1280+
*/
1281+
if (use_msi && mw_count > 1) {
1282+
rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
1283+
if (!rc) {
1284+
mw_count -= 1;
1285+
nt->use_msi = true;
1286+
}
1287+
}
1288+
11381289
spad_count = ntb_spad_count(ndev);
11391290

11401291
/* Limit the MW's based on the availability of scratchpads */
@@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
11481299
max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
11491300
nt->mw_count = min(mw_count, max_mw_count_for_spads);
11501301

1302+
nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
1303+
11511304
nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
11521305
GFP_KERNEL, node);
11531306
if (!nt->mw_vec) {
@@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
11781331
qp_bitmap = ntb_db_valid_mask(ndev);
11791332

11801333
qp_count = ilog2(qp_bitmap);
1334+
if (nt->use_msi) {
1335+
qp_count -= 1;
1336+
nt->msi_db_mask = 1 << qp_count;
1337+
ntb_db_clear_mask(ndev, nt->msi_db_mask);
1338+
}
1339+
11811340
if (max_num_clients && max_num_clients < qp_count)
11821341
qp_count = max_num_clients;
11831342
else if (nt->mw_count < qp_count)
@@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data,
16011760

16021761
iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
16031762

1604-
ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
1763+
if (qp->use_msi)
1764+
ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
1765+
else
1766+
ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
16051767

16061768
/* The entry length can only be zero if the packet is intended to be a
16071769
* "link down" or similar. Since no payload is being sent in these
@@ -2269,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
22692431
u64 db_bits;
22702432
unsigned int qp_num;
22712433

2434+
if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
2435+
ntb_transport_msi_peer_desc_changed(nt);
2436+
ntb_db_clear(nt->ndev, nt->msi_db_mask);
2437+
}
2438+
22722439
db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
22732440
ntb_db_vector_mask(nt->ndev, vector));
22742441

0 commit comments

Comments
 (0)