Commit b5381a5

Nick Child authored and kuba-moo committed
ibmveth: Recycle buffers during replenish phase

When the length of a packet is under the rx_copybreak threshold, the
buffer is copied into a new skb and sent up the stack. This allows the
dma mapped memory to be recycled back to FW.

Previously, the reuse of the DMA space was handled immediately, meaning
that further packet processing had to wait until h_add_logical_lan
finished for this packet. Therefore, when reusing a packet, offload the
hcall to the replenish function. As a result, much of the shared logic
between the recycle and replenish functions can be removed.

This change increases TCP_RR packet rate by another 15% (370k to 430k
txns). The ftrace data supports this:
PREV: ibmveth_poll = 8078553.0 us / 190999.0 hits = AVG 42.3 us
NEW:  ibmveth_poll = 7632787.0 us / 224060.0 hits = AVG 34.07 us

Signed-off-by: Nick Child <[email protected]>
Reviewed-by: Shannon Nelson <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent: f128c7c · commit: b5381a5

drivers/net/ethernet/ibm/ibmveth.c

Lines changed: 60 additions & 84 deletions
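
Before reading the diff, it may help to see the shape of the change in
isolation. The following is a minimal, self-contained C sketch of the
pattern the commit adopts: harvesting a buffer with reuse leaves its
pointer in the pool, and the replenish pass re-posts any slot that still
holds one instead of allocating a fresh buffer. All names here (bufs,
harvest, replenish, post_to_fw) are hypothetical stand-ins for the
driver's pool arrays, ibmveth_rxq_harvest_buffer,
ibmveth_replenish_buffer_pool and the h_add_logical_lan_buffer hcall;
DMA mapping, the free_map indirection and failure handling are
deliberately elided.

#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 4
#define BUF_SIZE  2048

/* bufs[] stands in for the driver's pool->skbuff[]/dma_addr[] arrays:
 * a non-NULL slot means the buffer (and, in the driver, its DMA
 * mapping) is still owned by the pool and can be re-posted as-is.
 */
static void *bufs[POOL_SIZE];
static unsigned int consumer;

/* Stand-in for the h_add_logical_lan_buffer hcall. */
static int post_to_fw(unsigned int index, void *buf)
{
	printf("slot %u: posted %p to firmware\n", index, buf);
	return 0; /* H_SUCCESS */
}

/* Harvest the buffer at `index` off the rx queue. With reuse=1 the
 * pointer stays in the pool so the next replenish pass re-posts it;
 * with reuse=0 ownership moves up the stack and the slot must be
 * refilled with a fresh allocation.
 */
static void harvest(unsigned int index, int reuse)
{
	if (!reuse)
		bufs[index] = NULL;
}

/* Replenish `count` slots starting at the consumer index. The
 * reuse-vs-allocate decision is the point here; the real driver also
 * writes a correlator, maps DMA and handles posting failures.
 */
static void replenish(unsigned int count)
{
	while (count--) {
		unsigned int index = consumer;
		void *buf = bufs[index];

		if (!buf) {
			/* fresh allocation path (netdev_alloc_skb +
			 * dma_map_single in the driver)
			 */
			buf = malloc(BUF_SIZE);
			if (!buf)
				break;
			bufs[index] = buf;
		}
		/* reuse path falls through: the buffer is re-posted
		 * without reallocation
		 */
		post_to_fw(index, buf);
		consumer = (consumer + 1) % POOL_SIZE;
	}
}

int main(void)
{
	replenish(POOL_SIZE);	/* initial fill: all fresh allocations */
	harvest(0, 1);		/* copybreak hit: data copied out, recycle */
	harvest(1, 0);		/* large packet: skb handed up the stack */
	replenish(2);		/* slot 0 reused, slot 1 reallocated */

	for (unsigned int i = 0; i < POOL_SIZE; i++)
		free(bufs[i]);
	return 0;
}

The performance win described in the commit message comes from exactly
this split: the rx poll loop no longer issues (and waits on) an hcall
per recycled packet, since re-posting is batched into the replenish
pass.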
@@ -39,7 +39,8 @@
 #include "ibmveth.h"
 
 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
-static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
+static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
+				       bool reuse);
 static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
 
 static struct kobj_type ktype_veth_pool;
@@ -226,6 +227,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter,
 	for (i = 0; i < count; ++i) {
 		union ibmveth_buf_desc desc;
 
+		free_index = pool->consumer_index;
+		index = pool->free_map[free_index];
+		skb = NULL;
+
+		BUG_ON(index == IBM_VETH_INVALID_MAP);
+
+		/* are we allocating a new buffer or recycling an old one */
+		if (pool->skbuff[index])
+			goto reuse;
+
 		skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
 
 		if (!skb) {
@@ -235,64 +246,60 @@
 			break;
 		}
 
-		free_index = pool->consumer_index;
-		pool->consumer_index++;
-		if (pool->consumer_index >= pool->size)
-			pool->consumer_index = 0;
-		index = pool->free_map[free_index];
-
-		BUG_ON(index == IBM_VETH_INVALID_MAP);
-		BUG_ON(pool->skbuff[index] != NULL);
-
 		dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
 				pool->buff_size, DMA_FROM_DEVICE);
 
 		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
 			goto failure;
 
-		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
 		pool->dma_addr[index] = dma_addr;
 		pool->skbuff[index] = skb;
 
-		correlator = ((u64)pool->index << 32) | index;
-		*(u64 *)skb->data = correlator;
-
-		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
-		desc.fields.address = dma_addr;
-
 		if (rx_flush) {
 			unsigned int len = min(pool->buff_size,
-					adapter->netdev->mtu +
-					IBMVETH_BUFF_OH);
+					       adapter->netdev->mtu +
+					       IBMVETH_BUFF_OH);
 			ibmveth_flush_buffer(skb->data, len);
 		}
+reuse:
+		dma_addr = pool->dma_addr[index];
+		desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
+		desc.fields.address = dma_addr;
+
+		correlator = ((u64)pool->index << 32) | index;
+		*(u64 *)pool->skbuff[index]->data = correlator;
+
 		lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
 						   desc.desc);
 
 		if (lpar_rc != H_SUCCESS) {
+			netdev_warn(adapter->netdev,
+				    "%sadd_logical_lan failed %lu\n",
+				    skb ? "" : "When recycling: ", lpar_rc);
 			goto failure;
-		} else {
-			buffers_added++;
-			adapter->replenish_add_buff_success++;
 		}
+
+		pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
+		pool->consumer_index++;
+		if (pool->consumer_index >= pool->size)
+			pool->consumer_index = 0;
+
+		buffers_added++;
+		adapter->replenish_add_buff_success++;
 	}
 
 	mb();
 	atomic_add(buffers_added, &(pool->available));
 	return;
 
 failure:
-	pool->free_map[free_index] = index;
-	pool->skbuff[index] = NULL;
-	if (pool->consumer_index == 0)
-		pool->consumer_index = pool->size - 1;
-	else
-		pool->consumer_index--;
-	if (!dma_mapping_error(&adapter->vdev->dev, dma_addr))
+
+	if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
 		dma_unmap_single(&adapter->vdev->dev,
 				 pool->dma_addr[index], pool->buff_size,
 				 DMA_FROM_DEVICE);
-	dev_kfree_skb_any(skb);
+	dev_kfree_skb_any(pool->skbuff[index]);
+	pool->skbuff[index] = NULL;
 	adapter->replenish_add_buff_failure++;
 
 	mb();
@@ -365,7 +372,7 @@ static void ibmveth_free_buffer_pool(struct ibmveth_adapter *adapter,
 
 /* remove a buffer from a pool */
 static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
-					    u64 correlator)
+					    u64 correlator, bool reuse)
 {
 	unsigned int pool = correlator >> 32;
 	unsigned int index = correlator & 0xffffffffUL;
@@ -376,15 +383,23 @@ static void ibmveth_remove_buffer_from_pool(struct ibmveth_adapter *adapter,
 	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
 
 	skb = adapter->rx_buff_pool[pool].skbuff[index];
-
 	BUG_ON(skb == NULL);
 
-	adapter->rx_buff_pool[pool].skbuff[index] = NULL;
+	/* if we are going to reuse the buffer then keep the pointers around
+	 * but mark index as available. replenish will see the skb pointer and
+	 * assume it is to be recycled.
+	 */
+	if (!reuse) {
+		/* remove the skb pointer to mark free. actual freeing is done
+		 * by upper level networking after gro_recieve
+		 */
+		adapter->rx_buff_pool[pool].skbuff[index] = NULL;
 
-	dma_unmap_single(&adapter->vdev->dev,
-			 adapter->rx_buff_pool[pool].dma_addr[index],
-			 adapter->rx_buff_pool[pool].buff_size,
-			 DMA_FROM_DEVICE);
+		dma_unmap_single(&adapter->vdev->dev,
+				 adapter->rx_buff_pool[pool].dma_addr[index],
+				 adapter->rx_buff_pool[pool].buff_size,
+				 DMA_FROM_DEVICE);
+	}
 
 	free_index = adapter->rx_buff_pool[pool].producer_index;
 	adapter->rx_buff_pool[pool].producer_index++;
@@ -411,51 +426,13 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *adapter)
 	return adapter->rx_buff_pool[pool].skbuff[index];
 }
 
-/* recycle the current buffer on the rx queue */
-static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
+static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter,
+				       bool reuse)
 {
-	u32 q_index = adapter->rx_queue.index;
-	u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
-	unsigned int pool = correlator >> 32;
-	unsigned int index = correlator & 0xffffffffUL;
-	union ibmveth_buf_desc desc;
-	unsigned long lpar_rc;
-	int ret = 1;
-
-	BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
-	BUG_ON(index >= adapter->rx_buff_pool[pool].size);
+	u64 cor;
 
-	if (!adapter->rx_buff_pool[pool].active) {
-		ibmveth_rxq_harvest_buffer(adapter);
-		ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
-		goto out;
-	}
-
-	desc.fields.flags_len = IBMVETH_BUF_VALID |
-		adapter->rx_buff_pool[pool].buff_size;
-	desc.fields.address = adapter->rx_buff_pool[pool].dma_addr[index];
-
-	lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
-
-	if (lpar_rc != H_SUCCESS) {
-		netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
-			   "during recycle rc=%ld", lpar_rc);
-		ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
-		ret = 0;
-	}
-
-	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
-		adapter->rx_queue.index = 0;
-		adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
-	}
-
-out:
-	return ret;
-}
-
-static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
-{
-	ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
+	cor = adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator;
+	ibmveth_remove_buffer_from_pool(adapter, cor, reuse);
 
 	if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
 		adapter->rx_queue.index = 0;
@@ -1347,7 +1324,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 			wmb(); /* suggested by larson1 */
 			adapter->rx_invalid_buffer++;
 			netdev_dbg(netdev, "recycling invalid buffer\n");
-			ibmveth_rxq_recycle_buffer(adapter);
+			ibmveth_rxq_harvest_buffer(adapter, true);
 		} else {
 			struct sk_buff *skb, *new_skb;
 			int length = ibmveth_rxq_frame_length(adapter);
@@ -1380,11 +1357,10 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 				if (rx_flush)
 					ibmveth_flush_buffer(skb->data,
 							     length + offset);
-				if (!ibmveth_rxq_recycle_buffer(adapter))
-					kfree_skb(skb);
+				ibmveth_rxq_harvest_buffer(adapter, true);
 				skb = new_skb;
 			} else {
-				ibmveth_rxq_harvest_buffer(adapter);
+				ibmveth_rxq_harvest_buffer(adapter, false);
 				skb_reserve(skb, offset);
 			}
 