Skip to content

Commit f607b2b

Browse files
PhilipYangA authored and alexdeucher committed
drm/amdkfd: KFD interrupt access ih_fifo data in-place
When handling 40000 to 80000 interrupts per second in CPX mode with 4 streams/queues per KFD node, the KFD interrupt handler becomes the performance bottleneck.

Remove the kfifo_out memcpy overhead by accessing the ih_fifo data in place and advancing the read pointer with kfifo_skip_count.

Signed-off-by: Philip Yang <[email protected]>
Reviewed-by: Felix Kuehling <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 11815bb commit f607b2b

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c

Lines changed: 14 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -114,50 +114,43 @@ void kfd_interrupt_exit(struct kfd_node *node)
114114
*/
115115
bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
116116
{
117-
int count;
118-
119-
count = kfifo_in(&node->ih_fifo, ih_ring_entry,
120-
node->kfd->device_info.ih_ring_entry_size);
121-
if (count != node->kfd->device_info.ih_ring_entry_size) {
117+
if (kfifo_is_full(&node->ih_fifo)) {
122118
dev_dbg_ratelimited(node->adev->dev,
123-
"Interrupt ring overflow, dropping interrupt %d\n",
124-
count);
119+
"Interrupt ring overflow, dropping interrupt\n");
125120
return false;
126121
}
127122

123+
kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size);
128124
return true;
129125
}
130126

131127
/*
132128
* Assumption: single reader/writer. This function is not re-entrant
133129
*/
134-
static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
130+
static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry)
135131
{
136132
int count;
137133

138-
count = kfifo_out(&node->ih_fifo, ih_ring_entry,
139-
node->kfd->device_info.ih_ring_entry_size);
140-
141-
WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
134+
if (kfifo_is_empty(&node->ih_fifo))
135+
return false;
142136

137+
count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry,
138+
node->kfd->device_info.ih_ring_entry_size);
139+
WARN_ON(count != node->kfd->device_info.ih_ring_entry_size);
143140
return count == node->kfd->device_info.ih_ring_entry_size;
144141
}
145142

146143
static void interrupt_wq(struct work_struct *work)
147144
{
148-
struct kfd_node *dev = container_of(work, struct kfd_node,
149-
interrupt_work);
150-
uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
145+
struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work);
146+
uint32_t *ih_ring_entry;
151147
unsigned long start_jiffies = jiffies;
152148

153-
if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
154-
dev_err_once(dev->adev->dev, "Ring entry too small\n");
155-
return;
156-
}
157-
158-
while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
149+
while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) {
159150
dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
160151
ih_ring_entry);
152+
kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size);
153+
161154
if (time_is_before_jiffies(start_jiffies + HZ)) {
162155
/* If we spent more than a second processing signals,
163156
* reschedule the worker to avoid soft-lockup warnings

0 commit comments

Comments
 (0)