Skip to content

Commit 11ea68f

Browse files
Ming Lei authored and KAGA-KOKO committed
genirq, sched/isolation: Isolate from handling managed interrupts
The affinity of managed interrupts is completely handled in the kernel and cannot be changed via the /proc/irq/* interfaces from user space. As the kernel tries to spread out interrupts evenly across CPUs on x86 to prevent vector exhaustion, it can happen that a managed interrupt whose affinity mask contains both isolated and housekeeping CPUs is routed to an isolated CPU. As a consequence IO submitted on a housekeeping CPU causes interrupts on the isolated CPU. Add a new sub-parameter 'managed_irq' for 'isolcpus' and the corresponding logic in the interrupt affinity selection code. The subparameter indicates to the interrupt affinity selection logic that it should try to avoid the above scenario. This isolation is best effort and only effective if the automatically assigned interrupt mask of a device queue contains isolated and housekeeping CPUs. If housekeeping CPUs are online then such interrupts are directed to the housekeeping CPU so that IO submitted on the housekeeping CPU cannot disturb the isolated CPU. If a queue's affinity mask contains only isolated CPUs then this parameter has no effect on the interrupt routing decision, though interrupts are only happening when tasks running on those isolated CPUs submit IO. IO submitted on housekeeping CPUs has no influence on those queues. If the affinity mask contains both housekeeping and isolated CPUs, but none of the contained housekeeping CPUs is online, then the interrupt is also routed to an isolated CPU. Interrupts are only delivered when one of the isolated CPUs in the affinity mask submits IO. If one of the contained housekeeping CPUs comes online, the CPU hotplug logic migrates the interrupt automatically back to the upcoming housekeeping CPU. Depending on the type of interrupt controller, this can require that at least one interrupt is delivered to the isolated CPU in order to complete the migration. 
[ tglx: Removed unused parameter, added and edited comments/documentation and rephrased the changelog so it contains more details. ] Signed-off-by: Ming Lei <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 099368b commit 11ea68f

File tree

5 files changed

+90
-5
lines changed

5 files changed

+90
-5
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,9 +1933,31 @@
19331933
<cpu number> begins at 0 and the maximum value is
19341934
"number of CPUs in system - 1".
19351935

1936-
The format of <cpu-list> is described above.
1937-
1936+
managed_irq
1937+
1938+
Isolate from being targeted by managed interrupts
1939+
which have an interrupt mask containing isolated
1940+
CPUs. The affinity of managed interrupts is
1941+
handled by the kernel and cannot be changed via
1942+
the /proc/irq/* interfaces.
1943+
1944+
This isolation is best effort and only effective
1945+
if the automatically assigned interrupt mask of a
1946+
device queue contains isolated and housekeeping
1947+
CPUs. If housekeeping CPUs are online then such
1948+
interrupts are directed to the housekeeping CPU
1949+
so that IO submitted on the housekeeping CPU
1950+
cannot disturb the isolated CPU.
1951+
1952+
If a queue's affinity mask contains only isolated
1953+
CPUs then this parameter has no effect on the
1954+
interrupt routing decision, though interrupts are
1955+
only delivered when tasks running on those
1956+
isolated CPUs submit IO. IO submitted on
1957+
housekeeping CPUs has no influence on those
1958+
queues.
19381959

1960+
The format of <cpu-list> is described above.
19391961

19401962
iucv= [HW,NET]
19411963

include/linux/sched/isolation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ enum hk_flags {
1313
HK_FLAG_TICK = (1 << 4),
1414
HK_FLAG_DOMAIN = (1 << 5),
1515
HK_FLAG_WQ = (1 << 6),
16+
HK_FLAG_MANAGED_IRQ = (1 << 7),
1617
};
1718

1819
#ifdef CONFIG_CPU_ISOLATION

kernel/irq/cpuhotplug.c

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/interrupt.h>
1313
#include <linux/ratelimit.h>
1414
#include <linux/irq.h>
15+
#include <linux/sched/isolation.h>
1516

1617
#include "internals.h"
1718

@@ -171,6 +172,20 @@ void irq_migrate_all_off_this_cpu(void)
171172
}
172173
}
173174

175+
static bool hk_should_isolate(struct irq_data *data, unsigned int cpu)
176+
{
177+
const struct cpumask *hk_mask;
178+
179+
if (!housekeeping_enabled(HK_FLAG_MANAGED_IRQ))
180+
return false;
181+
182+
hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
183+
if (cpumask_subset(irq_data_get_effective_affinity_mask(data), hk_mask))
184+
return false;
185+
186+
return cpumask_test_cpu(cpu, hk_mask);
187+
}
188+
174189
static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
175190
{
176191
struct irq_data *data = irq_desc_get_irq_data(desc);
@@ -188,9 +203,11 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
188203
/*
189204
* If the interrupt can only be directed to a single target
190205
* CPU then it is already assigned to a CPU in the affinity
191-
* mask. No point in trying to move it around.
206+
* mask. No point in trying to move it around unless the
207+
* isolation mechanism requests to move it to an upcoming
208+
* housekeeping CPU.
192209
*/
193-
if (!irqd_is_single_target(data))
210+
if (!irqd_is_single_target(data) || hk_should_isolate(data, cpu))
194211
irq_set_affinity_locked(data, affinity, false);
195212
}
196213

kernel/irq/manage.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/sched.h>
1919
#include <linux/sched/rt.h>
2020
#include <linux/sched/task.h>
21+
#include <linux/sched/isolation.h>
2122
#include <uapi/linux/sched/types.h>
2223
#include <linux/task_work.h>
2324

@@ -217,7 +218,45 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
217218
if (!chip || !chip->irq_set_affinity)
218219
return -EINVAL;
219220

220-
ret = chip->irq_set_affinity(data, mask, force);
221+
/*
222+
* If this is a managed interrupt and housekeeping is enabled on
223+
* it check whether the requested affinity mask intersects with
224+
* a housekeeping CPU. If so, then remove the isolated CPUs from
225+
* the mask and just keep the housekeeping CPU(s). This prevents
226+
* the affinity setter from routing the interrupt to an isolated
227+
* CPU to avoid that I/O submitted from a housekeeping CPU causes
228+
* interrupts on an isolated one.
229+
*
230+
* If the masks do not intersect or the intersection contains no online CPU(s) then
231+
* keep the requested mask. The isolated target CPUs are only
232+
* receiving interrupts when the I/O operation was submitted
233+
* directly from them.
234+
*
235+
* If all housekeeping CPUs in the affinity mask are offline, the
236+
* interrupt will be migrated by the CPU hotplug code once a
237+
* housekeeping CPU which belongs to the affinity mask comes
238+
* online.
239+
*/
240+
if (irqd_affinity_is_managed(data) &&
241+
housekeeping_enabled(HK_FLAG_MANAGED_IRQ)) {
242+
const struct cpumask *hk_mask, *prog_mask;
243+
244+
static DEFINE_RAW_SPINLOCK(tmp_mask_lock);
245+
static struct cpumask tmp_mask;
246+
247+
hk_mask = housekeeping_cpumask(HK_FLAG_MANAGED_IRQ);
248+
249+
raw_spin_lock(&tmp_mask_lock);
250+
cpumask_and(&tmp_mask, mask, hk_mask);
251+
if (!cpumask_intersects(&tmp_mask, cpu_online_mask))
252+
prog_mask = mask;
253+
else
254+
prog_mask = &tmp_mask;
255+
ret = chip->irq_set_affinity(data, prog_mask, force);
256+
raw_spin_unlock(&tmp_mask_lock);
257+
} else {
258+
ret = chip->irq_set_affinity(data, mask, force);
259+
}
221260
switch (ret) {
222261
case IRQ_SET_MASK_OK:
223262
case IRQ_SET_MASK_OK_DONE:

kernel/sched/isolation.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,12 @@ static int __init housekeeping_isolcpus_setup(char *str)
163163
continue;
164164
}
165165

166+
if (!strncmp(str, "managed_irq,", 12)) {
167+
str += 12;
168+
flags |= HK_FLAG_MANAGED_IRQ;
169+
continue;
170+
}
171+
166172
pr_warn("isolcpus: Error, unknown flag\n");
167173
return 0;
168174
}

0 commit comments

Comments
 (0)