Skip to content

Commit c8137ac

Browse files
committed
x86/iopl: Restrict iopl() permission scope
Access to the full I/O port range can also be provided by the TSS I/O bitmap, but that would require copying 8k of data when scheduling in the task. As shown with the sched out optimization, TSS.io_bitmap_base can be used to switch the incoming task to a preallocated I/O bitmap which has all bits zero, i.e. allows access to all I/O ports. Implementing this makes it possible to provide an iopl() emulation mode which restricts the IOPL level 3 permissions to I/O port access but removes the STI/CLI permission which comes with the hardware IOPL mechanism. Provide a config option to switch IOPL to emulation mode, make it the default and, while at it, also provide an option to disable IOPL completely. Signed-off-by: Thomas Gleixner <[email protected]> Acked-by: Andy Lutomirski <[email protected]>
1 parent be9afb4 commit c8137ac

File tree

6 files changed

+139
-47
lines changed

6 files changed

+139
-47
lines changed

arch/x86/Kconfig

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1254,6 +1254,38 @@ config X86_VSYSCALL_EMULATION
12541254
Disabling this option saves about 7K of kernel size and
12551255
possibly 4K of additional runtime pagetable memory.
12561256

1257+
choice
1258+
prompt "IOPL"
1259+
default X86_IOPL_EMULATION
1260+
1261+
config X86_IOPL_EMULATION
1262+
bool "IOPL Emulation"
1263+
---help---
1264+
Legacy IOPL support is an overbroad mechanism which allows user
1265+
space not only to access all 65536 I/O ports but also to disable
1266+
interrupts. To gain this access the caller needs CAP_SYS_RAWIO
1267+
capabilities and permission from potentially active security
1268+
modules.
1269+
1270+
The emulation restricts the functionality of the syscall to
1271+
only allowing the full range I/O port access, but prevents the
1272+
ability to disable interrupts from user space.
1273+
1274+
config X86_IOPL_LEGACY
1275+
bool "IOPL Legacy"
1276+
---help---
1277+
Allow the full IOPL permissions, i.e. user space access to all
1278+
65536 I/O ports and also the ability to disable interrupts, which
1279+
is overbroad and can result in system lockups.
1280+
1281+
config X86_IOPL_NONE
1282+
bool "IOPL None"
1283+
---help---
1284+
Disable the IOPL permission syscall. That's the safest option as
1285+
no sane application should depend on this functionality.
1286+
1287+
endchoice
1288+
12571289
config TOSHIBA
12581290
tristate "Toshiba Laptop support"
12591291
depends on X86_32

arch/x86/include/asm/pgtable_32_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
4444
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
4545
* to avoid include recursion hell
4646
*/
47-
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
47+
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41)
4848

4949
#define CPU_ENTRY_AREA_BASE \
5050
((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \

arch/x86/include/asm/processor.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,19 +332,21 @@ struct x86_hw_tss {
332332
#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
333333
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
334334

335-
#define IO_BITMAP_OFFSET_VALID \
335+
#define IO_BITMAP_OFFSET_VALID_MAP \
336336
(offsetof(struct tss_struct, io_bitmap.bitmap) - \
337337
offsetof(struct tss_struct, x86_tss))
338338

339+
#define IO_BITMAP_OFFSET_VALID_ALL \
340+
(offsetof(struct tss_struct, io_bitmap.mapall) - \
341+
offsetof(struct tss_struct, x86_tss))
342+
339343
/*
340-
* sizeof(unsigned long) coming from an extra "long" at the end
341-
* of the iobitmap.
342-
*
343-
* -1? seg base+limit should be pointing to the address of the
344-
* last valid byte
344+
* sizeof(unsigned long) coming from an extra "long" at the end of the
345+
* iobitmap. The limit is inclusive, i.e. the last valid byte.
345346
*/
346347
#define __KERNEL_TSS_LIMIT \
347-
(IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
348+
(IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
349+
sizeof(unsigned long) - 1)
348350

349351
/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */
350352
#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
@@ -380,6 +382,12 @@ struct x86_io_bitmap {
380382
* be within the limit.
381383
*/
382384
unsigned long bitmap[IO_BITMAP_LONGS + 1];
385+
386+
/*
387+
* Special I/O bitmap to emulate IOPL(3). All bytes zero,
388+
* except the additional byte at the end.
389+
*/
390+
unsigned long mapall[IO_BITMAP_LONGS + 1];
383391
};
384392

385393
struct tss_struct {
@@ -506,7 +514,13 @@ struct thread_struct {
506514
#endif
507515
/* IO permissions: */
508516
struct io_bitmap *io_bitmap;
517+
518+
/*
519+
* IOPL. Privilege level dependent I/O permission which includes
520+
* user space CLI/STI when granted.
521+
*/
509522
unsigned long iopl;
523+
unsigned long iopl_emul;
510524

511525
mm_segment_t addr_limit;
512526

arch/x86/kernel/cpu/common.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,6 +1864,11 @@ void cpu_init(void)
18641864
tss->io_bitmap.prev_max = 0;
18651865
tss->io_bitmap.prev_sequence = 0;
18661866
memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
1867+
/*
1868+
* Invalidate the extra array entry past the end of the all
1869+
* permission bitmap as required by the hardware.
1870+
*/
1871+
tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
18671872
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
18681873

18691874
load_TR_desc();

arch/x86/kernel/ioport.c

Lines changed: 61 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,41 @@
1717
static atomic64_t io_bitmap_sequence;
1818

1919
void io_bitmap_share(struct task_struct *tsk)
20-
{
21-
/*
22-
* Take a refcount on current's bitmap. It can be used by
23-
* both tasks as long as none of them changes the bitmap.
24-
*/
25-
refcount_inc(&current->thread.io_bitmap->refcnt);
26-
tsk->thread.io_bitmap = current->thread.io_bitmap;
20+
{
21+
/* Can be NULL when current->thread.iopl_emul == 3 */
22+
if (current->thread.io_bitmap) {
23+
/*
24+
* Take a refcount on current's bitmap. It can be used by
25+
* both tasks as long as none of them changes the bitmap.
26+
*/
27+
refcount_inc(&current->thread.io_bitmap->refcnt);
28+
tsk->thread.io_bitmap = current->thread.io_bitmap;
29+
}
2730
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
2831
}
2932

33+
static void task_update_io_bitmap(void)
34+
{
35+
struct thread_struct *t = &current->thread;
36+
37+
if (t->iopl_emul == 3 || t->io_bitmap) {
38+
/* TSS update is handled on exit to user space */
39+
set_thread_flag(TIF_IO_BITMAP);
40+
} else {
41+
clear_thread_flag(TIF_IO_BITMAP);
42+
/* Invalidate TSS */
43+
preempt_disable();
44+
tss_update_io_bitmap();
45+
preempt_enable();
46+
}
47+
}
48+
3049
void io_bitmap_exit(void)
3150
{
3251
struct io_bitmap *iobm = current->thread.io_bitmap;
3352

3453
current->thread.io_bitmap = NULL;
35-
clear_thread_flag(TIF_IO_BITMAP);
36-
preempt_disable();
37-
tss_update_io_bitmap();
38-
preempt_enable();
54+
task_update_io_bitmap();
3955
if (iobm && refcount_dec_and_test(&iobm->refcnt))
4056
kfree(iobm);
4157
}
@@ -157,36 +173,55 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
157173
*/
158174
SYSCALL_DEFINE1(iopl, unsigned int, level)
159175
{
160-
struct pt_regs *regs = current_pt_regs();
161176
struct thread_struct *t = &current->thread;
177+
struct pt_regs *regs = current_pt_regs();
178+
unsigned int old;
162179

163180
/*
164181
* Careful: the IOPL bits in regs->flags are undefined under Xen PV
165182
* and changing them has no effect.
166183
*/
167-
unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
184+
if (IS_ENABLED(CONFIG_X86_IOPL_NONE))
185+
return -ENOSYS;
168186

169187
if (level > 3)
170188
return -EINVAL;
189+
190+
if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION))
191+
old = t->iopl_emul;
192+
else
193+
old = t->iopl >> X86_EFLAGS_IOPL_BIT;
194+
195+
/* No point in going further if nothing changes */
196+
if (level == old)
197+
return 0;
198+
171199
/* Trying to gain more privileges? */
172200
if (level > old) {
173201
if (!capable(CAP_SYS_RAWIO) ||
174202
security_locked_down(LOCKDOWN_IOPORT))
175203
return -EPERM;
176204
}
177-
/*
178-
* Change the flags value on the return stack, which has been set
179-
* up on system-call entry. See also the fork and signal handling
180-
* code how this is handled.
181-
*/
182-
regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
183-
(level << X86_EFLAGS_IOPL_BIT);
184-
/* Store the new level in the thread struct */
185-
t->iopl = level << X86_EFLAGS_IOPL_BIT;
186-
/*
187-
* X86_32 switches immediately and XEN handles it via emulation.
188-
*/
189-
set_iopl_mask(t->iopl);
205+
206+
if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) {
207+
t->iopl_emul = level;
208+
task_update_io_bitmap();
209+
} else {
210+
/*
211+
* Change the flags value on the return stack, which has
212+
* been set up on system-call entry. See also the fork and
213+
* signal handling code how this is handled.
214+
*/
215+
regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
216+
(level << X86_EFLAGS_IOPL_BIT);
217+
/* Store the new level in the thread struct */
218+
t->iopl = level << X86_EFLAGS_IOPL_BIT;
219+
/*
220+
* X86_32 switches immediately and XEN handles it via
221+
* emulation.
222+
*/
223+
set_iopl_mask(t->iopl);
224+
}
190225

191226
return 0;
192227
}

arch/x86/kernel/process.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -376,21 +376,27 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
376376
void tss_update_io_bitmap(void)
377377
{
378378
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
379+
u16 *base = &tss->x86_tss.io_bitmap_base;
379380

380381
if (test_thread_flag(TIF_IO_BITMAP)) {
381-
struct io_bitmap *iobm = current->thread.io_bitmap;
382-
383-
/*
384-
* Only copy bitmap data when the sequence number
385-
* differs. The update time is accounted to the incoming
386-
* task.
387-
*/
388-
if (tss->io_bitmap.prev_sequence != iobm->sequence)
389-
tss_copy_io_bitmap(tss, iobm);
390-
391-
/* Enable the bitmap */
392-
tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID;
393-
382+
struct thread_struct *t = &current->thread;
383+
384+
if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION) &&
385+
t->iopl_emul == 3) {
386+
*base = IO_BITMAP_OFFSET_VALID_ALL;
387+
} else {
388+
struct io_bitmap *iobm = t->io_bitmap;
389+
/*
390+
* Only copy bitmap data when the sequence number
391+
* differs. The update time is accounted to the
392+
* incoming task.
393+
*/
394+
if (tss->io_bitmap.prev_sequence != iobm->sequence)
395+
tss_copy_io_bitmap(tss, iobm);
396+
397+
/* Enable the bitmap */
398+
*base = IO_BITMAP_OFFSET_VALID_MAP;
399+
}
394400
/*
395401
* Make sure that the TSS limit is covering the io bitmap.
396402
* It might have been cut down by a VMEXIT to 0x67 which

0 commit comments

Comments
 (0)