|
| 1 | +/* |
| 2 | + * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls |
| 3 | + * |
| 4 | + * This program is free software; you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU General Public License as published by |
| 6 | + * the Free Software Foundation; either version 2 of the License, or |
| 7 | + * (at your option) any later version. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | + * GNU General Public License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the GNU General Public License |
| 15 | + * along with this program; if not, see <http://www.gnu.org/licenses/>. |
| 16 | + */ |
| 17 | + |
| 18 | +#ifndef LINUX_USER_SAFE_SYSCALL_H |
| 19 | +#define LINUX_USER_SAFE_SYSCALL_H |
| 20 | + |
| 21 | +/** |
| 22 | + * safe_syscall: |
| 23 | + * @number: number of system call to make |
| 24 | + * @...: arguments to the system call |
| 25 | + * |
| 26 | + * Make a system call if no guest signal is pending. |
| 27 | + * This has the same API as the libc syscall() function, except that it |
| 28 | + * may return -1 with errno == TARGET_ERESTARTSYS if a signal was pending. |
| 29 | + * |
| 30 | + * Returns: the system call result, or -1 with an error code in errno |
| 31 | + * (Errnos are host errnos; we rely on TARGET_ERESTARTSYS not clashing |
| 32 | + * with any of the host errno values.) |
| 33 | + */ |
| 34 | + |
| 35 | +/* |
| 36 | + * A guide to using safe_syscall() to handle interactions between guest |
| 37 | + * syscalls and guest signals: |
| 38 | + * |
| 39 | + * Guest syscalls come in two flavours: |
| 40 | + * |
| 41 | + * (1) Non-interruptible syscalls |
| 42 | + * |
| 43 | + * These are guest syscalls that never get interrupted by signals and |
| 44 | + * so never return EINTR. They can be implemented straightforwardly in |
| 45 | + * QEMU: just make sure that if the implementation code has to make any |
| 46 | + * blocking calls that those calls are retried if they return EINTR. |
| 47 | + * It's also OK to implement these with safe_syscall, though it will be |
| 48 | + * a little less efficient if a signal is delivered at the 'wrong' moment. |
| 49 | + * |
| 50 | + * Some non-interruptible syscalls need to be handled using block_signals() |
| 51 | + * to block signals for the duration of the syscall. This mainly applies |
| 52 | + * to code which needs to modify the data structures used by the |
| 53 | + * host_signal_handler() function and the functions it calls, including |
| 54 | + * all syscalls which change the thread's signal mask. |
| 55 | + * |
| 56 | + * (2) Interruptible syscalls |
| 57 | + * |
| 58 | + * These are guest syscalls that can be interrupted by signals and |
| 59 | + * for which we need to either return EINTR or arrange for the guest |
| 60 | + * syscall to be restarted. This category includes both syscalls which |
| 61 | + * always restart (and in the kernel return -ERESTARTNOINTR), ones |
| 62 | + * which only restart if there is no handler (kernel returns -ERESTARTNOHAND |
| 63 | + * or -ERESTART_RESTARTBLOCK), and the most common kind which restart |
| 64 | + * if the handler was registered with SA_RESTART (kernel returns |
| 65 | + * -ERESTARTSYS). System calls which are only interruptible in some |
| 66 | + * situations (like 'open') also need to be handled this way. |
| 67 | + * |
| 68 | + * Here it is important that the host syscall is made |
| 69 | + * via this safe_syscall() function, and *not* via the host libc. |
| 70 | + * If the host libc is used then the implementation will appear to work |
| 71 | + * most of the time, but there will be a race condition where a |
| 72 | + * signal could arrive just before we make the host syscall inside libc, |
| 73 | + * and then the guest syscall will not correctly be interrupted. |
| 74 | + * Instead the implementation of the guest syscall can use the safe_syscall |
| 75 | + * function but otherwise just return the result or errno in the usual |
| 76 | + * way; the main loop code will take care of restarting the syscall |
| 77 | + * if appropriate. |
| 78 | + * |
| 79 | + * (If the implementation needs to make multiple host syscalls this is |
| 80 | + * OK; any which might really block must be via safe_syscall(); for those |
| 81 | + * which are only technically blocking (ie which we know in practice won't |
| 82 | + * stay in the host kernel indefinitely) it's OK to use libc if necessary. |
| 83 | + * You must be able to cope with backing out correctly if some safe_syscall |
| 84 | + * you make in the implementation fails with errno set to either |
| 85 | + * TARGET_ERESTARTSYS or EINTR though.) |
| 86 | + * |
| 87 | + * block_signals() cannot be used for interruptible syscalls. |
| 88 | + * |
| 89 | + * |
| 90 | + * How and why the safe_syscall implementation works: |
| 91 | + * |
| 92 | + * The basic setup is that we make the host syscall via a known |
| 93 | + * section of host native assembly. If a signal occurs, our signal |
| 94 | + * handler checks the interrupted host PC against the address of that |
| 95 | + * known section. If the PC is before or at the address of the syscall |
| 96 | + * instruction then we change the PC to point at a "return |
| 97 | + * -TARGET_ERESTARTSYS" code path instead, and then exit the signal handler |
| 98 | + * (causing the safe_syscall() call to immediately return that value). |
| 99 | + * Then in the main.c loop if we see this magic return value we adjust |
| 100 | + * the guest PC to wind it back to before the system call, and invoke |
| 101 | + * the guest signal handler as usual. |
| 102 | + * |
| 103 | + * This winding-back will happen in two cases: |
| 104 | + * (1) signal came in just before we took the host syscall (a race); |
| 105 | + * in this case we'll take the guest signal and have another go |
| 106 | + * at the syscall afterwards, and this is indistinguishable for the |
| 107 | + * guest from the timing having been different such that the guest |
| 108 | + * signal really did win the race |
| 109 | + * (2) signal came in while the host syscall was blocking, and the |
| 110 | + * host kernel decided the syscall should be restarted; |
| 111 | + * in this case we want to restart the guest syscall also, and so |
| 112 | + * rewinding is the right thing. (Note that "restart" semantics mean |
| 113 | + * "first call the signal handler, then reattempt the syscall".) |
| 114 | + * The other situation to consider is when a signal came in while the |
| 115 | + * host syscall was blocking, and the host kernel decided that the syscall |
| 116 | + * should not be restarted; in this case QEMU's host signal handler will |
| 117 | + * be invoked with the PC pointing just after the syscall instruction, |
| 118 | + * with registers indicating an EINTR return; the special code in the |
| 119 | + * handler will not kick in, and we will return EINTR to the guest as |
| 120 | + * we should. |
| 121 | + * |
| 122 | + * Notice that we can leave the host kernel to make the decision for |
| 123 | + * us about whether to do a restart of the syscall or not; we do not |
| 124 | + * need to check SA_RESTART flags in QEMU or distinguish the various |
| 125 | + * kinds of restartability. |
| 126 | + */ |
| 127 | +#ifdef HAVE_SAFE_SYSCALL |
| 128 | +/* The core part of this function is implemented in assembly. The safe_syscall() macro calls it with a pointer to the signal_pending flag, followed by the syscall number and the syscall's arguments. */ |
| 129 | +extern long safe_syscall_base(int *pending, long number, ...); |
| 130 | + |
| 131 | +#define safe_syscall(...) /* takes the syscall number followed by its arguments */ \ |
| 132 | + ({ /* GNU statement expression; its value is the final ret_ */ \ |
| 133 | + long ret_; \ |
| 134 | + int *psp_ = &((TaskState *)thread_cpu->opaque)->signal_pending; /* address of the signal_pending field in this TaskState */ \ |
| 135 | + ret_ = safe_syscall_base(psp_, __VA_ARGS__); \ |
| 136 | + if (is_error(ret_)) { /* is_error() is not defined in this header */ \ |
| 137 | + errno = -ret_; /* the failing value is treated as a negated errno */ \ |
| 138 | + ret_ = -1; /* report failure the way libc syscall() does */ \ |
| 139 | + } \ |
| 140 | + ret_; \ |
| 141 | + }) |
| 142 | + |
| 143 | +#else |
| 144 | + |
| 145 | +/* |
| 146 | + * Fallback for architectures which don't yet provide a safe-syscall assembly |
| 147 | + * fragment; note that this is racy! |
| 148 | + * This should go away when all host architectures have been updated. |
| 149 | + */ |
| 150 | +#define safe_syscall syscall /* plain libc syscall(): no protection against the signal race */ |
| 151 | + |
| 152 | +#endif |
| 153 | + |
| 154 | +#endif |
0 commit comments