diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index c2f646f2567d..dcdc45f1f07e 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -107,6 +107,7 @@ * zinject * zinject <-a | -u pool> * zinject -c + * zinject -w <state> [-W <delay>] * zinject -E [-a] [-m] [-f freq] [-l level] [-r range] * [-T iotype] [-t type object | -b bookmark pool] * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level] @@ -119,6 +120,11 @@ * The '-c' option will clear the given handler, or all handlers if 'all' is * specified. * + * The '-w' flag waits until an injection event occurs. Wait calls accept a + * state value to ensure no events are lost. Use '-w 0' initially and then pass + * the state value printed on stdout to subsequent wait calls. The optional + * '-W' flag sets a timeout in milliseconds. + * * The '-e' option takes a string describing the errno to simulate. This must * be one of 'io', 'checksum', 'decompress', or 'decrypt'. In most cases this * will result in the same behavior, but RAID-Z will produce a different set of @@ -297,6 +303,14 @@ usage(void) "\t\tClear the particular record (if given a numeric ID), or\n" "\t\tall records if 'all' is specified.\n" "\n" + "\tzinject -w [-W delay]\n" + "\n" + "\t\tWait for an injection event to occur. The 'state' parameter\n" + "\t\tshould be set to zero initially then the value printed to\n" + "\t\tstdout after each call to synchronize with kernel state.\n" + "\t\tThe optional timeout is specified in milliseconds.\n" + "\t\tWaits forever if timeout is omitted.\n" + "\n" "\tzinject -p pool\n" "\t\tInject a panic fault at the specified function. 
Only \n" "\t\tfunctions which call spa_vdev_config_exit(), or \n" @@ -938,6 +952,8 @@ main(int argc, char **argv) int flags = 0; uint32_t dvas = 0; hrtime_t ready_delay = -1; + char *wait = NULL; + hrtime_t wait_timeout = -1; if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); @@ -968,7 +984,7 @@ main(int argc, char **argv) } while ((c = getopt(argc, argv, - ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:")) != -1) { + ":aA:b:C:d:D:E:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:P:w:W:")) != -1) { switch (c) { case 'a': flags |= ZINJECT_FLUSH_ARC; @@ -1141,6 +1157,9 @@ main(int argc, char **argv) case 'u': flags |= ZINJECT_UNLOAD_SPA; break; + case 'w': + wait = optarg; + break; case 'E': ready_delay = MSEC2NSEC(strtol(optarg, &end, 10)); if (ready_delay <= 0 || *end != '\0') { @@ -1163,6 +1182,16 @@ main(int argc, char **argv) return (1); } break; + case 'W': + wait_timeout = MSEC2NSEC(strtol(optarg, &end, 10)); + if (wait_timeout < 0 || *end != '\0') { + (void) fprintf(stderr, "invalid timeout '%s': " + "must be a non-negative integer\n", optarg); + usage(); + libzfs_fini(g_zfs); + return (1); + } + break; case ':': (void) fprintf(stderr, "option -%c requires an " "operand\n", optopt); @@ -1184,19 +1213,36 @@ main(int argc, char **argv) if (record.zi_duration != 0 && record.zi_cmd == 0) record.zi_cmd = ZINJECT_IGNORED_WRITES; - if (cancel != NULL) { - /* - * '-c' is invalid with any other options. - */ - if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || - record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) { + /* + * '-c' and '-w' are invalid with any other options. 
+ */ + if (raw != NULL || range != NULL || type != TYPE_INVAL || + level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || + record.zi_freq > 0 || dvas != 0 || ready_delay >= 0) { + if (cancel != NULL) { (void) fprintf(stderr, "cancel (-c) incompatible with " "any other options\n"); usage(); libzfs_fini(g_zfs); return (2); } + if (wait != NULL) { + (void) fprintf(stderr, "wait (-w) incompatible with " + "any other options\n"); + usage(); + libzfs_fini(g_zfs); + return (2); + } + } + + if (cancel != NULL) { + if (wait != NULL) { + (void) fprintf(stderr, "cancel (-c) incompatible with " + "wait (-w) option\n"); + usage(); + libzfs_fini(g_zfs); + return (2); + } if (argc != 0) { (void) fprintf(stderr, "extraneous argument to '-c'\n"); usage(); @@ -1219,6 +1265,32 @@ main(int argc, char **argv) } } + if (wait != NULL) { + uint64_t state; + if (argc != 0) { + (void) fprintf(stderr, "extraneous argument to '-w'\n"); + usage(); + libzfs_fini(g_zfs); + return (2); + } + state = (uint64_t)strtoull(wait, &end, 10); + if (*end != 0) { + (void) fprintf(stderr, "invalid state '%s': " + "must be an unsigned integer\n", wait); + usage(); + libzfs_fini(g_zfs); + return (1); + } + error = lzc_wait_inject(&state, wait_timeout); + if (error == ETIMEDOUT) + (void) printf("wait timeout\n"); + else if (error != 0) + (void) printf("wait failed: %s\n", strerror(error)); + else + (void) printf("%"PRIu64"\n", state); + return (error == 0 ? 
0 : 1); + } + if (device != NULL) { /* * Device (-d) injection uses a completely different mechanism diff --git a/include/libzfs_core.h b/include/libzfs_core.h index 231beaa69290..55470aabd9f1 100644 --- a/include/libzfs_core.h +++ b/include/libzfs_core.h @@ -165,6 +165,8 @@ _LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **); _LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t, uint64_t); +_LIBZFS_CORE_H int lzc_wait_inject(uint64_t *state, hrtime_t timeout); + #ifdef __cplusplus } #endif diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 662fd81c5ee1..92fcdc20b2f9 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1566,6 +1566,7 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_SCRUB, /* 0x5a57 */ ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */ ZFS_IOC_DDT_PRUNE, /* 0x5a59 */ + ZFS_IOC_WAIT_INJECT, /* 0x5a5a */ /* * Per-platform (Optional) - 8/128 numbers reserved. @@ -1826,6 +1827,12 @@ typedef enum { #define DDT_PRUNE_UNIT "ddt_prune_unit" #define DDT_PRUNE_AMOUNT "ddt_prune_amount" +/* + * The following are names used when invoking ZFS_IOC_WAIT_INJECT. 
+ */ +#define WAIT_INJECT_STATE "state" +#define WAIT_INJECT_TIMEOUT "timeout" + /* * Flags for ZFS_IOC_VDEV_SET_STATE */ diff --git a/include/sys/zio.h b/include/sys/zio.h index acb0a03a36b2..33f4731d0d40 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -704,6 +704,7 @@ extern int zio_inject_fault(char *name, int flags, int *id, struct zinject_record *record); extern int zio_inject_list_next(int *id, char *name, size_t buflen, struct zinject_record *record); +extern int zio_inject_wait(uint64_t *state, hrtime_t timeout); extern int zio_clear_fault(int id); extern void zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type); diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c index 9347aa7c6a28..2e21ce8c4e66 100644 --- a/lib/libzfs_core/libzfs_core.c +++ b/lib/libzfs_core/libzfs_core.c @@ -1994,3 +1994,29 @@ lzc_ddt_prune(const char *pool, zpool_ddt_prune_unit_t unit, uint64_t amount) return (error); } + +/* + * Wait for injection events. + */ +int +lzc_wait_inject(uint64_t *state, hrtime_t timeout) +{ + int error; + + nvlist_t *result = NULL; + nvlist_t *args = fnvlist_alloc(); + + if (timeout != 0) + fnvlist_add_uint64(args, WAIT_INJECT_STATE, *state); + if (timeout > 0) + VERIFY0(nvlist_add_hrtime(args, WAIT_INJECT_TIMEOUT, timeout)); + + error = lzc_ioctl(ZFS_IOC_WAIT_INJECT, NULL, args, &result); + if (error == 0) + *state = fnvlist_lookup_uint64(result, WAIT_INJECT_STATE); + + fnvlist_free(args); + fnvlist_free(result); + + return (error); +} diff --git a/man/man8/zinject.8 b/man/man8/zinject.8 index 704f6a7accd8..6dc90b7a3a9f 100644 --- a/man/man8/zinject.8 +++ b/man/man8/zinject.8 @@ -61,6 +61,21 @@ Cancel injection records. . .It Xo .Nm zinject +.Fl w Ar state Ns | Ns Sy 0 +.Op Fl W Ar delay +.Xc +Wait until an injection event occurs. 
+The +.Ar state +parameter synchronizes with kernel state and +should be set to 0 for the first call, which returns the current state value, +and then to the value printed to stdout after each wait. +.Fl W Ar delay +sets an optional timeout in milliseconds. +If omitted, waits forever. +. +.It Xo +.Nm zinject .Fl d Ar vdev .Fl A Sy degrade Ns | Ns Sy fault .Ar pool diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 5ca7c2320c4e..a1dda7269bf5 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -6094,6 +6094,43 @@ zfs_ioc_inject_list_next(zfs_cmd_t *zc) return (error); } +/* + * Waits for an injection event to occur (event injected or handler add/remove). + * innvl: { + * "state": wait state token + * "timeout": how long to wait in nanoseconds (ignored unless "state" given) + * } + * outnvl: { + * "state": wait state token + * } + * Returns 0, EINTR, or ETIMEDOUT. + */ +static const zfs_ioc_key_t zfs_keys_wait_inject[] = { + {"state", DATA_TYPE_UINT64, ZK_OPTIONAL}, + {"timeout", DATA_TYPE_HRTIME, ZK_OPTIONAL}, +}; + +static int +zfs_ioc_wait_inject(const char *name, nvlist_t *innvl, nvlist_t *outnvl) +{ + (void) name; + uint64_t state; + hrtime_t timeout; + int error; + + if (nvlist_lookup_uint64(innvl, "state", &state) != 0) { + state = 0; + timeout = 0; + } else if (nvlist_lookup_hrtime(innvl, "timeout", &timeout) != 0) { + timeout = TIME_MAX; + } + + error = zio_inject_wait(&state, timeout); + fnvlist_add_uint64(outnvl, "state", state); + + return (error); +} + static int zfs_ioc_error_log(zfs_cmd_t *zc) { @@ -7687,6 +7724,10 @@ zfs_ioctl_init(void) zfs_ioc_clear_fault, zfs_secpolicy_inject); zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, zfs_ioc_inject_list_next, zfs_secpolicy_inject); + zfs_ioctl_register("wait_inject", ZFS_IOC_WAIT_INJECT, + zfs_ioc_wait_inject, zfs_secpolicy_inject, + NO_NAME, POOL_CHECK_NONE, B_FALSE, B_FALSE, + zfs_keys_wait_inject, ARRAY_SIZE(zfs_keys_wait_inject)); /* * pool destroy, and export don't log the 
history as part of diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 287577018ed1..d89512fbe50b 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -103,6 +103,23 @@ static kmutex_t inject_delay_mtx; */ static int inject_next_id = 1; +/* + * Lock for inject_event_cv and inject_event_count. + */ +static kmutex_t inject_event_mtx; + +/* + * Number of injection events so far. + */ +static uint64_t inject_event_count; + +/* + * Broadcasts injection events to waiters. + */ +static kcondvar_t inject_event_cv; + +static void zio_inject_notify(void); + /* * Test if the requested frequency was triggered */ @@ -166,8 +183,10 @@ zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva, injected = freq_triggered(record->zi_freq); } - if (injected) + if (injected) { record->zi_inject_count++; + zio_inject_notify(); + } return (injected); } @@ -193,6 +212,7 @@ zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type) strcmp(tag, handler->zi_record.zi_func) == 0) { handler->zi_record.zi_match_count++; handler->zi_record.zi_inject_count++; + zio_inject_notify(); panic("Panic requested in function %s\n", tag); } } @@ -354,6 +374,7 @@ zio_handle_label_injection(zio_t *zio, int error) (offset >= start && offset <= end)) { handler->zi_record.zi_match_count++; handler->zi_record.zi_inject_count++; + zio_inject_notify(); ret = error; break; } @@ -453,6 +474,7 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2) continue; handler->zi_record.zi_inject_count++; + zio_inject_notify(); /* * For a failed open, pretend like the device @@ -491,6 +513,7 @@ zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2) if (handler->zi_record.zi_error == ENXIO) { handler->zi_record.zi_match_count++; handler->zi_record.zi_inject_count++; + zio_inject_notify(); ret = SET_ERROR(EIO); break; } @@ -549,6 +572,7 @@ zio_handle_ignored_writes(zio_t *zio) /* Have a "problem" writing 60% of the time */ if 
(random_in_range(100) < 60) { handler->zi_record.zi_inject_count++; + zio_inject_notify(); zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; } break; @@ -576,6 +600,7 @@ spa_handle_ignored_writes(spa_t *spa) handler->zi_record.zi_match_count++; handler->zi_record.zi_inject_count++; + zio_inject_notify(); if (handler->zi_record.zi_duration > 0) { VERIFY(handler->zi_record.zi_timer == 0 || @@ -759,7 +784,7 @@ zio_handle_io_delay(zio_t *zio) min_handler->zi_record.zi_nlanes; min_handler->zi_record.zi_inject_count++; - + zio_inject_notify(); } mutex_exit(&inject_delay_mtx); @@ -787,6 +812,7 @@ zio_handle_pool_delay(spa_t *spa, hrtime_t elapsed, zinject_type_t command) SEC2NSEC(handler->zi_record.zi_duration); if (pause > elapsed) { handler->zi_record.zi_inject_count++; + zio_inject_notify(); delay = pause - elapsed; } id = handler->zi_id; @@ -1065,6 +1091,8 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) list_insert_tail(&inject_handlers, handler); atomic_inc_32(&zio_injection_enabled); + zio_inject_notify(); + rw_exit(&inject_lock); } @@ -1122,6 +1150,52 @@ zio_inject_list_next(int *id, char *name, size_t buflen, return (ret); } +/* + * Waits on injection events (handler added, removed, or event injected). + * Callers set *state to 0 initially and should pass the updated value back to + * successive calls to ensure that no events are lost between wait calls. + * If timeout > 0 and no event is pending, waits up to timeout nanoseconds. + * Returns 0 if an event occurred since *state was updated, EINTR if a signal is + * caught, or ETIMEDOUT if no event occurred within the timeout. 
+ */ +int +zio_inject_wait(uint64_t *state, hrtime_t timeout) +{ + int error; + int rc; + + mutex_enter(&inject_event_mtx); + if (timeout > 0 && *state > inject_event_count) { + rc = cv_timedwait_sig_hires(&inject_event_cv, &inject_event_mtx, + timeout, USEC2NSEC(1), 0); + } else { + rc = 1; + } + *state = inject_event_count + 1; + mutex_exit(&inject_event_mtx); + + if (rc > 0) + error = 0; + else if (rc < 0) + error = ETIMEDOUT; + else + error = EINTR; + + return (error); +} + +/* + * Wakes up all calls to zio_inject_wait. + */ +static void +zio_inject_notify(void) +{ + mutex_enter(&inject_event_mtx); + ++inject_event_count; + cv_broadcast(&inject_event_cv); + mutex_exit(&inject_event_mtx); +} + /* * Clear the fault handler with the given identifier, or return ENOENT if none * exists. @@ -1152,6 +1226,8 @@ zio_clear_fault(int id) list_remove(&inject_handlers, handler); rw_exit(&inject_lock); + zio_inject_notify(); + if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { ASSERT3P(handler->zi_lanes, !=, NULL); kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) * @@ -1176,6 +1252,8 @@ zio_inject_init(void) { rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&inject_event_mtx, NULL, MUTEX_DEFAULT, NULL); + cv_init(&inject_event_cv, NULL, CV_DEFAULT, NULL); list_create(&inject_handlers, sizeof (inject_handler_t), offsetof(inject_handler_t, zi_link)); } @@ -1186,6 +1264,8 @@ zio_inject_fini(void) list_destroy(&inject_handlers); mutex_destroy(&inject_delay_mtx); rw_destroy(&inject_lock); + mutex_destroy(&inject_event_mtx); + cv_destroy(&inject_event_cv); } #if defined(_KERNEL)