Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions Documentation/nvme-ocp-set-error-injection.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ SYNOPSIS
[verse]
'nvme ocp set-error-injection' <device> [--data=<file> | -d <file>]
[--number=<num> | -n <num>] [--no-uuid | -N]
[--type=<type> | -t <type>] [--nrtdp=<num> | -r <num>]
[--verbose | -v] [--output-format=<fmt> | -o <fmt>]
[--timeout=<timeout>]
[--all-ns | -a] [--type=<type> | -t <type>]
[--nrtdp=<num> | -r <num>] [--timeout=<timeout>]

DESCRIPTION
-----------
Expand Down Expand Up @@ -40,23 +39,18 @@ OPTIONS
Do not try to automatically detect UUID index for this command (required
for old OCP 1.0 support)

-a::
--all-ns::
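Apply to all namespaces: the command is sent with NSID set to FFFFFFFFh.
Without this option NSID is set to 0. Which value is required depends on
the OCP specification version the device implements.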

-t <type>::
--type=<type>::
Error injection type
Error injection type (1-22: see NOTES section for valid types)

-r <num>::
--nrtdp=<num>::
Number of reads to trigger device panic

-v::
--verbose::
Increase the information detail in the output.

-o <fmt>::
--output-format=<fmt>::
Set the reporting format to 'normal', 'json' or 'binary'. Only one
output format can be used at a time.

--timeout=<timeout>::
Override default timeout value. In milliseconds.

Expand All @@ -74,6 +68,38 @@ EXAMPLES
# nvme ocp set-error-injection /dev/nvme0 -t 2 -r 5
------------

* Has the program issue a set-error-injection for all namespaces with type 1.
+
------------
# nvme ocp set-error-injection /dev/nvme0 -a -t 1
------------

NOTES
-----
Valid error injection types:
1 - CPU/controller hang
2 - NAND hang
3 - PLP defect
4 - Logical firmware error
5 - DRAM corruption critical path
6 - DRAM corruption non-critical path
7 - NAND corruption
8 - SRAM corruption
9 - HW malfunction
10 - No more NAND spares available
11 - Incomplete shutdown
12 - Metadata corruption
13 - Critical garbage collection
14 - Latency spike
15 - I/O command failure
16 - I/O command timeout
17 - Admin command failure
18 - Admin command timeout
19 - Thermal throttle engaged
20 - Thermal throttle disengaged
21 - Critical temperature event
22 - Die offline

NVME
----
Part of the nvme-user suite.
15 changes: 13 additions & 2 deletions plugins/ocp/ocp-nvme.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ struct ieee1667_get_cq_entry {

/*
 * Description strings for command-line options. The ones used by
 * set_error_injection (data, number, no_uuid, all_ns, type) are passed as
 * the last argument of the OPT_FILE/OPT_BYTE/OPT_FLAG/OPT_SHRT entries.
 */
static const char *sel = "[0-3]: current/default/saved/supported";
static const char *no_uuid = "Skip UUID index search (UUID index not required for OCP 1.0)";
/* Description for --all-ns / -a. */
static const char *all_ns = "Apply to all namespaces";
/* NOTE(review): data and number are not static - presumably referenced from another file; confirm before changing linkage. */
const char *data = "Error injection data structure entries";
const char *number = "Number of valid error injection data entries";
static const char *type = "Error injection type";
Expand Down Expand Up @@ -2935,12 +2936,13 @@ static int get_error_injection(int argc, char **argv, struct command *cmd, struc
return error_injection_get(dev, cfg.sel, !argconfig_parse_seen(opts, "no-uuid"));
}

static int error_injection_set(struct nvme_dev *dev, struct erri_config *cfg, bool uuid)
static int error_injection_set(struct nvme_dev *dev, struct erri_config *cfg, bool uuid, __u32 nsid)
{
int err;
__u32 result;
struct nvme_set_features_args args = {
.args_size = sizeof(args),
.nsid = nsid,
.fd = dev_fd(dev),
.fid = OCP_FID_ERRI,
.cdw11 = cfg->number,
Expand Down Expand Up @@ -3010,6 +3012,7 @@ static int set_error_injection(int argc, char **argv, struct command *cmd, struc
{
const char *desc = "Inject error conditions";
int err;
__u32 nsid;
struct erri_config cfg = {
.number = 1,
};
Expand All @@ -3020,14 +3023,22 @@ static int set_error_injection(int argc, char **argv, struct command *cmd, struc
OPT_FILE("data", 'd', &cfg.file, data),
OPT_BYTE("number", 'n', &cfg.number, number),
OPT_FLAG("no-uuid", 'N', NULL, no_uuid),
OPT_FLAG("all-ns", 'a', NULL, all_ns),
OPT_SHRT("type", 't', &cfg.type, type),
OPT_SHRT("nrtdp", 'r', &cfg.nrtdp, nrtdp));

err = parse_and_open(&dev, argc, argv, desc, opts);
if (err)
return err;

return error_injection_set(dev, &cfg, !argconfig_parse_seen(opts, "no-uuid"));
/*
* Different spec versions ask for different nsid values:
* OCP v1.0 - NSID: Shall be set to zero.
* OCP v2.0r21 - NSID: Shall be set to FFFFFFFFh.
* OCP v2.5 - NSID: The host should either clear this to zero or set this to FFFFFFFFh.
* Default to zero; --all-ns selects NVME_NSID_ALL instead.
*/
nsid = argconfig_parse_seen(opts, "all-ns") ? NVME_NSID_ALL : 0;
return error_injection_set(dev, &cfg, !argconfig_parse_seen(opts, "no-uuid"), nsid);
}

static int enable_ieee1667_silo_get(struct nvme_dev *dev, const __u8 sel, bool uuid)
Expand Down