|
| 1 | +/************************************************************\ |
| 2 | + * Copyright 2025 Lawrence Livermore National Security, LLC |
| 3 | + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) |
| 4 | + * |
| 5 | + * This file is part of the Flux resource manager framework. |
| 6 | + * For details, see https://github.com/flux-framework. |
| 7 | + * |
| 8 | + * SPDX-License-Identifier: LGPL-3.0 |
| 9 | +\************************************************************/ |
| 10 | + |
| 11 | +#if HAVE_CONFIG_H |
| 12 | +# include <config.h> |
| 13 | +#endif |
| 14 | +#include <unistd.h> |
| 15 | +#include <stdarg.h> |
| 16 | +#include <jansson.h> |
| 17 | +#include <time.h> |
| 18 | +#include <stdarg.h> |
| 19 | + |
| 20 | +#include "src/common/libeventlog/eventlog.h" |
| 21 | +#include "src/common/libkvs/treeobj.h" |
| 22 | +#include "src/common/libkvs/kvs_checkpoint.h" |
| 23 | +#include "src/common/libutil/fsd.h" |
| 24 | +#include "src/common/libutil/timestamp.h" |
| 25 | +#include "src/common/libutil/blobref.h" |
| 26 | +#include "src/common/libcontent/content.h" |
| 27 | +#include "ccan/str/str.h" |
| 28 | + |
| 29 | +#include "builtin.h" |
| 30 | + |
| 31 | +static void fsck_treeobj (flux_t *h, |
| 32 | + const char *path, |
| 33 | + json_t *treeobj); |
| 34 | + |
| 35 | +static bool verbose; |
| 36 | +static bool quiet; |
| 37 | +static int errorcount; |
| 38 | + |
/* Print a printf-style diagnostic to stderr, followed by a newline.
 *
 * fmt - printf-style format string
 * ap  - argument list matching fmt; it is consumed by this call
 *
 * The message is formatted directly onto the stream instead of through a
 * fixed-size scratch buffer, so a diagnostic containing a long path is
 * emitted in full rather than being cut off.
 */
static void read_verror (const char *fmt, va_list ap)
{
    vfprintf (stderr, fmt, ap);
    fputc ('\n', stderr);
}
| 45 | + |
| 46 | +static __attribute__ ((format (printf, 1, 2))) |
| 47 | +void read_error (const char *fmt, ...) |
| 48 | +{ |
| 49 | + va_list ap; |
| 50 | + if (quiet) |
| 51 | + return; |
| 52 | + va_start (ap, fmt); |
| 53 | + read_verror (fmt, ap); |
| 54 | + va_end (ap); |
| 55 | +} |
| 56 | + |
| 57 | +static void fsck_valref (flux_t *h, |
| 58 | + const char *path, |
| 59 | + json_t *treeobj) |
| 60 | +{ |
| 61 | + int count = treeobj_get_count (treeobj); |
| 62 | + const void *buf; |
| 63 | + size_t buflen; |
| 64 | + |
| 65 | + for (int i = 0; i < count; i++) { |
| 66 | + flux_future_t *f; |
| 67 | + if (!(f = content_load_byblobref (h, |
| 68 | + treeobj_get_blobref (treeobj, i), |
| 69 | + CONTENT_FLAG_CACHE_BYPASS)) |
| 70 | + || content_load_get (f, &buf, &buflen) < 0) { |
| 71 | + if (errno == ENOENT) |
| 72 | + read_error ("%s: missing blobref index=%d", |
| 73 | + path, |
| 74 | + i); |
| 75 | + else |
| 76 | + read_error ("%s: error retrieving blobref index=%d: %s", |
| 77 | + path, |
| 78 | + i, |
| 79 | + future_strerror (f, errno)); |
| 80 | + errorcount++; |
| 81 | + flux_future_destroy (f); |
| 82 | + return; |
| 83 | + } |
| 84 | + flux_future_destroy (f); |
| 85 | + } |
| 86 | +} |
| 87 | + |
| 88 | +static void fsck_val (flux_t *h, |
| 89 | + const char *path, |
| 90 | + json_t *treeobj) |
| 91 | +{ |
| 92 | + /* Do nothing for now */ |
| 93 | +} |
| 94 | + |
| 95 | +static void fsck_symlink (flux_t *h, |
| 96 | + const char *path, |
| 97 | + json_t *treeobj) |
| 98 | +{ |
| 99 | + /* Do nothing for now */ |
| 100 | +} |
| 101 | + |
| 102 | +static void fsck_dir (flux_t *h, |
| 103 | + const char *path, |
| 104 | + json_t *treeobj) |
| 105 | +{ |
| 106 | + json_t *dict = treeobj_get_data (treeobj); |
| 107 | + const char *name; |
| 108 | + json_t *entry; |
| 109 | + |
| 110 | + json_object_foreach (dict, name, entry) { |
| 111 | + char *newpath; |
| 112 | + if (asprintf (&newpath, "%s.%s", path, name) < 0) |
| 113 | + log_msg_exit ("out of memory"); |
| 114 | + fsck_treeobj (h, newpath, entry); // recurse |
| 115 | + free (newpath); |
| 116 | + } |
| 117 | +} |
| 118 | + |
| 119 | +static void fsck_dirref (flux_t *h, |
| 120 | + const char *path, |
| 121 | + json_t *treeobj) |
| 122 | +{ |
| 123 | + flux_future_t *f = NULL; |
| 124 | + const void *buf; |
| 125 | + size_t buflen; |
| 126 | + json_t *treeobj_deref = NULL; |
| 127 | + int count; |
| 128 | + |
| 129 | + count = treeobj_get_count (treeobj); |
| 130 | + if (count != 1) { |
| 131 | + read_error ("%s: invalid dirref treeobj count=%d", |
| 132 | + path, |
| 133 | + count); |
| 134 | + errorcount++; |
| 135 | + return; |
| 136 | + } |
| 137 | + if (!(f = content_load_byblobref (h, |
| 138 | + treeobj_get_blobref (treeobj, 0), |
| 139 | + CONTENT_FLAG_CACHE_BYPASS)) |
| 140 | + || content_load_get (f, &buf, &buflen) < 0) { |
| 141 | + if (errno == ENOENT) |
| 142 | + read_error ("%s: missing dirref blobref", path); |
| 143 | + else |
| 144 | + read_error ("%s: error retrieving dirref blobref: %s", |
| 145 | + path, |
| 146 | + future_strerror (f, errno)); |
| 147 | + errorcount++; |
| 148 | + flux_future_destroy (f); |
| 149 | + return; |
| 150 | + } |
| 151 | + if (!(treeobj_deref = treeobj_decodeb (buf, buflen))) { |
| 152 | + read_error ("%s: could not decode directory", path); |
| 153 | + errorcount++; |
| 154 | + goto cleanup; |
| 155 | + } |
| 156 | + if (!treeobj_is_dir (treeobj_deref)) { |
| 157 | + read_error ("%s: dirref references non-directory", path); |
| 158 | + errorcount++; |
| 159 | + goto cleanup; |
| 160 | + } |
| 161 | + fsck_dir (h, path, treeobj_deref); // recurse |
| 162 | +cleanup: |
| 163 | + json_decref (treeobj_deref); |
| 164 | + flux_future_destroy (f); |
| 165 | +} |
| 166 | + |
| 167 | +static void fsck_treeobj (flux_t *h, |
| 168 | + const char *path, |
| 169 | + json_t *treeobj) |
| 170 | +{ |
| 171 | + if (treeobj_validate (treeobj) < 0) { |
| 172 | + read_error ("%s: invalid tree object", path); |
| 173 | + errorcount++; |
| 174 | + return; |
| 175 | + } |
| 176 | + if (treeobj_is_symlink (treeobj)) { |
| 177 | + if (verbose) |
| 178 | + fprintf (stderr, "%s\n", path); |
| 179 | + fsck_symlink (h, path, treeobj); |
| 180 | + } |
| 181 | + else if (treeobj_is_val (treeobj)) { |
| 182 | + if (verbose) |
| 183 | + fprintf (stderr, "%s\n", path); |
| 184 | + fsck_val (h, path, treeobj); |
| 185 | + } |
| 186 | + else if (treeobj_is_valref (treeobj)) { |
| 187 | + if (verbose) |
| 188 | + fprintf (stderr, "%s\n", path); |
| 189 | + fsck_valref (h, path, treeobj); |
| 190 | + } |
| 191 | + else if (treeobj_is_dirref (treeobj)) { |
| 192 | + fsck_dirref (h, path, treeobj); // recurse |
| 193 | + } |
| 194 | + else if (treeobj_is_dir (treeobj)) { |
| 195 | + fsck_dir (h, path, treeobj); // recurse |
| 196 | + } |
| 197 | +} |
| 198 | + |
| 199 | +static void fsck_blobref (flux_t *h, const char *blobref) |
| 200 | +{ |
| 201 | + flux_future_t *f; |
| 202 | + const void *buf; |
| 203 | + size_t buflen; |
| 204 | + json_t *treeobj; |
| 205 | + json_t *dict; |
| 206 | + const char *key; |
| 207 | + json_t *entry; |
| 208 | + |
| 209 | + if (!(f = content_load_byblobref (h, blobref, CONTENT_FLAG_CACHE_BYPASS)) |
| 210 | + || content_load_get (f, &buf, &buflen) < 0) { |
| 211 | + read_error ("cannot load root tree object: %s", |
| 212 | + future_strerror (f, errno)); |
| 213 | + errorcount++; |
| 214 | + flux_future_destroy (f); |
| 215 | + return; |
| 216 | + } |
| 217 | + if (!(treeobj = treeobj_decodeb (buf, buflen))) |
| 218 | + log_err_exit ("cannot decode root tree object"); |
| 219 | + if (treeobj_validate (treeobj) < 0) |
| 220 | + log_msg_exit ("invalid root tree object"); |
| 221 | + if (!treeobj_is_dir (treeobj)) |
| 222 | + log_msg_exit ("root tree object is not a directory"); |
| 223 | + |
| 224 | + dict = treeobj_get_data (treeobj); |
| 225 | + json_object_foreach (dict, key, entry) { |
| 226 | + fsck_treeobj (h, key, entry); |
| 227 | + } |
| 228 | + json_decref (treeobj); |
| 229 | + flux_future_destroy (f); |
| 230 | +} |
| 231 | + |
| 232 | +static int cmd_fsck (optparse_t *p, int ac, char *av[]) |
| 233 | +{ |
| 234 | + int optindex = optparse_option_index (p); |
| 235 | + flux_future_t *f; |
| 236 | + const char *blobref; |
| 237 | + double timestamp; |
| 238 | + flux_t *h; |
| 239 | + |
| 240 | + log_init ("flux-fsck"); |
| 241 | + |
| 242 | + if (optindex != ac) { |
| 243 | + optparse_print_usage (p); |
| 244 | + exit (1); |
| 245 | + } |
| 246 | + |
| 247 | + if (optparse_hasopt (p, "verbose")) |
| 248 | + verbose = true; |
| 249 | + if (optparse_hasopt (p, "quiet")) |
| 250 | + quiet = true; |
| 251 | + |
| 252 | + h = builtin_get_flux_handle (p); |
| 253 | + |
| 254 | + if (!(f = kvs_checkpoint_lookup (h, |
| 255 | + NULL, |
| 256 | + KVS_CHECKPOINT_FLAG_CACHE_BYPASS)) |
| 257 | + || kvs_checkpoint_lookup_get_timestamp (f, ×tamp) < 0 |
| 258 | + || kvs_checkpoint_lookup_get_rootref (f, &blobref) < 0) |
| 259 | + log_msg_exit ("error fetching checkpoint: %s", |
| 260 | + future_strerror (f, errno)); |
| 261 | + if (!quiet) { |
| 262 | + char buf[64] = ""; |
| 263 | + struct tm tm; |
| 264 | + if (!timestamp_from_double (timestamp, &tm, NULL)) |
| 265 | + strftime (buf, sizeof (buf), "%Y-%m-%dT%T", &tm); |
| 266 | + fprintf (stderr, |
| 267 | + "Checking integrity of checkpoint from %s\n", |
| 268 | + buf); |
| 269 | + } |
| 270 | + |
| 271 | + fsck_blobref (h, blobref); |
| 272 | + |
| 273 | + flux_future_destroy (f); |
| 274 | + |
| 275 | + flux_close (h); |
| 276 | + |
| 277 | + if (!quiet) |
| 278 | + fprintf (stderr, "Total errors: %d\n", errorcount); |
| 279 | + return (errorcount ? -1 : 0); |
| 280 | +} |
| 281 | + |
| 282 | +static struct optparse_option fsck_opts[] = { |
| 283 | + { .name = "verbose", .key = 'v', .has_arg = 0, |
| 284 | + .usage = "List keys as they are being validated", |
| 285 | + }, |
| 286 | + { .name = "quiet", .key = 'q', .has_arg = 0, |
| 287 | + .usage = "Don't output diagnostic messages and discovered errors", |
| 288 | + }, |
| 289 | + OPTPARSE_TABLE_END |
| 290 | +}; |
| 291 | + |
| 292 | +int subcommand_fsck_register (optparse_t *p) |
| 293 | +{ |
| 294 | + optparse_err_t e; |
| 295 | + e = optparse_reg_subcommand (p, |
| 296 | + "fsck", |
| 297 | + cmd_fsck, |
| 298 | + "[OPTIONS]", |
| 299 | + "check integrity of content store data", |
| 300 | + 0, |
| 301 | + fsck_opts); |
| 302 | + return (e == OPTPARSE_SUCCESS ? 0 : -1); |
| 303 | +} |
| 304 | + |
| 305 | +// vi: ts=4 sw=4 expandtab |
0 commit comments