diff --git a/.gitignore b/.gitignore
index cee56d28d..4af822fca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ a.out
 /charliecloud-*/
 
 # debugging crap
+core
 /build-cache.gv
 /build-cache.pdf
 
diff --git a/bin/Makefile.am b/bin/Makefile.am
index 0b2b9a77e..d96ef4502 100644
--- a/bin/Makefile.am
+++ b/bin/Makefile.am
@@ -6,16 +6,18 @@
 
 bin_PROGRAMS = ch-checkns ch-run
 
-ch_checkns_SOURCES = ch-checkns.c ch_misc.h ch_misc.c
+ch_checkns_SOURCES = ch-checkns.c mem.h mem.c misc.h misc.c
 
-ch_run_SOURCES = ch-run.c ch_core.h ch_core.c ch_misc.h ch_misc.c
+ch_run_SOURCES = ch-run.c core.h core.c hook.h hook.c mem.h mem.c misc.h misc.c
+if HAVE_JSON
+ch_run_SOURCES += json.h json.c
+endif
 if HAVE_LIBSQUASHFUSE
-ch_run_SOURCES += ch_fuse.h ch_fuse.c
+ch_run_SOURCES += fuse.h fuse.c
+endif
+if HAVE_SECCOMP
+ch_run_SOURCES += seccomp.h seccomp.c
 endif
-
-# additional build flags for ch-run
-ch_run_CFLAGS = $(PTHREAD_CFLAGS)
-ch_run_LDADD = $(CH_RUN_LIBS)
 
 
 ## Shell scripts - distributed as-is
diff --git a/bin/ch-checkns.c b/bin/ch-checkns.c
index 10f26969a..6bc18134e 100644
--- a/bin/ch-checkns.c
+++ b/bin/ch-checkns.c
@@ -49,7 +49,7 @@
 #include <unistd.h>
 
 #include "config.h"
-#include "ch_misc.h"
+#include "misc.h"
 
 
 const char usage[] = "\
diff --git a/bin/ch-run.c b/bin/ch-run.c
index 774f02ed9..c8833b0b6 100644
--- a/bin/ch-run.c
+++ b/bin/ch-run.c
@@ -4,6 +4,8 @@
    are modest and the program is short-lived. */
 
 #define _GNU_SOURCE
+#include "config.h"
+
 #include <argp.h>
 #include <limits.h>
 #include <stdlib.h>
@@ -12,9 +14,54 @@
 #include <sys/mman.h>
 #include <unistd.h>
 
-#include "config.h"
-#include "ch_core.h"
-#include "ch_misc.h"
+#include "core.h"
+#include "hook.h"
+#ifdef HAVE_JSON
+#include "json.h"
+#endif
+#include "mem.h"
+#include "misc.h"
+#ifdef HAVE_SECCOMP
+#include "seccomp.h"
+#endif
+
+
+/** Types **/
+
+enum env_option_type {
+   ENV_END = 0,  // list terminator sentinel
+   ENV_SET,      // --set-env
+   ENV_SET0,     // --set-env0
+   ENV_UNSET,    // --unset-env
+   ENV_CDI_DEV,  // --device (specific device)
+   ENV_CDI_ALL,  // --devices (all known devices)
+};
+
+struct env_option {
+   enum env_option_type opt;
+   char *arg;
+};
+
+struct args {
+   struct container c;
+#ifdef HAVE_JSON
+   struct cdi_config cdi;
+#endif
+   struct env_option *env_options;
+   enum log_color_when log_color;
+   enum log_test log_test;
+   char *initial_dir;
+#ifdef HAVE_SECCOMP
+   bool seccomp_p;
+#endif
+   char *storage_dir;
+   bool unsafe;
+};
+
+struct log_color_synonym {
+   char *name;
+   enum log_color_when color;
+};
 
 
 /** Constants and macros **/
@@ -30,6 +77,20 @@ char *JOIN_TAG_ENV[] = { "SLURM_STEP_ID",
 /* Default overlaid tmpfs size. */
 char *WRITE_FAKE_DEFAULT = "12%";
 
+/* Log color WHEN synonyms. Note that no argument (i.e., bare --color) is
+   handled separately. */
+struct log_color_synonym log_color_synonyms[] = {
+   { "auto",    LL_COLOR_AUTO },
+   { "tty",     LL_COLOR_AUTO },
+   { "if-tty",  LL_COLOR_AUTO },
+   { "yes",     LL_COLOR_YES },
+   { "always",  LL_COLOR_YES },
+   { "force",   LL_COLOR_YES },
+   { "no",      LL_COLOR_NO },
+   { "never",   LL_COLOR_NO },
+   { "none",    LL_COLOR_NO },
+   { NULL,      LL_COLOR_NULL } };
+
 
 /** Command line options **/
 
@@ -49,9 +110,20 @@ const char args_doc[] = "IMAGE -- COMMAND [ARG...]";
 /* Note: Long option numbers, once issued, are permanent; i.e., if you remove
    one, don’t re-number the others. */
 const struct argp_option options[] = {
+   { "abort-fatal",   -21, 0,      0,
+     "exit abnormally on error, maybe dumping core" },
    { "bind",          'b', "SRC[:DST]", 0,
      "mount SRC at guest DST (default: same as SRC)"},
    { "cd",            'c', "DIR",  0, "initial working directory in container"},
+#ifdef HAVE_JSON
+   { "cdi-dirs",      -19, "DIRS", 0, "director(y|ies) containing CDI specs" },
+#endif
+   { "color",         -20, "WHEN", OPTION_ARG_OPTIONAL,
+                           "specify when to use colored logging" },
+#ifdef HAVE_JSON
+   { "device",        -18, "DEV",  0, "inject CDI device(s) DEV (repeatable)" },
+   { "devices",       'd', 0,      0, "inject default CDI devices" },
+#endif
    { "env-no-expand", -10, 0,      0, "don't expand $ in --set-env input"},
    { "feature",       -11, "FEAT", 0, "exit successfully if FEAT is enabled" },
    { "gid",           'g', "GID",  0, "run as GID within container" },
@@ -87,33 +159,19 @@ const struct argp_option options[] = {
 };
 
 
-/** Types **/
-
-struct args {
-   struct container c;
-   struct env_delta *env_deltas;
-   char *initial_dir;
-#ifdef HAVE_SECCOMP
-   bool seccomp_p;
-#endif
-   char *storage_dir;
-   bool unsafe;
-};
-
-
 /** Function prototypes **/
 
-void fix_environment(struct args *args);
 bool get_first_env(char **array, char **name, char **value);
+void hooks_env_install(struct args *args);
 void img_directory_verify(const char *img_path, const struct args *args);
 int join_ct(int cli_ct);
 char *join_tag(char *cli_tag);
+void parse_env(struct env_option **opts, enum env_option_type opt, char *arg);
 int parse_int(char *s, bool extra_ok, char *error_tag);
 static error_t parse_opt(int key, char *arg, struct argp_state *state);
-void parse_set_env(struct args *args, char *arg, int delim);
 void privs_verify_invoking();
 char *storage_default(void);
-extern void warnings_reprint(void);
+void write_fake_enable(struct args *args, char *overlay_size);
 
 
 /** Global variables **/
@@ -138,12 +196,15 @@ int main(int argc, char *argv[])
    T_ (warnings != MAP_FAILED);
 
    privs_verify_invoking();
+   ch_memory_init();
 
+   // note: exit functions not called on fatal error if --abort-fatal
+   Z_ (atexit(ch_memory_exit));
    Z_ (atexit(warnings_reprint));
 
 #ifdef ENABLE_SYSLOG
-   syslog(LOG_USER|LOG_INFO, "uid=%u args=%d: %s", getuid(), argc,
-          argv_to_string(argv));
+   syslog(SYSLOG_PRI, "uid=%u args=%d: %s",
+          getuid(), argc, argv_to_string(argv));
 #endif
 
    username = getenv("USER");
@@ -151,29 +212,41 @@ int main(int argc, char *argv[])
 
    verbose = LL_INFO;  // in ch_misc.c
    args = (struct args){
-      .c = (struct container){ .binds = list_new(sizeof(struct bind), 0),
-                               .container_gid = getegid(),
-                               .container_uid = geteuid(),
-                               .env_expand = true,
-                               .host_home = NULL,
-                               .img_ref = NULL,
-                               .newroot = NULL,
-                               .join = false,
-                               .join_ct = 0,
-                               .join_pid = 0,
-                               .join_tag = NULL,
-                               .overlay_size = NULL,
-                               .private_passwd = false,
-                               .private_tmp = false,
-                               .type = IMG_NONE,
-                               .writable = false },
-      .env_deltas = list_new(sizeof(struct env_delta), 0),
-      .initial_dir = NULL,
-#ifdef HAVE_SECCOMP
-      .seccomp_p = false,
+      .c = (struct container){
+         .binds = list_new(sizeof(struct bind), 0),
+         .container_gid = getegid(),
+         .container_uid = geteuid(),
+         .env_expand = true,
+         .hooks_prestart = list_new(sizeof(struct hook), 0),
+         .host_home = NULL,
+         .img_ref = NULL,
+         .ldconfigs = list_new(sizeof(char *), 0),
+         .newroot = NULL,
+         .join = false,
+         .join_ct = 0,
+         .join_pid = 0,
+         .join_tag = NULL,
+         .overlay_size = NULL,
+         .private_passwd = false,
+         .private_tmp = false,
+         .type = IMG_NONE,
+         .writable = false
+      },
+#ifdef HAVE_JSON
+      .cdi = (struct cdi_config){
+         .spec_dirs = list_new_strings(':', env_get("CH_RUN_CDI_DIRS",
+                                                    "/etc/cdi:/var/run/cdi")),
+         .devs_all_p = false,
+         .devids = list_new(sizeof(char *), 0),
+      },
 #endif
+      .env_options = list_new(sizeof(struct env_option), 0),
+      .initial_dir = NULL,
+      .log_color = LL_COLOR_AUTO,
+      .log_test = LL_TEST_NONE,
       .storage_dir = storage_default(),
-      .unsafe = false };
+      .unsafe = false
+   };
 
    /* I couldn't find a way to set argp help defaults other than this
       environment variable. Kludge sets/unsets only if not already set. */
@@ -187,8 +260,15 @@ int main(int argc, char *argv[])
    if (!argp_help_fmt_set)
       Z_ (unsetenv("ARGP_HELP_FMT"));
 
+   logging_init(args.log_color, args.log_test);
+   ch_memory_log("init");
+#ifdef HAVE_JSON
+   json_init();
+#endif
+
+
    if (arg_next >= argc - 1) {
-      printf("usage: ch-run [OPTION...] IMAGE -- COMMAND [ARG...]\n");
+      fprintf(stderr, "usage: ch-run [OPTION...] IMAGE -- COMMAND [ARG...]\n");
       FATAL("IMAGE and/or COMMAND not specified");
    }
    args.c.img_ref = argv[arg_next++];
@@ -223,15 +303,12 @@ int main(int argc, char *argv[])
       args.c.join_tag = join_tag(args.c.join_tag);
    }
 
-   if (getenv("TMPDIR") != NULL)
-      host_tmp = getenv("TMPDIR");
-   else
-      host_tmp = "/tmp";
-
    c_argv = list_new(sizeof(char *), argc - arg_next);
    for (int i = 0; i < argc - arg_next; i++)
       c_argv[i] = argv[i + arg_next];
 
+   host_tmp = env_get("TMPDIR", "/tmp");  // global in misc.c
+
    VERBOSE("verbosity: %d", verbose);
    VERBOSE("image: %s", args.c.img_ref);
    VERBOSE("storage: %s", args.storage_dir);
@@ -240,91 +317,137 @@ int main(int argc, char *argv[])
    VERBOSE("container gid: %u", args.c.container_gid);
    VERBOSE("join: %d %d %s %d", args.c.join, args.c.join_ct, args.c.join_tag,
            args.c.join_pid);
+   VERBOSE("host $TMPDIR: %s", host_tmp);
    VERBOSE("private /tmp: %d", args.c.private_tmp);
 #ifdef HAVE_SECCOMP
-   VERBOSE("seccomp: %d", args.seccomp_p);
+   VERBOSE("seccomp: %s", bool_to_string(args.seccomp_p));
 #endif
-   VERBOSE("unsafe: %d", args.unsafe);
+   VERBOSE("unsafe: %s", bool_to_string(args.unsafe));
 
-   containerize(&args.c);
-   fix_environment(&args);
-#ifdef HAVE_SECCOMP
-   if (args.seccomp_p)
-      seccomp_install();
+#ifdef HAVE_JSON
+   cdi_init(&args.cdi);
 #endif
-   run_user_command(c_argv, args.initial_dir); // should never return
+   hooks_env_install(&args);
+   //cdi_hook_ldconfig_install(&args.c.hook_prestart, &args.cdi);
+
+   containerize(&args.c);
+   run_user_command(c_argv, args.initial_dir);  // should never return
    exit(EXIT_FAILURE);
 }
 
 
 /** Supporting functions **/
 
-/* Adjust environment variables. Call once containerized, i.e., already
-   pivoted into new root. */
-void fix_environment(struct args *args)
+/* Find the first environment variable in array that is set; put its name in
+   *name and its value in *value, and return true. If none are set, return
+   false, and *name and *value are undefined. */
+bool get_first_env(char **array, char **name, char **value)
 {
-   char *old_value, *new_value;
+   for (int i = 0; array[i] != NULL; i++) {
+      *name = array[i];
+      *value = getenv(*name);
+      if (*value != NULL)
+         return true;
+   }
+
+   return false;
+}
+
+/* Set the default environment variables that come before the user-specified
+   environment changes. d must be NULL. */
+void hook_envs_def_first(struct container *c, void *d)
+{
+   char *vold;
+   T_ (d == NULL);
 
    // $HOME: If --home, set to “/home/$USER”.
-   if (args->c.host_home) {
-      Z_ (setenv("HOME", cat("/home/", username), 1));
-   } else if (path_exists("/root", NULL, true)) {
-      Z_ (setenv("HOME", "/root", 1));
-   } else
-      Z_ (setenv("HOME", "/", 1));
+   if (c->host_home)
+      env_set("HOME", cat("/home/", username), false);
+   else if (path_exists("/root", NULL, true))
+      env_set("HOME", "/root", false);
+   else
+      env_set("HOME", "/", false);
 
    // $PATH: Append /bin if not already present.
-   old_value = getenv("PATH");
-   if (old_value == NULL) {
+   vold = getenv("PATH");
+   if (vold == NULL)
       WARNING("$PATH not set");
-   } else if (   strstr(old_value, "/bin") != old_value
-              && !strstr(old_value, ":/bin")) {
-      T_ (1 <= asprintf(&new_value, "%s:/bin", old_value));
-      Z_ (setenv("PATH", new_value, 1));
-      VERBOSE("new $PATH: %s", new_value);
-   }
+   else if (strstr(vold, "/bin") != vold && !strstr(vold, ":/bin"))
+      env_set("PATH", cat(vold, ":/bin"), false);
 
    // $TMPDIR: Unset.
    Z_ (unsetenv("TMPDIR"));
+}
 
-   // --set-env and --unset-env.
-   for (size_t i = 0; args->env_deltas[i].action != ENV_END; i++) {
-      struct env_delta ed = args->env_deltas[i];
-      switch (ed.action) {
-      case ENV_END:
-         Te (false, "unreachable code reached");
+/* Set the default environment variables that come after the user-specified
+   changes. d must be NULL. */
+void hook_envs_def_last(struct container *c, void *d)
+{
+   T_ (d == NULL);
+   env_set("CH_RUNNING", "Weird Al Yankovic", false);
+}
+
+/* Install prestart env hooks: defaults first, CLI options, defaults last. */
+void hooks_env_install(struct args *args)
+{
+   hook_add(&args->c.hooks_prestart, HOOK_DUP_FAIL,
+            "env-def-first", hook_envs_def_first, NULL);
+
+   for (int i = 0; args->env_options[i].opt != ENV_END; i++) {
+      char *name;
+      hookf_t *f;
+      void *d;
+      enum env_option_type opt = args->env_options[i].opt;
+      char *arg = args->env_options[i].arg;
+      int delim = (opt == ENV_SET) ? '\n' : '\0';  // only ENV_SET* use this
+
+      switch (opt) {
+      case ENV_SET:
+      case ENV_SET0:
+         if (arg == NULL) {                  // guest path; defer file read
+            struct env_file *ef = ch_malloc(sizeof(struct env_file), true);
+            name = "env-set-gfile";
+            f = hook_envs_set_file;
+            ef->path = arg;
+            ef->delim = delim;
+            ef->expand = args->c.env_expand;
+            d = ef;
+         } else {
+            f = hook_envs_set;
+            if (strchr(arg, '=') == NULL) {  // host path; read file now
+               name = "env-set-hfile";
+               d = env_file_read(arg, delim);
+            } else {                         // direct set
+               name = "env-set-direct";
+               d = list_new(sizeof(struct env_var), 1);
+               ((struct env_var *)d)[0] = env_var_parse(arg, NULL, 0);
+            }
+         }
          break;
-      case ENV_SET_DEFAULT:
-         ed.arg.vars = env_file_read("/ch/environment", ed.arg.delim);
-         // fall through
-      case ENV_SET_VARS:
-         for (size_t j = 0; ed.arg.vars[j].name != NULL; j++)
-            env_set(ed.arg.vars[j].name, ed.arg.vars[j].value,
-                    args->c.env_expand);
+      case ENV_UNSET:
+         name = "env-unset";
+         f = hook_envs_unset;
+         d = arg;
          break;
-      case ENV_UNSET_GLOB:
-         env_unset(ed.arg.glob);
+      case ENV_CDI_DEV:
+         name = "env-set-cdi";
+         f = hook_envs_set;
+         d = cdi_envs_get(arg);
+         break;
+      case ENV_CDI_ALL:
+         name = "env-set-cdi-all";
+         f = hook_envs_set;
+         d = cdi_envs_get(NULL);
+         break;
+      case ENV_END:
+         T_ (false);  // unreachable
          break;
       }
+      hook_add(&args->c.hooks_prestart, HOOK_DUP_OK, name, f, d);
    }
 
-   // $CH_RUNNING is not affected by --unset-env or --set-env.
-   Z_ (setenv("CH_RUNNING", "Weird Al Yankovic", 1));
-}
-
-/* Find the first environment variable in array that is set; put its name in
-   *name and its value in *value, and return true. If none are set, return
-   false, and *name and *value are undefined. */
-bool get_first_env(char **array, char **name, char **value)
-{
-   for (int i = 0; array[i] != NULL; i++) {
-      *name = array[i];
-      *value = getenv(*name);
-      if (*value != NULL)
-         return true;
-   }
-
-   return false;
+   hook_add(&args->c.hooks_prestart, HOOK_DUP_FAIL,
+            "env-def-last", hook_envs_def_last, NULL);
 }
 
 /* Validate that it’s OK to run the IMG_DIRECTORY format image at path; if
@@ -380,7 +503,7 @@ char *join_tag(char *cli_tag)
    }
 
    VERBOSE("join: peer group tag from getppid(2)");
-   T_ (1 <= asprintf(&tag, "%d", getppid()));
+   tag = ch_asprintf("%d", getppid());
 
 end:
    Te(tag[0] != '\0', "join: peer group tag cannot be empty string");
@@ -425,15 +548,11 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state)
       args->c.join_pid = parse_int(arg, false, "--join-pid");
       break;
    case -6: // --set-env
-      parse_set_env(args, arg, '\n');
-      break;
-   case -7: { // --unset-env
-        struct env_delta ed;
-        Te (strlen(arg) > 0, "--unset-env: GLOB must have non-zero length");
-        ed.action = ENV_UNSET_GLOB;
-        ed.arg.glob = arg;
-        list_append((void **)&(args->env_deltas), &ed, sizeof(ed));
-      } break;
+      parse_env(&args->env_options, ENV_SET, arg);
+      break;
+   case -7: // --unset-env
+      parse_env(&args->env_options, ENV_UNSET, arg);
+      break;
    case -9: // --no-passwd
       args->c.private_passwd = true;
       break;
@@ -487,11 +606,12 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state)
       break;
 #ifdef HAVE_SECCOMP
    case -14: // --seccomp
-      args->seccomp_p = true;
+      hook_add(&args->c.hooks_prestart, HOOK_DUP_SKIP,
+               "seccomp", hook_seccomp_install, NULL);
       break;
 #endif
    case -15: // --set-env0
-      parse_set_env(args, arg, '\0');
+      parse_env(&args->env_options, ENV_SET0, arg);
       break;
    case -16: // --warnings
       for (int i = 1; i <= parse_int(arg, false, "--warnings"); i++)
@@ -500,36 +620,77 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state)
       break;
    case -17: // --test
       if (!strcmp(arg, "log"))
-         test_logging(false);
+         args->log_test = LL_TEST_YES;
       else if (!strcmp(arg, "log-fail"))
-         test_logging(true);
+         args->log_test = LL_TEST_FATAL;
       else
          FATAL("invalid --test argument: %s; see source code", arg);
       break;
+#ifdef HAVE_JSON
+   case -18: { // --device
+         struct env_option ope;
+         Te (strlen(arg) > 0, "--device: DEV must be non-empty");
+         write_fake_enable(args, NULL);
+         list_append((void **)&args->cdi.devids, &arg, sizeof(arg));
+         ope.opt = ENV_CDI_DEV;
+         ope.arg = arg;
+         list_append((void **)&args->env_options, &ope, sizeof(ope));
+      } break;
+   case -19: // --cdi-dirs
+      Te (strlen(arg) > 0, "--cdi-dirs: PATHS must be non-empty");
+      args->cdi.spec_dirs = list_new_strings(':', arg);
+      break;
+#endif
+   case -20: // --color
+      if (arg == NULL) {  // bare --color: there is no WHEN to look up
+         args->log_color = LL_COLOR_AUTO;
+         break;
+      }
+      args->log_color = LL_COLOR_NULL;  // sentinel: no synonym matched yet
+      for (int i = 0; log_color_synonyms[i].name != NULL; i++) {
+         if (!strcmp(arg, log_color_synonyms[i].name)) {
+            args->log_color = log_color_synonyms[i].color;
+            break;
+         }
+      }
+      Tf (args->log_color != LL_COLOR_NULL, "--color: invalid arg: %s", arg);
+      break;
+   case -21: // --abort-fatal
+      abort_fatal = true;  // in misc.c
+      break;
    case 'b': {  // --bind
-         char *src, *dst;
-         for (i = 0; args->c.binds[i].src != NULL; i++) // count existing binds
-            ;
-         T_ (args->c.binds = realloc(args->c.binds,
-                                     (i+2) * sizeof(struct bind)));
-         args->c.binds[i+1].src = NULL;                 // terminating zero
-         args->c.binds[i].dep = BD_MAKE_DST;
-         // source
-         src = strsep(&arg, ":");
-         T_ (src != NULL);
-         Te (src[0] != 0, "--bind: no source provided");
-         args->c.binds[i].src = src;
-         // destination
-         dst = arg ? arg : src;
-         Te (dst[0] != 0, "--bind: no destination provided");
-         Te (strcmp(dst, "/"), "--bind: destination can't be /");
-         Te (dst[0] == '/', "--bind: destination must be absolute");
-         args->c.binds[i].dst = dst;
+        char *src, *dst;
+        i = list_count(args->c.binds, sizeof(args->c.binds[0]));
+        args->c.binds = ch_realloc(args->c.binds, (i+2) * sizeof(struct bind),
+                                   true);
+        memset(&args->c.binds[i+1], 0, sizeof(args->c.binds[0]));  // terminate
+        args->c.binds[i].dep = BD_MAKE_DST;
+        // source
+        src = strsep(&arg, ":");
+        T_ (src != NULL);
+        Te (src[0] != 0, "--bind: no source provided");
+        args->c.binds[i].src = src;
+        // destination
+        dst = arg ? arg : src;
+        Te (dst[0] != 0, "--bind: no destination provided");
+        Te (strcmp(dst, "/"), "--bind: destination can't be /");
+        Te (dst[0] == '/', "--bind: destination must be absolute");
+        args->c.binds[i].dst = dst;
       }
       break;
    case 'c':  // --cd
       args->initial_dir = arg;
       break;
+#ifdef HAVE_JSON
+   case 'd': {  // --devices
+      // Can’t add the devices here b/c we don’t know the CDI spec dirs yet.
+      struct env_option ope;
+      args->cdi.devs_all_p = true;
+      ope.opt = ENV_CDI_ALL;
+      ope.arg = NULL;
+      list_append((void **)&args->env_options, &ope, sizeof(ope));
+      } break;
+#endif
    case 'g':  // --gid
       i = parse_int(arg, false, "--gid");
       Te (i >= 0, "--gid: must be non-negative");
@@ -573,7 +734,7 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state)
       args->c.writable = true;
       break;
    case 'W':  // --write-fake
-      args->c.overlay_size = arg != NULL ? arg : WRITE_FAKE_DEFAULT;
+      write_fake_enable(args, arg);
       break;
    case ARGP_KEY_NO_ARGS:
       argp_state_help(state, stderr, (  ARGP_HELP_SHORT_USAGE
@@ -583,31 +744,20 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state)
       exit(EXIT_FAILURE);
    default:
       return ARGP_ERR_UNKNOWN;
-   };
+   }
 
    return 0;
 }
 
-void parse_set_env(struct args *args, char *arg, int delim)
+void parse_env(struct env_option **opts, enum env_option_type opt, char *arg)
 {
-   struct env_delta ed;
-
-   if (arg == NULL) {
-      ed.action = ENV_SET_DEFAULT;
-      ed.arg.delim = delim;
-   } else {
-      ed.action = ENV_SET_VARS;
-      if (strchr(arg, '=') == NULL)
-         ed.arg.vars = env_file_read(arg, delim);
-      else {
-         ed.arg.vars = list_new(sizeof(struct env_var), 1);
-         ed.arg.vars[0] = env_var_parse(arg, NULL, 0);
-      }
-   }
-   list_append((void **)&(args->env_deltas), &ed, sizeof(ed));
+   struct env_option eo = (struct env_option){ .opt = opt,
+                                               .arg = arg };
+   Te (arg == NULL || strlen(arg) > 0,
+       "environment options: argument must have non-zero length");
+   list_append((void **)opts, &eo, sizeof(eo));
 }
 
-
 /* Validate that the UIDs and GIDs are appropriate for program start, and
    abort if not.
 
@@ -636,13 +786,28 @@ void privs_verify_invoking()
    T_ (euid == ruid && euid == suid);        // no setuid or funny business
 }
 
-/* Return path to the storage directory, if -s is not specified. */
+/* Return default path to the storage directory. */
 char *storage_default(void)
 {
    char *storage = getenv("CH_IMAGE_STORAGE");
 
    if (storage == NULL)
-      T_ (1 <= asprintf(&storage, "/var/tmp/%s.ch", username));
+      storage = ch_asprintf("/var/tmp/%s.ch", username);
 
    return storage;
 }
+
+/* Enable the overlay: use overlay_size if non-NULL, else keep or default. */
+void write_fake_enable(struct args *args, char *overlay_size)
+{
+   if (overlay_size != NULL) {
+      // new overlay size specified: use it regardless of previous enablement
+      args->c.overlay_size = overlay_size;
+   } else if (args->c.overlay_size == NULL) {
+      // no new size, not yet enabled: enable with default size
+      args->c.overlay_size = WRITE_FAKE_DEFAULT;
+   } else {
+      // no new size, already enabled: keep existing size, nothing to do
+      T_ (args->c.overlay_size != NULL);
+   }
+}
diff --git a/bin/ch_core.c b/bin/core.c
similarity index 57%
rename from bin/ch_core.c
rename to bin/core.c
index 3850dbfa2..41c01fe50 100644
--- a/bin/ch_core.c
+++ b/bin/core.c
@@ -3,22 +3,12 @@
 #define _GNU_SOURCE
 #include "config.h"
 
-#include <fcntl.h>
 #include <grp.h>
-#include <libgen.h>
-#ifdef HAVE_SECCOMP
-#include <linux/audit.h>
-#include <linux/filter.h>
-#include <linux/seccomp.h>
-#endif
 #include <pwd.h>
 #include <sched.h>
 #include <semaphore.h>
+#include <stdbool.h>
 #include <stdio.h>
-#ifdef HAVE_SECCOMP
-#include <stddef.h>
-#include <stdint.h>
-#endif
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
@@ -29,10 +19,11 @@
 #include <time.h>
 #include <unistd.h>
 
-#include "ch_misc.h"
-#include "ch_core.h"
+#include "mem.h"
+#include "misc.h"
+#include "core.h"
 #ifdef HAVE_LIBSQUASHFUSE
-#include "ch_fuse.h"
+#include "fuse.h"
 #endif
 
 
@@ -88,92 +79,6 @@ struct bind BINDS_DEFAULT[] = {
    { 0 }
 };
 
-/* Special values for seccomp tables. These must be negative to avoid clashing
-   with real syscall numbers (note zero is often a valid syscal number). */
-#define NR_NON -1  // syscall does not exist on architecture
-#define NR_END -2  // end of table
-
-/* Architectures that we support for seccomp. Order matches the
-   corresponding table below.
-
-   Note: On some distros (e.g., CentOS 7), some of the architecture numbers
-   are missing. The workaround is to use the numbers I have on Debian
-   Bullseye. The reason I (Reid) feel moderately comfortable doing this is how
-   militant Linux is about not changing the userspace API. */
-#ifdef HAVE_SECCOMP
-#ifndef AUDIT_ARCH_AARCH64
-#define AUDIT_ARCH_AARCH64 0xC00000B7u  // undeclared on CentOS 7
-#undef  AUDIT_ARCH_ARM                  // uses undeclared EM_ARM on CentOS 7
-#define AUDIT_ARCH_ARM     0x40000028u
-#endif
-int SECCOMP_ARCHS[] = { AUDIT_ARCH_AARCH64,   // arm64
-                        AUDIT_ARCH_ARM,       // arm32
-                        AUDIT_ARCH_I386,      // x86 (32-bit)
-                        AUDIT_ARCH_PPC64LE,   // PPC
-                        AUDIT_ARCH_S390X,     // s390x
-                        AUDIT_ARCH_X86_64,    // x86-64
-                        NR_END };
-#endif
-
-/* System call numbers that we fake with seccomp (by doing nothing and
-   returning success). Some processors can execute multiple architectures
-   (e.g., 64-bit Intel CPUs can run both x64-64 and x86 code), and a process’
-   architecture can even change (if you execve(2) binary of different
-   architecture), so we can’t just use the build host’s architecture.
-
-   I haven’t figured out how to gather these system call numbers
-   automatically, so they are compiled from [1, 2, 3]. See also [4] for a more
-   general reference.
-
-   NOTE: The total number of faked syscalls (i.e., non-zero entries below)
-   must be somewhat less than 256. I haven’t computed the exact limit. There
-   will be an assertion failure at runtime if this is exceeded.
-
-   WARNING: Keep this list consistent with the ch-image(1) man page!
-
-   [1]: https://chromium.googlesource.com/chromiumos/docs/+/HEAD/constants/syscalls.md#Cross_arch-Numbers
-   [2]: https://github.com/strace/strace/blob/v4.26/linux/powerpc64/syscallent.h
-   [3]: https://github.com/strace/strace/blob/v6.6/src/linux/s390x/syscallent.h
-   [4]: https://unix.stackexchange.com/questions/421750 */
-#ifdef HAVE_SECCOMP
-int FAKE_SYSCALL_NRS[][6] = {
-   // arm64   arm32   x86     PPC64   s390x   x86-64
-   // ------  ------  ------  ------  ------  ------
-   {      91,    185,    185,    184,    185,    126 },  // capset
-   {  NR_NON,    182,    182,    181,    212,     92 },  // chown
-   {  NR_NON,    212,    212, NR_NON, NR_NON, NR_NON },  // chown32
-   {      55,     95,     95,     95,    207,     93 },  // fchown
-   {  NR_NON,    207,    207, NR_NON, NR_NON, NR_NON },  // fchown32
-   {      54,    325,    298,    289,    291,    260 },  // fchownat
-   {  NR_NON,     16,     16,     16,    198,     94 },  // lchown
-   {  NR_NON,    198,    198, NR_NON, NR_NON, NR_NON },  // lchown32
-   {     104,    347,    283,    268,    277,    246 },  // kexec_load
-   {     152,    139,    139,    139,    216,    123 },  // setfsgid
-   {  NR_NON,    216,    216, NR_NON, NR_NON, NR_NON },  // setfsgid32
-   {     151,    138,    138,    138,    215,    122 },  // setfsuid
-   {  NR_NON,    215,    215, NR_NON, NR_NON, NR_NON },  // setfsuid32
-   {     144,     46,     46,     46,    214,    106 },  // setgid
-   {  NR_NON,    214,    214, NR_NON, NR_NON, NR_NON },  // setgid32
-   {     159,     81,     81,     81,    206,    116 },  // setgroups
-   {  NR_NON,    206,    206, NR_NON, NR_NON, NR_NON },  // setgroups32
-   {     143,     71,     71,     71,    204,    114 },  // setregid
-   {  NR_NON,    204,    204, NR_NON, NR_NON, NR_NON },  // setregid32
-   {     149,    170,    170,    169,    210,    119 },  // setresgid
-   {  NR_NON,    210,    210, NR_NON, NR_NON, NR_NON },  // setresgid32
-   {     147,    164,    164,    164,    208,    117 },  // setresuid
-   {  NR_NON,    208,    208, NR_NON, NR_NON, NR_NON },  // setresuid32
-   {     145,     70,     70,     70,    203,    113 },  // setreuid
-   {  NR_NON,    203,    203, NR_NON, NR_NON, NR_NON },  // setreuid32
-   {     146,     23,     23,     23,    213,    105 },  // setuid
-   {  NR_NON,    213,    213, NR_NON, NR_NON, NR_NON },  // setuid32
-   { NR_END }, // end
-};
-int FAKE_MKNOD_NRS[] =
-   {  NR_NON,     14,     14,     14,     14,    133 };
-int FAKE_MKNODAT_NRS[] =
-   {      33,    324,    297,    288,    290,    259 };
-#endif
-
 
 /** Global variables **/
 
@@ -199,19 +104,16 @@ void bind_mount(const char *src, const char *dst, enum bind_dep,
                 const char *newroot, unsigned long flags, const char *scratch);
 void bind_mounts(const struct bind *binds, const char *newroot,
                  unsigned long flags, const char * scratch);
-void enter_udss(struct container *c);
-#ifdef HAVE_SECCOMP
-void iw(struct sock_fprog *p, int i,
-        uint16_t op, uint32_t k, uint8_t jt, uint8_t jf);
-#endif
 void join_begin(const char *join_tag);
-void join_namespace(pid_t pid, const char *ns);
-void join_namespaces(pid_t pid);
 void join_end(int join_ct);
-void sem_timedwait_relative(sem_t *sem, int timeout);
-void setup_namespaces(const struct container *c, uid_t uid_out, uid_t uid_in,
+void mounts_setup(struct container *c);
+void namespace_join(pid_t pid, const char *ns);
+void namespaces_join(pid_t pid);
+void namespaces_setup(const struct container *c, uid_t uid_out, uid_t uid_in,
                       gid_t gid_out, gid_t gid_in);
-void setup_passwd(const struct container *c);
+void passwd_setup(const struct container *c);
+void pivot(struct container *c);
+void sem_timedwait_relative(sem_t *sem, int timeout);
 void tmpfs_mount(const char *dst, const char *newroot, const char *data);
 
 
@@ -268,7 +170,7 @@ void bind_mounts(const struct bind *binds, const char *newroot,
 void containerize(struct container *c)
 {
    if (c->join_pid) {
-      join_namespaces(c->join_pid);
+      namespaces_join(c->join_pid);
       return;
    }
    if (c->join)
@@ -278,111 +180,72 @@ void containerize(struct container *c)
       // fusermount3 non-setuid, and the inner so we get the desired UID
       // within the container. We do this even if the image is a directory, to
       // reduce the number of code paths.
-      setup_namespaces(c, geteuid(), 0, getegid(), 0);
+      namespaces_setup(c, geteuid(), 0, getegid(), 0);
 #ifdef HAVE_LIBSQUASHFUSE
       if (c->type == IMG_SQUASH)
          sq_fork(c);
 #endif
-      setup_namespaces(c, 0, c->container_uid, 0, c->container_gid);
-      enter_udss(c);
+      namespaces_setup(c, 0, c->container_uid, 0, c->container_gid);
+      mounts_setup(c);
+      VERBOSE("prestart hooks: %d", list_count(c->hooks_prestart,
+                                               sizeof(struct hook)));
+      hooks_run(c, &c->hooks_prestart);
+      pivot(c);
    } else
-      join_namespaces(join.shared->winner_pid);
+      namespaces_join(join.shared->winner_pid);
    if (c->join)
       join_end(c->join_ct);
 
 }
 
-/* Enter the new root (UDSS). On entry, the namespaces are set up, and this
-   does the mounting and filesystem setup.
+/* Append hook function f to hook_list. When called, the hook will be passed
+   d; this lets hooks receive arbitrary arguments (i.e., it’s a poor person’s
+   closure). hook_list must be a member of c.
+
+   “dup” says what to do if a hook with the same name is already in the list:
 
-   Note that pivot_root(2) requires a complex dance to work, i.e., to avoid
-   multiple undocumented error conditions. This dance is explained in detail
-   in bin/ch-checkns.c. */
-void enter_udss(struct container *c)
+      HOOK_DUP_OK    add the hook anyway
+      HOOK_DUP_SKIP  silently do nothing (i.e., don’t add the hook)
+      HOOK_DUP_FAIL  fatal error  */
+void hook_add(struct hook **hook_list, enum hook_dup dup,
+              const char *name, hookf_t *f, void *d)
 {
-   char *nr_parent, *nr_base, *mkdir_scratch;
+   // FIXME: hooks: environment variables, seccomp, CDI
 
-   LOG_IDS;
-   mkdir_scratch = NULL;
-   path_split(c->newroot, &nr_parent, &nr_base);
+   struct hook h;
 
-   // Claim new root for this namespace. Despite MS_REC in bind_mount(), we do
-   // need both calls to avoid pivot_root(2) failing with EBUSY later.
-   DEBUG("claiming new root for this namespace")
-   bind_mount(c->newroot, c->newroot, BD_REQUIRED, "/", MS_PRIVATE, NULL);
-   bind_mount(nr_parent, nr_parent, BD_REQUIRED, "/", MS_PRIVATE, NULL);
-   // Re-mount new root read-only unless --write or already read-only.
-   if (!c->writable && !(access(c->newroot, W_OK) == -1 && errno == EROFS)) {
-      unsigned long flags =   path_mount_flags(c->newroot)
-                            | MS_REMOUNT  // Re-mount ...
-                            | MS_BIND     // only this mount point ...
-                            | MS_RDONLY;  // read-only.
-      Z_ (mount(NULL, c->newroot, NULL, flags, NULL));
-   }
-   // Overlay a tmpfs if --write-fake. See for useful details:
-   // https://www.kernel.org/doc/html/v5.11/filesystems/tmpfs.html
-   // https://www.kernel.org/doc/html/v5.11/filesystems/overlayfs.html
-   if (c->overlay_size != NULL) {
-      char *options;
-      struct stat st;
-      VERBOSE("overlaying tmpfs for --write-fake (%s)", c->overlay_size);
-      T_ (1 <= asprintf(&options, "size=%s", c->overlay_size));
-      Zf (mount(NULL, WF_MNT, "tmpfs", 0, options),
-          "cannot mount tmpfs for overlay");
-      free(options);
-      Z_ (mkdir(WF_MNT "/upper", 0700));
-      Z_ (mkdir(WF_MNT "/work", 0700));
-      Z_ (mkdir(WF_MNT "/merged", 0700));
-      mkdir_scratch = WF_MNT "/mkdir_overmount";
-      Z_ (mkdir(mkdir_scratch, 0700));
-      T_ (1 <= asprintf(&options, ("lowerdir=%s,upperdir=%s,workdir=%s,"
-                                   "index=on,userxattr,volatile"),
-                        c->newroot, WF_MNT "/upper", WF_MNT "/work"));
-      // update newroot
-      Zf (stat(c->newroot, &st),
-          "can't stat new root; overmounted by tmpfs for -W?: %s", c->newroot);
-      c->newroot = WF_MNT "/merged";
-      free(nr_parent);
-      free(nr_base);
-      path_split(c->newroot, &nr_parent, &nr_base);
-      Zf (mount(NULL, c->newroot, "overlay", 0, options),
-          "can't overlay: %s, %s", c->newroot, options);
-      VERBOSE("newroot updated: %s", c->newroot);
-      free(options);
-   }
-   DEBUG("starting bind-mounts");
-   // Bind-mount default files and directories.
-   bind_mounts(BINDS_DEFAULT, c->newroot, MS_RDONLY, NULL);
-   // /etc/passwd and /etc/group.
-   if (!c->private_passwd)
-      setup_passwd(c);
-   // Container /tmp.
-   if (c->private_tmp) {
-      tmpfs_mount("/tmp", c->newroot, NULL);
-   } else {
-      bind_mount(host_tmp, "/tmp", BD_REQUIRED, c->newroot, 0, NULL);
+   if (dup == HOOK_DUP_SKIP || dup == HOOK_DUP_FAIL) {
+      bool dup_found = false;  // NULL list (as left by hooks_run) has no dups
+      for (int i = 0; *hook_list != NULL && (*hook_list)[i].name != NULL; i++)
+         if (!strcmp((*hook_list)[i].name, name)) {
+            dup_found = true;
+            break;
+         }
+      if (dup_found) {
+         Te (dup == HOOK_DUP_SKIP, "invalid duplicate hook: %s", name);
+         return;  // skip adding hook
+      }
    }
-   // Bind-mount user’s home directory at /home/$USER if requested.
-   if (c->host_home) {
-      T_ (c->overlay_size != NULL);
-      bind_mount(c->host_home, cat("/home/", username),
-                 BD_MAKE_DST, c->newroot, 0, mkdir_scratch);
+
+   h.name = name;
+   h.f = f;
+   h.data = d;
+
+   list_append((void **)hook_list, &h, sizeof(h));
+}
+
+/* Call every hook in hook_list in order, giving each one c and its own data
+   pointer, then set *hook_list to NULL. hook_list must be a member of c. */
+void hooks_run(struct container *c, struct hook **hook_list)
+{
+   int total = list_count(*hook_list, sizeof(struct hook));
+   for (int k = 1; k <= total; k++) {
+      struct hook cur = (*hook_list)[k-1];
+      DEBUG("calling hook %d/%d: %s", k, total, cur.name);
+      cur.f(c, cur.data);
    }
-   // Bind-mount user-specified directories.
-   bind_mounts(c->binds, c->newroot, 0, mkdir_scratch);
-   // Overmount / to avoid EINVAL if it’s a rootfs.
-   Z_ (chdir(nr_parent));
-   Z_ (mount(nr_parent, "/", NULL, MS_MOVE, NULL));
-   Z_ (chroot("."));
-   // Pivot into the new root. Use /dev because it’s available even in
-   // extremely minimal images.
-   c->newroot = cat("/", nr_base);
-   Zf (chdir(c->newroot), "can't chdir into new root");
-   Zf (syscall(SYS_pivot_root, c->newroot, path_join(c->newroot, "dev")),
-       "can't pivot_root(2)");
-   Zf (chroot("."), "can't chroot(2) into new root");
-   Zf (umount2("/dev", MNT_DETACH), "can't umount old root");
-   DEBUG("pivot_root(2) dance successful")
+
+   *hook_list = NULL;
 }
 
 /* Return image type of path, or exit with error if not a valid type. */
@@ -426,27 +289,13 @@ enum img_type image_type(const char *ref, const char *storage_dir)
 
 char *img_name2path(const char *name, const char *storage_dir)
 {
-   char *path;
-   char *name_fs = strdup(name);
+   char *name_fs = ch_strdup(name);  // writable copy; name stays untouched
 
    replace_char(name_fs, '/', '%');
    replace_char(name_fs, ':', '+');
 
-   T_ (1 <= asprintf(&path, "%s/img/%s", storage_dir, name_fs));
-
-   free(name_fs);  // make Tim happy
-   return path;
-}
-
-/* Helper function to write seccomp-bpf programs. */
-#ifdef HAVE_SECCOMP
-void iw(struct sock_fprog *p, int i,
-        uint16_t op, uint32_t k, uint8_t jt, uint8_t jf)
-{
-   p->filter[i] = (struct sock_filter){ op, jt, jf, k };
-   DEBUG("%4d: { op=%2x k=%8x jt=%3d jf=%3d }", i, op, k, jt, jf);
+   return path_join(storage_dir, path_join("img", name_fs));
 }
-#endif
 
 /* Begin coordinated section of namespace joining. */
 void join_begin(const char *join_tag)
@@ -515,13 +364,84 @@ void join_end(int join_ct)
    VERBOSE("join: done");
 }
 
+/* Build the container’s filesystem tree under c->newroot: claim the root,
+   optionally overlay a tmpfs, then do all bind mounts. Namespaces must
+   already be set up. If --write-fake is in effect, c->newroot is updated. */
+void mounts_setup(struct container *c)
+{
+   char *parent;
+   char *scratch = NULL;  // given to bind mounts; set only with overlay
+
+   VERBOSE("creating container filesystem tree");
+   LOG_IDS;
+   path_split(c->newroot, &parent, NULL);
+
+   // Both bind mounts are needed, despite MS_REC in bind_mount(); without
+   // them pivot_root(2) fails later with EBUSY.
+   DEBUG("claiming new root for this namespace");
+   bind_mount(c->newroot, c->newroot, BD_REQUIRED, "/", MS_PRIVATE, NULL);
+   bind_mount(parent, parent, BD_REQUIRED, "/", MS_PRIVATE, NULL);
+   // Unless --write was given or the new root is already read-only, re-mount
+   // just this mount point read-only.
+   if (!(c->writable || (access(c->newroot, W_OK) == -1 && errno == EROFS))) {
+      unsigned long ro_flags = path_mount_flags(c->newroot);
+      ro_flags |= MS_REMOUNT | MS_BIND | MS_RDONLY;
+      Z_ (mount(NULL, c->newroot, NULL, ro_flags, NULL));
+   }
+   // --write-fake: overlay a tmpfs on the image. Background reading:
+   // https://www.kernel.org/doc/html/v5.11/filesystems/tmpfs.html
+   // https://www.kernel.org/doc/html/v5.11/filesystems/overlayfs.html
+   if (c->overlay_size != NULL) {
+      struct stat sb;
+      char *tmpfs_opts, *overlay_opts;
+      VERBOSE("overlaying tmpfs for --write-fake (%s)", c->overlay_size);
+      tmpfs_opts = cat("size=", c->overlay_size);
+      Zf (mount(NULL, WF_MNT, "tmpfs", 0, tmpfs_opts),
+          "cannot mount tmpfs for overlay");
+      Z_ (mkdir(WF_MNT "/upper", 0700));
+      Z_ (mkdir(WF_MNT "/work", 0700));
+      Z_ (mkdir(WF_MNT "/merged", 0700));
+      scratch = WF_MNT "/mkdir_overmount";
+      Z_ (mkdir(scratch, 0700));
+      overlay_opts = ch_asprintf(("lowerdir=%s,upperdir=%s,workdir=%s,"
+                                  "index=on,userxattr,volatile"),
+                                 c->newroot, WF_MNT "/upper", WF_MNT "/work");
+      // Sanity-check the old newroot, then switch to the merged directory.
+      Zf (stat(c->newroot, &sb),
+          "can't stat new root; overmounted by tmpfs for -W?: %s", c->newroot);
+      c->newroot = WF_MNT "/merged";
+      Zf (mount(NULL, c->newroot, "overlay", 0, overlay_opts),
+          "can't overlay: %s, %s", c->newroot, overlay_opts);
+      VERBOSE("newroot updated: %s", c->newroot);
+   }
+   DEBUG("starting bind-mounts");
+   // Defaults first, read-only.
+   bind_mounts(BINDS_DEFAULT, c->newroot, MS_RDONLY, NULL);
+   // Custom /etc/passwd and /etc/group, unless disabled.
+   if (!c->private_passwd)
+      passwd_setup(c);
+   // /tmp is either a fresh tmpfs or a bind mount of host_tmp.
+   if (!c->private_tmp)
+      bind_mount(host_tmp, "/tmp", BD_REQUIRED, c->newroot, 0, NULL);
+   else
+      tmpfs_mount("/tmp", c->newroot, NULL);
+   // --home: user’s home directory at /home/$USER; requires the overlay.
+   if (c->host_home) {
+      T_ (c->overlay_size != NULL);
+      bind_mount(c->host_home, cat("/home/", username),
+                 BD_MAKE_DST, c->newroot, 0, scratch);
+   }
+   // Finally, the user-specified bind mounts.
+   bind_mounts(c->binds, c->newroot, 0, scratch);
+}
+
 /* Join a specific namespace. */
-void join_namespace(pid_t pid, const char *ns)
+void namespace_join(pid_t pid, const char *ns)
 {
    char *path;
    int fd;
 
-   T_ (1 <= asprintf(&path, "/proc/%d/ns/%s", pid, ns));
+   path = ch_asprintf("/proc/%d/ns/%s", pid, ns);
    fd = open(path, O_RDONLY);
    if (fd == -1) {
       if (errno == ENOENT) {
@@ -543,197 +463,23 @@ void join_namespace(pid_t pid, const char *ns)
       }
 }
 
-/* Join the existing namespaces created by the join winner. */
-void join_namespaces(pid_t pid)
+/* Join the existing namespaces containing process pid, which could be the
+   join winner or another process. */
+void namespaces_join(pid_t pid)
 {
    VERBOSE("joining namespaces of pid %d", pid);
-   join_namespace(pid, "user");
-   join_namespace(pid, "mnt");
-}
-
-/* Replace the current process with user command and arguments. */
-void run_user_command(char *argv[], const char *initial_dir)
-{
-   LOG_IDS;
-
-   if (initial_dir != NULL)
-      Zf (chdir(initial_dir), "can't cd to %s", initial_dir);
-
-   VERBOSE("executing: %s", argv_to_string(argv));
-
-   Zf (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), "can't set no_new_privs");
-   if (verbose < LL_INFO)
-      T_ (freopen("/dev/null", "w", stdout));
-   if (verbose < LL_STDERR)
-      T_ (freopen("/dev/null", "w", stderr));
-   execvp(argv[0], argv);  // only returns if error
-   Tf (0, "can't execve(2): %s", argv[0]);
-}
-
-/* Set up the fake-syscall seccomp(2) filter. This computes and installs a
-   long-ish but fairly simple BPF program to implement the filter. To
-   understand this rather hairy language:
-
-     1. https://man7.org/training/download/secisol_seccomp_slides.pdf
-     2. https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html
-     3. https://elixir.bootlin.com/linux/latest/source/samples/seccomp */
-#ifdef HAVE_SECCOMP
-void seccomp_install(void)
-{
-   int arch_ct = sizeof(SECCOMP_ARCHS)/sizeof(SECCOMP_ARCHS[0]) - 1;
-   int syscall_cts[arch_ct];
-   struct sock_fprog p = { 0 };
-   int ii, idx_allow, idx_fake, idx_mknod, idx_mknodat, idx_next_arch;
-   // Lengths of certain instruction groups. These are all obtained manually
-   // by counting below, violating DRY. We could automate these counts, but it
-   // seemed like the cost of extra buffers and code to do that would exceed
-   // that of maintaining the manual counts.
-   int ct_jump_start = 4;  // ld arch & syscall nr, arch test, end-of-arch jump
-   int ct_mknod_jump = 2;  // jump table handling for mknod(2) and mknodat(2)
-   int ct_mknod = 2;       // mknod(2) handling
-   int ct_mknodat = 6;     // mknodat(2) handling
-
-   // Count how many syscalls we are going to fake in the standard way. We
-   // need this to compute the right offsets for all the jumps.
-   for (int ai = 0; SECCOMP_ARCHS[ai] != NR_END; ai++) {
-      p.len += ct_jump_start + ct_mknod_jump;
-      syscall_cts[ai] = 0;
-      for (int si = 0; FAKE_SYSCALL_NRS[si][0] != NR_END; si++) {
-         bool syscall_p = FAKE_SYSCALL_NRS[si][ai] != NR_NON;
-         syscall_cts[ai] += syscall_p;
-         p.len += syscall_p;  // syscall jump table entry
-      }
-      DEBUG("seccomp: arch %x: found %d syscalls",
-            SECCOMP_ARCHS[ai], syscall_cts[ai]);
-   }
-
-   // Initialize program buffer.
-   p.len += (  1             // return allow
-             + 1             // return fake success
-             + ct_mknod      // mknod(2) handling
-             + ct_mknodat);  // mknodat(2) handling
-   DEBUG("seccomp(2) program has %d instructions", p.len);
-   T_ (p.filter = calloc(p.len, sizeof(struct sock_filter)));
-
-   // Return call addresses. Allow needs to come first because we’ll jump to
-   // it for unknown architectures.
-   idx_allow =   p.len - 2 - ct_mknod - ct_mknodat;
-   idx_fake =    p.len - 1 - ct_mknod - ct_mknodat;
-   idx_mknod =   p.len     - ct_mknod - ct_mknodat;
-   idx_mknodat = p.len                - ct_mknodat;
-
-   // Build a jump table for each architecture. The gist is: if architecture
-   // matches, fall through into the jump table, otherwise jump to the next
-   // architecture (or ALLOW for the last architecture).
-   ii = 0;
-   idx_next_arch = -1;  // avoid warning on some compilers
-   for (int ai = 0; SECCOMP_ARCHS[ai] != NR_END; ai++) {
-      int jump;
-      idx_next_arch = ii + syscall_cts[ai] + ct_jump_start + ct_mknod_jump;
-      // load arch into accumulator
-      iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
-         offsetof(struct seccomp_data, arch), 0, 0);
-      // jump to next arch if arch doesn't match
-      jump = idx_next_arch - ii - 1;
-      T_ (jump <= 255);
-      iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, SECCOMP_ARCHS[ai], 0, jump);
-      // load syscall number into accumulator
-      iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
-         offsetof(struct seccomp_data, nr), 0, 0);
-      // jump table of syscalls
-      for (int si = 0; FAKE_SYSCALL_NRS[si][0] != NR_END; si++) {
-         int nr = FAKE_SYSCALL_NRS[si][ai];
-         if (nr != NR_NON) {
-            jump = idx_fake - ii - 1;
-            T_ (jump <= 255);
-            iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, nr, jump, 0);
-         }
-      }
-      // jump to mknod(2) handling (add even if syscall not implemented to
-      // make the instruction counts simpler)
-      jump = idx_mknod - ii - 1;
-      T_ (jump <= 255);
-      iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, FAKE_MKNOD_NRS[ai], jump, 0);
-      // jump to mknodat(2) handling
-      jump = idx_mknodat - ii - 1;
-      T_ (jump <= 255);
-      iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, FAKE_MKNODAT_NRS[ai], jump, 0);
-      // unfiltered syscall, jump to allow (limit of 255 doesn’t apply to JA)
-      jump = idx_allow - ii - 1;
-      iw(&p, ii++, BPF_JMP|BPF_JA, jump, 0, 0);
-   }
-   T_ (idx_next_arch == idx_allow);
-
-   // Returns. (Note that if we wanted a non-zero errno, we’d bitwise-or with
-   // SECCOMP_RET_ERRNO. But because fake success is errno == 0, we don’t need
-   // a no-op “| 0”.)
-   T_ (ii == idx_allow);
-   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ALLOW, 0, 0);
-   T_ (ii == idx_fake);
-   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ERRNO, 0, 0);
-
-   // mknod(2) handling. This just loads the file mode and jumps to the right
-   // place in the mknodat(2) handling.
-   T_ (ii == idx_mknod);
-   // load mode argument into accumulator
-   iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
-                offsetof(struct seccomp_data, args[1]), 0, 0);
-   // jump to mode test
-   iw(&p, ii++, BPF_JMP|BPF_JA, 1, 0, 0);
-
-   // mknodat(2) handling.
-   T_ (ii == idx_mknodat);
-   // load mode argument into accumulator
-   iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
-                offsetof(struct seccomp_data, args[2]), 0, 0);
-   // jump to fake return if trying to create a device.
-   iw(&p, ii++, BPF_ALU|BPF_AND|BPF_K, S_IFMT, 0, 0);   // file type only
-   iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, S_IFCHR, 2, 0);
-   iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, S_IFBLK, 1, 0);
-   // returns
-   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ALLOW, 0, 0);
-   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ERRNO, 0, 0);
-
-   // Install filter. Use prctl(2) rather than seccomp(2) for slightly greater
-   // compatibility (Linux 3.5 rather than 3.17) and because there is a glibc
-   // wrapper.
-   T_ (ii == p.len);  // next instruction now one past the end of the buffer
-   Z_ (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &p));
-   DEBUG("note: see FAQ to disassemble the above")
-
-   // Test filter. This will fail if the kernel executes the call (because we
-   // are not really privileged and the arguments are bogus) or succeed if
-   // filter handles it. We selected it over something more naturally in the
-   // filter, e.g. setuid(2), because (1) no container process should ever use
-   // it and (2) it’s unlikely to be emulated by a smarter filter in the
-   // future, i.e., it won’t silently start doing something.
-   Zf (syscall(SYS_kexec_load, 0, 0, NULL, 0),
-       "seccomp root emulation failed (is your architecture supported?)");
-}
-#endif
-
-/* Wait for semaphore sem for up to timeout seconds. If timeout or an error,
-   exit unsuccessfully. */
-void sem_timedwait_relative(sem_t *sem, int timeout)
-{
-   struct timespec deadline;
-
-   // sem_timedwait() requires a deadline rather than a timeout.
-   Z_ (clock_gettime(CLOCK_REALTIME, &deadline));
-   deadline.tv_sec += timeout;
-
-   if (sem_timedwait(sem, &deadline)) {
-      Ze (errno == ETIMEDOUT, "timeout waiting for join lock");
-      Tf (0, "failure waiting for join lock");
-   }
+   namespace_join(pid, "user");
+   namespace_join(pid, "mnt");
 }
 
 /* Activate the desired isolation namespaces. */
-void setup_namespaces(const struct container *c, uid_t uid_out, uid_t uid_in,
+void namespaces_setup(const struct container *c, uid_t uid_out, uid_t uid_in,
                       gid_t gid_out, gid_t gid_in)
 {
    int fd;
 
+   VERBOSE("setting up namespaces: %d:%d -> %d:%d",
+           uid_out, gid_out, uid_in, gid_in);
    LOG_IDS;
    Zf (unshare(CLONE_NEWNS|CLONE_NEWUSER), "can't init user+mount namespaces");
    LOG_IDS;
@@ -776,7 +522,7 @@ void setup_namespaces(const struct container *c, uid_t uid_out, uid_t uid_in,
    see issue #212. After bind-mounting, we remove the files from the host;
    they persist inside the container and then disappear completely when the
    container exits. */
-void setup_passwd(const struct container *c)
+void passwd_setup(const struct container *c)
 {
    int fd;
    char *path;
@@ -784,7 +530,7 @@ void setup_passwd(const struct container *c)
    struct passwd *p;
 
    // /etc/passwd
-   T_ (path = cat(host_tmp, "/ch-run_passwd.XXXXXX"));
+   path = cat(host_tmp, "/ch-run_passwd.XXXXXX");
    T_ (-1 != (fd = mkstemp(path)));  // mkstemp(3) writes path
    if (c->container_uid != 0)
       T_ (1 <= dprintf(fd, "root:x:0:0:root:/root:/bin/sh\n"));
@@ -809,7 +555,7 @@ void setup_passwd(const struct container *c)
    Z_ (unlink(path));
 
    // /etc/group
-   T_ (path = cat(host_tmp, "/ch-run_group.XXXXXX"));
+   path = cat(host_tmp, "/ch-run_group.XXXXXX");
    T_ (-1 != (fd = mkstemp(path)));
    if (c->container_gid != 0)
       T_ (1 <= dprintf(fd, "root:x:0:\n"));
@@ -832,6 +578,66 @@ void setup_passwd(const struct container *c)
    Z_ (unlink(path));
 }
 
+/* Move this process into the container’s root filesystem and detach the old
+   root. pivot_root(2) has several undocumented error conditions, so the steps
+   below are a careful dance; bin/ch-checkns.c explains it in detail. */
+void pivot(struct container *c)
+{
+   char *parent, *base, *old_root;
+
+   VERBOSE("pivoting into container");
+   path_split(c->newroot, &parent, &base);
+
+   // If / is a rootfs, pivot_root(2) gives EINVAL, so overmount it first.
+   Z_ (chdir(parent));
+   Z_ (mount(parent, "/", NULL, MS_MOVE, NULL));
+   Z_ (chroot("."));
+   // Now the pivot itself. The old root is put on /dev, which exists even in
+   // extremely minimal images.
+   c->newroot = cat("/", base);
+   Zf (chdir(c->newroot), "can't chdir into new root");
+   old_root = path_join(c->newroot, "dev");
+   Zf (syscall(SYS_pivot_root, c->newroot, old_root), "can't pivot_root(2)");
+   Zf (chroot("."), "can't chroot(2) into new root");
+   Zf (umount2("/dev", MNT_DETACH), "can't umount old root");
+}
+
+/* Run the user command argv in place of the current process, first changing
+   directory to initial_dir unless it is NULL. */
+void run_user_command(char *argv[], const char *initial_dir)
+{
+   LOG_IDS;
+   if (initial_dir)
+      Zf (chdir(initial_dir), "can't cd to %s", initial_dir);
+
+   VERBOSE("executing: %s", argv_to_string(argv));
+   Zf (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), "can't set no_new_privs");
+   // Point stdout and/or stderr at /dev/null when verbosity is low enough.
+   if (LL_INFO > verbose)
+      T_ (freopen("/dev/null", "w", stdout));
+   if (LL_STDERR > verbose)
+      T_ (freopen("/dev/null", "w", stderr));
+   ch_memory_log("usrx");
+   execvp(argv[0], argv);
+   Tf (0, "can't execve(2): %s", argv[0]);  // reached only if exec failed
+}
+
+/* Block on semaphore sem, giving up after timeout seconds. On timeout or
+   any other error, exit unsuccessfully. */
+void sem_timedwait_relative(sem_t *sem, int timeout)
+{
+   struct timespec until;
+
+   // Convert the relative timeout into the absolute deadline that
+   // sem_timedwait() requires, i.e., now plus timeout seconds.
+   Z_ (clock_gettime(CLOCK_REALTIME, &until));
+   until.tv_sec += timeout;
+   if (sem_timedwait(sem, &until) == 0)
+      return;  // got the semaphore
+   Ze (errno == ETIMEDOUT, "timeout waiting for join lock");
+   Tf (0, "failure waiting for join lock");
+}
+
 /* Mount a tmpfs at the given path. */
 void tmpfs_mount(const char *dst, const char *newroot, const char *data)
 {
diff --git a/bin/ch_core.h b/bin/core.h
similarity index 67%
rename from bin/ch_core.h
rename to bin/core.h
index f65cfc083..8615629fc 100644
--- a/bin/ch_core.h
+++ b/bin/core.h
@@ -1,9 +1,12 @@
 /* Copyright © Triad National Security, LLC, and others.
 
-   This interface contains Charliecloud's core containerization features. */
+   This interface contains Charliecloud’s core containerization features. */
 
 #define _GNU_SOURCE
+#pragma once
+
 #include <stdbool.h>
+#include <sys/types.h>
 
 
 /** Types **/
@@ -20,6 +23,20 @@ struct bind {
    enum bind_dep dep;
 };
 
+struct container;  // forward declaration to avoid definition loop
+typedef void (hookf_t)(struct container *, void *);
+struct hook {
+   const char *name;  // label used for duplicate detection and debug logging
+   hookf_t *f;        // function called as f(c, data) by hooks_run()
+   void *data;        // opaque argument handed to f unchanged
+};
+
+enum hook_dup {    // see hook_add()
+   HOOK_DUP_OK,    // add the hook even if the name is already present
+   HOOK_DUP_SKIP,  // silently don’t add a hook whose name is present
+   HOOK_DUP_FAIL   // a hook whose name is already present is a fatal error
+};
+
 enum img_type {
    IMG_DIRECTORY,  // normal directory, perhaps an external mount of some kind
    IMG_SQUASH,     // SquashFS archive file (not yet mounted)
@@ -32,16 +49,18 @@ struct container {
    gid_t container_gid;  // GID to use in container
    uid_t container_uid;  // UID to use in container
    bool env_expand;      // expand variables in --set-env
+   struct hook *hooks_prestart;  // prestart hook functions and their arguments
    char *host_home;      // if --home, host path to user homedir, else NULL
    char *img_ref;        // image description from command line
+   char **ldconfigs;     // directories to pass to image’s ldconfig(8)
    char *newroot;        // path to new root directory
    bool join;            // is this a synchronized join?
    int join_ct;          // number of peers in a synchronized join
    pid_t join_pid;       // process in existing namespace to join
    char *join_tag;       // identifier for synchronized join
    char *overlay_size;   // size of overlaid tmpfs (NULL for no overlay)
-   bool private_passwd;  // don't bind custom /etc/{passwd,group}
-   bool private_tmp;     // don't bind host's /tmp
+   bool private_passwd;  // don’t bind custom /etc/{passwd,group}
+   bool private_tmp;     // don’t bind host’s /tmp
    enum img_type type;   // directory, SquashFS, etc.
    bool writable;        // re-mount image read-write
 };
@@ -50,9 +69,9 @@ struct container {
 /** Function prototypes **/
 
 void containerize(struct container *c);
+void hook_add(struct hook **hook_list, enum hook_dup dup,
+              const char *name, hookf_t *f, void *d);
+void hooks_run(struct container *c, struct hook **hook_list);
 enum img_type image_type(const char *ref, const char *images_dir);
 char *img_name2path(const char *name, const char *storage_dir);
 void run_user_command(char *argv[], const char *initial_dir);
-#ifdef HAVE_SECCOMP
-void seccomp_install(void);
-#endif
diff --git a/bin/ch_fuse.c b/bin/fuse.c
similarity index 97%
rename from bin/ch_fuse.c
rename to bin/fuse.c
index ce60bbcc7..a6b0bc1da 100644
--- a/bin/ch_fuse.c
+++ b/bin/fuse.c
@@ -35,10 +35,11 @@
 // Now we can include ll.h.
 #include <squashfuse/ll.h>
 
-#include "config.h"
-#include "ch_core.h"
-#include "ch_fuse.h"
-#include "ch_misc.h"
+#include "config.h"  // here to avoid potential clash with SquashFUSE config.h
+#include "core.h"
+#include "fuse.h"
+#include "mem.h"
+#include "misc.h"
 
 
 /** Types **/
@@ -121,8 +122,7 @@ void sq_fork(struct container *c)
 
    // Default mount point?
    if (c->newroot == NULL) {
-      char *subdir;
-      T_ (asprintf(&subdir, "/%s.ch/mnt", username) > 0);
+      char *subdir = ch_asprintf("/%s.ch/mnt", username);
       c->newroot = cat("/var/tmp", subdir);
       VERBOSE("using default mount point: %s", c->newroot);
       mkdirs("/var/tmp", subdir, NULL, NULL);
@@ -141,8 +141,7 @@ void sq_fork(struct container *c)
    // Now that the filesystem is mounted, we can fork without race condition.
    // The child returns to caller and runs the user command. When that exits,
    // the parent gets SIGCHLD.
-   pid_child = fork();
-   Tf (pid_child >= 0, "can't fork");
+   pid_child = ch_fork();
    if (pid_child > 0)  // parent (child does nothing here)
       exit(sq_loop());
 }
@@ -204,7 +203,7 @@ int sq_loop(void)
          // [1]: https://codereview.stackexchange.com/a/109349
          // [2]: https://man7.org/linux/man-pages/man2/wait.2.html
          exit_code = 1;
-         VERBOSE("child terminated by signal %d", WTERMSIG(child_status))
+         VERBOSE("child terminated by signal %d", WTERMSIG(child_status));
       }
    }
 
@@ -229,7 +228,7 @@ void sq_mount(const char *img_path, char *mountpt)
    struct fuse_args mount_args = FUSE_ARGS_INIT(mount_argc, mount_argv);
 
    sq.mountpt = mountpt;
-   T_ (sq.chan = malloc(sizeof(sqfs_ll_chan)));
+   sq.chan = ch_malloc(sizeof(sqfs_ll_chan), true);
 
    sq.ll = sqfs_ll_open(img_path, 0);
    Te (sq.ll != NULL, "can't open SquashFS: %s; try ch-run -vv?", img_path);
diff --git a/bin/ch_fuse.h b/bin/fuse.h
similarity index 91%
rename from bin/ch_fuse.h
rename to bin/fuse.h
index 5250ed85a..bc756c54d 100644
--- a/bin/ch_fuse.h
+++ b/bin/fuse.h
@@ -1,6 +1,7 @@
 /* Copyright © Triad National Security, LLC, and others. */
 
 #define _GNU_SOURCE
+#pragma once
 
 /** Function prototypes **/
 
diff --git a/bin/hook.c b/bin/hook.c
new file mode 100644
index 000000000..7a79bcab9
--- /dev/null
+++ b/bin/hook.c
@@ -0,0 +1,41 @@
+/* Copyright © Triad National Security, LLC, and others. */
+
+#define _GNU_SOURCE
+#include "config.h"
+
+#include <stdlib.h>
+
+#include "core.h"
+#include "hook.h"
+#include "misc.h"
+
+
+/** Function prototypes (private) **/
+
+
+/** Functions **/
+
+/* Hook: set the environment variables listed in d, a struct env_var list;
+   c->env_expand is passed through to envs_set(). */
+void hook_envs_set(struct container *c, void *d)
+{
+   envs_set((struct env_var *)d, c->env_expand);
+}
+
+/* Hook: read the variables in file d (a struct env_file) and set them. */
+void hook_envs_set_file(struct container *c, void *d)
+{
+   const struct env_file *src = d;  // NOTE(review): src->expand unused here
+   envs_set(env_file_read(src->path, src->delim), c->env_expand);
+}
+
+/* Hook: unset the environment variables matching glob d. */
+void hook_envs_unset(struct container *c, void *d)
+{
+   envs_unset(d);  // d is the glob, a char *; c is not needed
+}
+
+/* Stub: does nothing yet. NOTE(review): no prototype in hook.h — intended? */
+void hook_ldconfig(struct container *c, void *d)
+{
+}
diff --git a/bin/hook.h b/bin/hook.h
new file mode 100644
index 000000000..ce4426d9f
--- /dev/null
+++ b/bin/hook.h
@@ -0,0 +1,25 @@
+/* Copyright © Triad National Security, LLC, and others.
+
+   This interface contains hooks that don’t deserve their own file. */
+
+#define _GNU_SOURCE
+#pragma once
+
+#include "core.h"
+#include "misc.h"
+
+
+/** Types **/
+
+struct env_file {
+   char *path;   // file to read variables from
+   char delim;   // passed to env_file_read() along with path
+   bool expand;  // NOTE(review): not read by hook_envs_set_file() — confirm
+};
+
+
+/** Function prototypes **/
+
+void hook_envs_set_file(struct container *c, void *d);
+void hook_envs_set(struct container *c, void *d);
+void hook_envs_unset(struct container *c, void *d);
diff --git a/bin/json.c b/bin/json.c
new file mode 100644
index 000000000..adf6d5182
--- /dev/null
+++ b/bin/json.c
@@ -0,0 +1,493 @@
+/* Copyright © Triad National Security, LLC, and others. */
+
+#define _GNU_SOURCE
+#include "config.h"
+
+#include <fnmatch.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+
+#include CJSON_H
+
+#include "core.h"
+#include "json.h"
+#include "mem.h"
+#include "misc.h"
+
+
+/** Macros **/
+
+
+/** Types **/
+
+/* Dispatch table row for CDI hook emulation.
+
+   We could alternately put args last, making it a “flexible array member”.
+   That would make the field order slightly sub-optimal, but more importantly
+   it would make sizeof() return misleading results, which seems like a
+   nasty trap waiting for someone. */
+#define HOOK_ARG_MAX 3
+struct cdi_hook_dispatch {
+   size_t arg_ct;             // number of arguments to compare
+   char *args[HOOK_ARG_MAX];  // matching arguments
+   void (*f)(void *, char **args);    // NULL to ignore quietly
+};
+#define HDF void (*)(void *, char **args)  // to cast in dispatch tables
+
+struct cdi_spec {
+   char *kind;
+   char *src_path;         // source spec file path
+   dev_t src_dev;          // ... device ID
+   ino_t src_ino;          // ... inode number
+   struct env_var *envs;
+   struct bind *binds;
+   char **ldconfigs;       // directories to process with ldconfig(8)
+};
+
+struct json_dispatch {
+   char *name;
+   struct json_dispatch *children;
+   void (*f)(cJSON *tree, void *state);
+};
+#define JDF void (*)(cJSON *, void *)  // to cast callbacks in dispatch tables
+
+
+/** Constants **/
+
+// Block size in bytes for reading JSON files.
+const size_t READ_SZ = 16384;
+
+
+/** Function prototypes (private) **/
+
+char **array_strings_json_to_c(cJSON *jarry, size_t *ct);
+void cdi_append(struct cdi_spec **specs, struct cdi_spec *spec);
+void cdi_hook_nv_ldcache(struct cdi_spec *spec, char **args);
+char *cdi_hook_to_string(const char *hook_name, char **args);
+void cdi_log(struct cdi_spec *spec);
+struct cdi_spec *cdi_read(const char *path);
+struct cdi_spec *cdi_read_maybe(struct cdi_spec *specs, const char *path);
+bool cdi_requested(struct cdi_config *cf, struct cdi_spec *spec);
+void visit(struct json_dispatch actions[], cJSON *tree, void *state);
+void visit_dispatch(struct json_dispatch action, cJSON *tree, void *state);
+
+// parser callbacks
+void cdiPC_cdiVersion(cJSON *tree, struct cdi_spec *spec);
+void cdiPC_env(cJSON *tree, struct cdi_spec *spec);
+void cdiPC_hook(cJSON *tree, struct cdi_spec *spec);
+void cdiPC_kind(cJSON *tree, struct cdi_spec *spec);
+
+
+/** Globals **/
+
+/* List of CDI specs we’ve read. Yes it’s a global, but that lets us keep
+   struct cdi_spec private to this file, which seemed like the right
+   trade-off. It also seemed like “all the specs we know about” wasn’t
+   something we needed multiple of. */
+struct cdi_spec *cdi_specs = NULL;
+
+/* Callback tables. In the struct, the callback’s second argument is “void *”
+   so any state object can be provided. However, we’d prefer the actual
+   functions to take the correct pointer type; thus, they need to be cast.
+   Alternatives include:
+
+     1. Cast every use of the variable in the callbacks. This seemed verbose
+        and error-prone.
+
+     2. Add a local variable of the correct type to each callback. I thought
+        such distributed boilerplate seemed worse. */
+struct json_dispatch cdiPD_containerEdits[] = {
+   { "env",            NULL, (JDF)cdiPC_env },
+   { "hooks",          NULL, (JDF)cdiPC_hook },
+   { }
+};
+struct json_dispatch cdiPD_root[] = {
+   { "cdiVersion",     NULL, (JDF)cdiPC_cdiVersion },
+   { "kind",           NULL, (JDF)cdiPC_kind },
+   { "containerEdits", cdiPD_containerEdits, },
+   { }
+};
+
+/* CDI hook dispatch table. */
+struct cdi_hook_dispatch cdi_hooks[] = {
+   { 2, { "nvidia-ctk-hook",    "update-ldcache" },  (HDF)cdi_hook_nv_ldcache },
+   { 3, { "nvidia-ctk", "hook", "update-ldcache" },  (HDF)cdi_hook_nv_ldcache },
+   { 2, { "nvidia-ctk-hook",    "chmod" },           NULL },
+   { 3, { "nvidia-ctk", "hook", "chmod" },           NULL },
+   { 2, { "nvidia-ctk-hook",    "create-symlinks" }, NULL },
+   { 3, { "nvidia-ctk", "hook", "create-symlinks" }, NULL },
+   { }
+};
+
+
+/** Functions **/
+
+
+/* Given JSON array of strings jarry, which may be of length zero, convert it
+   to a freshly allocated NULL-terminated array of C strings (pointers to
+   null-terminated char buffers) and return that; put the string count in *ct.
+
+   WARNING: This is a shallow copy, i.e., the actual strings are still shared
+   with the JSON array. */
+char **array_strings_json_to_c(cJSON *jarry, size_t *ct)
+{
+   size_t i;
+   char **carry;
+   cJSON *j;
+
+   Tf (cJSON_IsArray(jarry), "JSON: expected array");
+   *ct = cJSON_GetArraySize(jarry);
+   carry = ch_malloc((*ct + 1) * sizeof(char *), true);
+   carry[*ct] = NULL;
+
+   i = 0;
+   cJSON_ArrayForEach(j, jarry) {
+      Tf (cJSON_IsString(j), "JSON: expected string");
+      carry[i++] = j->valuestring;
+   }
+
+   return carry;
+}
+
+/* Return true if devid is a device kind (e.g. “nvidia.com/gpu”), false if
+   it’s a path. Exit with error if NULL pointer or empty string. */
+bool cdi_devid_kind_p(const char *devid)
+{
+   T_ (devid != NULL && devid[0] != '\0');
+   return (devid[0] != '.' && devid[0] != '/');
+}
+
+/* Return a list of environment variables to be set for device devid, which
+   can be either a device kind or a path, or if devid is NULL, all known
+   devices. */
+struct env_var *cdi_envs_get(const char *devid)
+{
+   struct env_var *vars = list_new(sizeof(struct env_var), 0);
+
+   for (int i = 0; cdi_specs[i].kind != NULL; i++) {
+      // Compare devid with both kind and path without checking what it is
+      // because it seemed the odds of false positive low enough.
+      if (   devid == NULL
+          || !strcmp(devid, cdi_specs[i].kind)
+          || !strcmp(devid, cdi_specs[i].src_path))
+         list_append((void **)&vars, cdi_specs[i].envs, sizeof(vars[0]));
+   }
+
+   return vars;
+}
+
+void cdi_hook_nv_ldcache(struct cdi_spec *spec, char **args)
+{
+   for (size_t i = 0; args[i] != NULL; i++)
+      if (!strcmp("--folder", args[i])) {
+         char *dir;
+         T_ (args[i+1] != NULL);
+         T_ (dir = strdup(args[i+1]));
+         // FIXME: YOU ARE HERE: APPEND ONLY IF WE DON'T ALREADY HAVE DIR
+         list_append((void **)&spec->ldconfigs, &dir, sizeof(dir));
+         i++;
+      }
+}
+
+/* Return a freshly allocated string describing the given hook, for logging. */
+char *cdi_hook_to_string(const char *hook_name, char **args)
+{
+   char *args_str;
+
+   args_str = "";
+   for (size_t i = 0; args[i] != NULL; i++)
+      args_str = cats(3, args_str, " ", args[i]);
+
+   return ch_asprintf("%s:%s", hook_name, args_str);
+}
+
+/* Read the CDI spec files we need.
+
+   Note: We only read spec files in the search path directories if either
+   (a) --devices is specified, requesting all known devices or (b) a device
+   kind (rather than a filename) is given to --device (e.g., “nvidia.com/gpu”).
+   This protects users from errors in the spec files if they have not
+   requested any CDI features. */
+void cdi_init(struct cdi_config *cf)
+{
+   bool req_by_kind = false;
+
+   // Initialize specs list.
+   T_ (cdi_specs == NULL);
+   cdi_specs = list_new(sizeof(struct cdi_spec), 0);
+
+   // Read CDI spec files specifically requested.
+   for (int i = 0; cf->devids[i] != NULL; i++)
+      if (cdi_devid_kind_p(cf->devids[i]))
+         req_by_kind = true;
+      else {
+         struct cdi_spec *spec = cdi_read_maybe(cdi_specs, cf->devids[i]);
+         if (spec != NULL)
+            list_append((void **)&cdi_specs, spec, sizeof(*spec));
+      }
+
+   // Read CDI spec files in configured directories if necessary.
+   if (cf->devs_all_p || req_by_kind)
+      for (int i = 0; cf->spec_dirs[i] != NULL; i++) {
+         char **entries = dir_glob(cf->spec_dirs[i], "*.json");
+         for (int j = 0; entries[j] != NULL; j++) {
+            struct cdi_spec *spec;
+            spec = cdi_read_maybe(cdi_specs,
+                                  path_join(cf->spec_dirs[i], entries[j]));
+            if (spec != NULL && cdi_requested(cf, spec))
+               list_append((void **)&cdi_specs, spec, sizeof(*spec));
+         }
+      }
+
+   // debugging: print each parsed CDI spec (list ends at first NULL kind)
+   DEBUG("CDI: read %d specs", list_count(cdi_specs, sizeof(cdi_specs[0])));
+   for (size_t i = 0; cdi_specs[i].kind != NULL; i++)
+      cdi_log(&cdi_specs[i]);
+
+/*
+   // update c
+   for (size_t i = 0; specs[i] != NULL; i++) {
+      // ldconfigs; copy rather than assigning because (1) easier to free
+      // and (2) still works if we later grow other sources of ldconfig.
+      list_cat((void **)&c->ldconfigs, (void *)specs[i]->ldconfigs,
+               sizeof(c->ldconfigs[0]));
+   }
+*/
+}
+
+/* Log contents of spec at DEBUG level: kind, source file, env, binds, dirs. */
+void cdi_log(struct cdi_spec *spec)
+{
+   size_t ct;
+
+   DEBUG("CDI: %s from %s (%u,%u %lu):", spec->kind, spec->src_path,
+         major(spec->src_dev), minor(spec->src_dev),
+         (unsigned long)spec->src_ino);  // ino_t width varies; cast for %lu
+   ct = list_count((void *)(spec->envs), sizeof(struct env_var));
+   DEBUG("CDI:   environment: %zu:", ct);
+   for (size_t i = 0; i < ct; i++)
+      DEBUG("CDI:     %s=%s", spec->envs[i].name, spec->envs[i].value);
+   ct = list_count((void *)(spec->binds), sizeof(struct bind));
+   DEBUG("CDI:   bind mounts: %zu:", ct);
+   for (size_t i = 0; i < ct; i++)
+      DEBUG("CDI:     %s ->  %s", spec->binds[i].src, spec->binds[i].dst);
+   ct = list_count((void *)(spec->ldconfigs), sizeof(char *)); DEBUG(
+         "CDI:   ldconfig directories: %zu:", ct);
+   for (size_t i = 0; i < ct; i++)
+      DEBUG("CDI:     %s", spec->ldconfigs[i]);
+}
+
+/* Read and parse the CDI spec file at path. Return a pointer to the parsed
+   struct. If something goes wrong, exit with error. */
+struct cdi_spec *cdi_read(const char *path)
+{
+   FILE *fp;
+   struct stat st;
+   char *text = NULL;
+   const char *parse_end;
+   cJSON *tree;
+   struct cdi_spec *spec = NULL;
+
+   // Read file into string. Allocate incrementally rather than seeking so
+   // non-seekable input works.
+   Tf (fp = fopen(path, "rb"), "CDI: can't open: %s", path);
+   Zf (fstat(fileno(fp), &st), "CDI: can't stat: %s", path);
+   for (size_t used = 0, avail = READ_SZ; true; avail += READ_SZ) {
+      size_t read_ct;
+      text = ch_realloc(text, avail, false);
+      read_ct = fread(text + used, 1, READ_SZ, fp);
+      used += read_ct;
+      if (read_ct < READ_SZ) {
+         if (feof(fp)) {        // EOF reached
+            T_ (used < avail);
+            text[used] = '\0';  // terminate string
+            break;
+         }
+         Tf(0, "CDI: can't read: %s", path);
+      }
+   }
+
+   // Parse JSON. parse_end - text is a ptrdiff_t, hence %td.
+   tree = cJSON_ParseWithOpts(text, &parse_end, false);
+   Tf(tree != NULL, "CDI: JSON failed at byte %td: %s", parse_end - text, path);
+
+   // Visit parse tree to build our struct.
+   spec = ch_malloc(sizeof(struct cdi_spec), true);
+   spec->src_path = (char *)path;  // shouldn’t ever be written
+   spec->src_dev = st.st_dev;
+   spec->src_ino = st.st_ino;
+   visit(cdiPD_root, tree, spec);
+
+   Z_ (fclose(fp));  // clean up: don't leak the stream
+   VERBOSE("CDI: spec read OK: %s: %s", spec->kind, path);
+   return spec;
+}
+
+/* Read and parse the CDI spec file at path, returning a pointer to the
+   newly-allocated spec struct, unless (1) we already read the file, in which
+   case log that fact and return NULL, or (2) the device kind has already been
+   specified, in which case exit with error. If something else goes wrong,
+   also exit with error. */
+struct cdi_spec *cdi_read_maybe(struct cdi_spec *specs, const char *path)
+{
+   struct cdi_spec *spec;
+   struct stat st;
+
+   // Don’t read file if we already did. It’s relatively easy to give a spec
+   // file more than once, e.g. if it’s in the search path and also an
+   // argument to --device.
+   for (int i = 0; specs[i].kind != NULL; i++) {
+      Zf (stat(path, &st), "can’t stat CDI spec: %s", path);
+      if (st.st_dev == specs[i].src_dev && st.st_ino == specs[i].src_ino) {
+         VERBOSE("CDI: spec already read, skipping: %s", path);
+         return NULL;
+      }
+   }
+
+   spec = cdi_read(path);
+
+   // Error if this device already specified, which because we don’t re-read
+   // files means two files specified the same device kind.
+   for (int i = 0; specs[i].kind != NULL; i++)
+      Te (strcmp(spec->kind, specs[i].kind),
+          "CDI: device found in multiple spec files: %s: %s and %s",
+          spec->kind, specs[i].src_path, spec->src_path);
+
+   return spec;
+}
+
+/* Return true if the given spec was requested by configuration cf, false
+   otherwise. */
+bool cdi_requested(struct cdi_config *cf, struct cdi_spec *spec)
+{
+   if (cf->devs_all_p)
+      return true;
+
+   for (int i = 0; cf->devids[i] != NULL; i++)  // devids is NULL-terminated
+      if (   cdi_devid_kind_p(cf->devids[i])
+          && !strcmp(cf->devids[i], spec->kind))
+         return true;
+
+   return false;
+}
+
+void cdiPC_cdiVersion(cJSON *tree, struct cdi_spec *spec)
+{
+   DEBUG("CDI: %s: version %s", spec->src_path, tree->valuestring);
+}
+
+void cdiPC_env(cJSON *tree, struct cdi_spec *spec)
+{
+   struct env_var ev;
+   size_t name_len, value_len;  // not including null terminator
+   char *delim, *arnold;
+
+   T_ (cJSON_IsString(tree));
+   T_ (delim = strchr(tree->valuestring, '='));
+   T_ (arnold = strchr(tree->valuestring, 0));
+
+   name_len = delim - tree->valuestring;
+   value_len = arnold - delim - 1;
+   T_ (ev.name = malloc(name_len + 1));
+   memcpy(ev.name, tree->valuestring, name_len);
+   ev.name[name_len] = 0;
+   T_ (ev.value = malloc(value_len + 1));
+   memcpy(ev.value, delim + 1, value_len);
+   ev.value[value_len] = 0;
+
+   list_append((void **)&spec->envs, &ev, sizeof(ev));
+}
+
+void cdiPC_hook(cJSON *tree, struct cdi_spec *spec)
+{
+   char **args;
+   size_t arg_ct;
+   char *hook_name;
+   char *hook_str;
+   bool hook_known;
+   //struct cdi_hook_dispatch hook;
+
+   T_ (hook_name = cJSON_GetStringValue(cJSON_GetObjectItem(tree, "hookName")));
+
+   T_ (cJSON_IsArray(cJSON_GetObjectItem(tree, "args")));
+   args = array_strings_json_to_c(cJSON_GetObjectItem(tree, "args"), &arg_ct);
+   hook_str = cdi_hook_to_string(hook_name, args);
+
+   hook_known = false;
+   for (size_t i = 0; cdi_hooks[i].arg_ct != 0; i++) {  // for each table row
+      if (arg_ct >= cdi_hooks[i].arg_ct) {   // enough hook args to compare
+         for (size_t j = 0; j < cdi_hooks[i].arg_ct; j++)
+            if (strcmp(args[j], cdi_hooks[i].args[j]))
+                goto continue_outer;
+         hook_known = true;  // all words matched
+         if (cdi_hooks[i].f == NULL) {
+            DEBUG("CDI: ignoring known hook: %s", hook_str);
+         } else {
+            DEBUG("CDI: emulating known hook: %s", hook_str);
+            cdi_hooks[i].f(spec, &args[cdi_hooks[i].arg_ct]);
+         }
+         break;  // only call one hook function
+      }
+   continue_outer: ;  // null statement: pre-C23 labels must precede a statement
+   }
+
+   if (!hook_known)
+      WARNING("CDI: ignoring unknown hook: %s", hook_str);
+}
+
+void cdiPC_kind(cJSON *tree, struct cdi_spec *spec)
+{
+   T_ (spec->kind = strdup(tree->valuestring));
+}
+
+/* Initialize the cJSON stuff. Quirks:
+
+   1. Despite using reallocation internally, cJSON indeed does not accept a
+      realloc(3) replacement, though it possibly used to. If malloc(3) and
+      free(3) are provided, then it just doesn’t call any realloc().
+
+      Weirdly, cJSON appears to have a notion of “internal” memory management
+      that uses malloc(3), realloc(3), and free(3) regardless of these hooks.
+
+   2. cJSON prefixes everything with CJSON_CDECL, which is just __cdecl, which
+      is unnecessary for C code. Maybe this is for using cJSON in C++? */
+void json_init(void)
+{
+   cJSON_Hooks hooks = (cJSON_Hooks) {
+      .malloc_fn = ch_malloc_pointerful,
+      .free_fn = ch_free_noop,
+   };
+
+   cJSON_InitHooks(&hooks);
+}
+
+/* Visit each node in the parse tree in depth-first order. At each node, if
+   there is a matching callback in actions, call it. For arrays, call the
+   callback once per array element. */
+void visit(struct json_dispatch actions[], cJSON *tree, void *state)
+{
+   for (int i = 0; actions[i].name != NULL; i++) {
+      cJSON *subtree = cJSON_GetObjectItem(tree, actions[i].name);
+      if (subtree != NULL) {  // child matching action name exists
+         if (!cJSON_IsArray(subtree))
+            visit_dispatch(actions[i], subtree, state);
+         else {
+            cJSON *elem;
+            cJSON_ArrayForEach(elem, subtree)
+               visit_dispatch(actions[i], elem, state);
+         }
+      }
+   }
+}
+
+/* Call the appropriate callback for the root node of tree, if any. Then
+   visit its children, if any. */
+void visit_dispatch(struct json_dispatch action, cJSON *tree, void *state)
+{
+   if (action.f != NULL)
+      action.f(tree, state);
+   if (action.children != NULL)
+      visit(action.children, tree, state);
+}
diff --git a/bin/json.h b/bin/json.h
new file mode 100644
index 000000000..9675a7a3e
--- /dev/null
+++ b/bin/json.h
@@ -0,0 +1,32 @@
+/* Copyright © Triad National Security, LLC, and others.
+
+   This interface contains all functions that deal with JSON: OCI, CDI, and
+   friends. */
+
+#define _GNU_SOURCE
+#pragma once
+#include "config.h"
+
+#include <stdbool.h>
+
+#include "core.h"
+#include "misc.h"
+
+#include CJSON_H
+
+
+/** Types **/
+
+/* General CDI configuration. */
+struct cdi_config {
+   char **spec_dirs;      // directories to search for CDI spec files
+   bool devs_all_p;        // inject all devices found
+   char **devids;          // user-requested devices
+};
+
+
+/** Function prototypes **/
+
+struct env_var *cdi_envs_get(const char *devid);
+void cdi_init(struct cdi_config *cf);
+void json_init(void);
diff --git a/bin/mem.c b/bin/mem.c
new file mode 100644
index 000000000..308358f2d
--- /dev/null
+++ b/bin/mem.c
@@ -0,0 +1,460 @@
+/* libgc API
+   ---------
+
+   See:
+
+     https://hboehm.info/gc/gcinterface.html
+     https://github.com/ivmai/bdwgc/blob/57ccbcc/include/gc/gc.h#L459
+
+   The latter is more complete.
+
+   libgc provides both upper-case, e.g. GC_MALLOC(), and lower-case, e.g.
+   GC_malloc(), versions of many functions. It’s not totally clear to me what
+   the separation principles are, though the vibe does seem to prefer the
+   upper-case versions. We use the upper-case when available.
+
+   Zeroing newly-allocated memory
+   ------------------------------
+
+   Because we use a lot of zero-terminated data structures, it would be nice
+   for the allocation functions to just always return zeroed buffers. We also
+   want to not require libgc, i.e., we want to still be able to use malloc(3)
+   and realloc(3) under the hood. It’s easy to provide a zeroing
+   malloc(3)-workalike, and we do, but as far as I can tell, it’s impossible
+   to do so for realloc(3)-alike unless we either (1) maintain our own
+   allocation size tracking or (2) use highly non-portable code. Neither of
+   these seemed worth the effort and complexity.
+
+   This is because, as it turns out, the length of an allocated buffer is a
+   more complicated notion than it seems. A buffer has *two* different
+   lengths: L1 is the size requested by the original caller, and L2 is the
+   size actually allocated; L2 ≥ L1. Neither are reliably available:
+
+     * L1: The allocator can’t provide it, and while the caller had it at the
+       time of previous allocation, it might not have kept it.
+
+     * L2: Not available from the libc allocator without fairly extreme
+       non-portability and/or difficult constraints [1], though libgc does
+       provide it with GC_size(). The caller never knew it.
+
+   Suppose we call realloc() with a new length Lν, where Lν > L2 ≥ L1. To zero
+   the new part of the buffer, we must zero (L1,Lν], or (L2,Lν] if we assume
+   (L1,L2] are still zero from the initial malloc(), and leave prior bytes
+   untouched. But we don’t know either L1 or L2 reliably, so we’re hosed,
+   whether we call an upstream realloc() or malloc() an entirely new buffer,
+   then memcpy(3).
+
+   I suspect this is why libc provides calloc(3) but not an equivalent for
+   realloc(3).
+
+   [1]: https://stackoverflow.com/questions/1281686 */
+
+#define _GNU_SOURCE
+#include "config.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#ifdef HAVE_GC
+#include <gc.h>
+#endif
+
+#include "mem.h"
+#include "misc.h"
+
+
+/** Macros **/
+
+/** Types **/
+
+/** Constants **/
+
+/** Function prototypes (private) **/
+
+ssize_t kB(ssize_t byte_ct);
+
+
+/** Globals **/
+
+/* Note: All the memory statistics are signed “ssize_t” rather than the more
+   correct unsigned “size_t” so that subtractions are less error-prone (we
+   report lots of differences). We assume that memory usage is small enough
+   for this to not matter. */
+
+/* Size of the stack, heap, and anonymous mmap(2) mappings at previous
+   ch_memory_log() call. */
+ssize_t stack_prev = 0;
+ssize_t heap_prev = 0;
+ssize_t anon_prev = 0;
+
+#ifdef HAVE_GC
+
+/* Note: The first four counters are from GC_prof_stats_s fields and have the
+   corresponding names. Total size of allocated blocks is derived. See gc.h. */
+
+/* Total size of the heap. This includes “unmapped” bytes that libgc is
+   tracking but has given back to the OS, I assume to be re-requested from the
+   OS if needed. */
+ssize_t heapsize_prev = 0;
+
+/* Free bytes in the heap, both mapped and unmapped. */
+ssize_t free_prev = 0;
+
+/* Unmapped bytes (i.e., returned to the OS but still tracked by libgc) in the
+   heap. */
+ssize_t unmapped_prev = 0;
+
+/* Number of garbage collections done so far. */
+ssize_t gc_no_prev = 0;
+
+/* Total time spent doing garbage collection, in milliseconds. Corresponds to
+   GC_get_full_gc_total_time(). Note that because ch-run is single-threaded,
+   we do not report time spent collecting with the world stopped. */
+long time_collecting_prev = 0;
+
+#endif
+
+
+/** Functions **/
+
+/* Return a snprintf(3)-formatted string in a newly allocated buffer of
+   appropriate length. Exit on error.
+
+   This function formats the string twice: Once to figure out how long the
+   formatted string is, and again to actually format the string. I’m not aware
+   of a better way to compute string length. (musl does it the same way; glibc
+   was too complicated for my patience in figuring it out.)
+
+   An alternative would be to allocate a small buffer, try that, and if it’s
+   too small re-allocate and format again. For strings that fit, this would
+   save a formatting cycle at the cost of wasted memory and more code paths.
+   That didn’t seem like the right trade-off, esp. since short strings should
+   be the fastest to format. */
+char *ch_asprintf(const char *fmt, ...)
+{
+   va_list ap;
+   char *str;
+
+   va_start(ap, fmt);
+   str = ch_vasprintf(fmt, ap);
+   va_end(ap);
+
+   return str;
+}
+
+/* Fork the process. In parent, return the PID of the child; in the child,
+   return 0. Cannot fail.
+
+   The main purpose of this wrapper is to do an aggressive garbage collection
+   prior to fork(2) so the child is as small as possible. */
+pid_t ch_fork(void)
+{
+   pid_t child;
+
+   ch_memory_log("fork");
+   garbageinate("fkgc");
+
+   child = fork();
+   Tf (child >= 0, "can't fork");
+
+   return child;
+}
+
+/* free(3)-alike that does nothing. Don’t call it. Provided for libraries that
+   let us hook memory allocation and de-allocation, e.g. cJSON. */
+void ch_free_noop(void *p)
+{
+}
+
+/* Return a new null-terminated string containing the next record from fp,
+   where records are delimited by delim (e.g., pass '\n' to get the next
+   line). If no more records available, return NULL. Exit on error.
+
+   Unlike getdelim(3), the delimiter is *not* part of the returned string.
+
+   Warnings:
+
+     1. Records cannot contain the zero byte, and behavior is undefined if fp
+        contains any zeros and delimiter is not '\0'.
+
+     2. The returned buffer is likely larger than needed. We assume wasting
+        this space is better than the overhead of realloc’ing down to a
+        precise size. */
+char *ch_getdelim(FILE *fp, char delim)
+{
+   size_t bytes_read = 0;
+   size_t buf_len = 8;  // non-zero start avoids early frequent realloc
+   char *buf = ch_malloc(buf_len, false);
+
+   while (true) {
+      int c = fgetc(fp);
+      if (c == EOF)
+         break;
+      bytes_read++;
+      if (bytes_read > buf_len) {      // room for terminator ensured later
+         buf_len *= 2;
+         buf = ch_realloc(buf, buf_len, false);
+      }
+      buf[bytes_read-1] = c;
+      if (c == delim)
+         break;
+   }
+
+   if (bytes_read > 0 && buf[bytes_read-1] == delim) {  // found delimiter
+      buf[bytes_read-1] = '\0';        // overwrite delimiter with terminator
+   } else if (feof(fp)) {              // end-of-file
+      if (bytes_read == 0)             // no record left
+         return NULL;
+      else {                           // record ends at EOF (no delimiter)
+         if (bytes_read >= buf_len) {
+            T_ (bytes_read == buf_len);
+            buf = ch_realloc(buf, buf_len + 1, false);
+         }
+         buf[bytes_read] = '\0';       // terminate string
+      }
+   } else {                            // error
+      Te (0, "error reading file");    // don’t know filename here
+   }
+
+   return buf;
+}
+
+/* Allocate and return a new buffer of length size bytes. The initial contents
+   of the buffer are undefined.
+
+   If pointerful, then the buffer may contain pointers. Otherwise, the caller
+   guarantees no pointers will ever be stored in the buffer. This allows
+   garbage collection optimizations. If unsure, say true. */
+void *ch_malloc(size_t size, bool pointerful)
+{
+   void *buf;
+
+#ifdef HAVE_GC
+   buf = pointerful ? GC_MALLOC(size) : GC_MALLOC_ATOMIC(size);
+#else
+   (void)pointerful;  // suppress warning
+   buf = malloc(size);
+#endif
+
+   T_ (buf);
+   return buf;
+}
+
+/* Like ch_malloc(), but same API as malloc(3). Prefer ch_malloc(). This is
+   provided for libraries that let us hook memory allocation and
+   de-allocation, e.g. cJSON. */
+void *ch_malloc_pointerful(size_t size)
+{
+   return ch_malloc(size, true);
+}
+
+/* Like ch_malloc(), but buffer contents are zeroed. */
+void *ch_malloc_zeroed(size_t size, bool pointerful)
+{
+   void *buf = ch_malloc(size, pointerful);
+   memset(buf, 0, size);
+   return buf;
+}
+
+/* Shut down memory management. */
+void ch_memory_exit(void)
+{
+   ch_memory_log("exit");
+}
+
+/* Initialize memory management. We don’t log usage here because it’s called
+   before logging is up. */
+void ch_memory_init(void)
+{
+#ifdef HAVE_GC
+   //GC_set_handle_fork(1); // I think the default mode is fine???
+   GC_INIT();
+   GC_start_performance_measurement();
+#endif
+}
+
+/* Log stack and heap memory usage, and GC statistics if enabled, to stderr
+   and syslog if enabled. */
+void ch_memory_log(const char *when)
+{
+   FILE *fp;
+   char *line = NULL;
+   char *s;
+   ssize_t stack_len = 0, heap_len = 0, anon_len = 0;
+   ssize_t total_len, total_prev;
+#ifdef HAVE_GC
+   struct GC_prof_stats_s ps;
+   ssize_t used, used_prev;
+   long time_collecting;
+#endif
+
+   /* Compute stack, heap, and anonymous mapping sizes. While awkward, AFAICT
+      this is the best available way to get these sizes. See proc_pid_maps(5).
+      Whitespace-separated (?) fields:
+
+        1. start (inclusive) and end (exclusive) addresses, in hex
+        2. permissions, e.g. “r-xp”
+        3. offset, in hex
+        4. device major:minor, in hex?
+        5. inode number, in decimal
+        6. pathname */
+   T_ (fp = fopen("/proc/self/maps", "r"));
+   while ((line = ch_getdelim(fp, '\n'))) {
+      int conv_ct;
+      void *start, *end;
+      char path[8] = { 0 };  // length must match format string!
+      conv_ct = sscanf(line, "%p-%p %*[rwxp-] %*x %*x:%*x %*u %7s",
+                       &start, &end, path);
+      if (conv_ct < 2) {     // will be 2 if path empty
+         WARNING("please report this bug: can't parse map: %d: \"%s\"",
+                 conv_ct, line);
+         break;
+      }
+      if (strlen(path) == 0)
+         anon_len += end - start;
+      else if (!strcmp(path, "[stack]"))
+         stack_len += end - start;
+      else if (!strcmp(path, "[heap]"))
+         heap_len += end - start;
+   }
+   Z_ (fclose(fp));
+
+   // log the basics
+   total_len = stack_len + heap_len + anon_len;
+   total_prev = stack_prev + heap_prev + anon_prev;
+   s = ch_asprintf("mem: %s: "
+         "%zdkB %+zd (stac %zdkB %+zd, heap %zdkB %+zd, anon %zdkB %+zd)",
+         when,
+         kB(total_len), kB(total_len - total_prev),
+         kB(stack_len), kB(stack_len - stack_prev),
+         kB(heap_len),  kB(heap_len - heap_prev),
+         kB(anon_len),  kB(anon_len - anon_prev));
+   stack_prev = stack_len;
+   heap_prev = heap_len;
+   anon_prev = anon_len;
+   DEBUG("%s", s);  // s is data, not a format: when may contain '%'
+#ifdef ENABLE_SYSLOG
+   syslog(SYSLOG_PRI, "%s", s);
+#endif
+
+   // log GC stuff
+#ifdef HAVE_GC
+   GC_get_prof_stats(&ps, sizeof(ps));
+   time_collecting = GC_get_full_gc_total_time();
+   // space
+   used = ps.heapsize_full - ps.free_bytes_full;
+   used_prev = heapsize_prev - free_prev;
+   s = ch_asprintf("gc:  %s: "
+         "%zdkB %+zd (used %zdkB %+zd, free %zdkB %+zd, unmp %zdkB %+zd)",
+         when,
+         kB(ps.heapsize_full), kB(ps.heapsize_full - heapsize_prev),
+         kB(used), kB(used - used_prev),
+         kB(ps.free_bytes_full), kB(ps.free_bytes_full - free_prev),
+         kB(ps.unmapped_bytes), kB(ps.unmapped_bytes - unmapped_prev));
+   heapsize_prev = ps.heapsize_full;
+   free_prev = ps.free_bytes_full;
+   unmapped_prev = ps.unmapped_bytes;
+   DEBUG("%s", s);
+#ifdef ENABLE_SYSLOG
+   syslog(SYSLOG_PRI, "%s", s);
+#endif
+   // time; time_collecting values are long, hence %ld rather than %zd
+   s = ch_asprintf("gc:  "
+         "%s: %ld collections (%+ld) in %ldms (%+ld)",
+         when,
+         ps.gc_no, ps.gc_no - gc_no_prev,
+         time_collecting, time_collecting - time_collecting_prev);
+   gc_no_prev = ps.gc_no;
+   time_collecting_prev = time_collecting;
+   DEBUG("%s", s);
+#ifdef ENABLE_SYSLOG
+   syslog(SYSLOG_PRI, "%s", s);
+#endif
+#endif
+}
+
+/* Change the size of allocated buffer p to size bytes. Like realloc(3), if p
+   is NULL, then this function is equivalent to ch_malloc(). Unlike realloc(3),
+   size may not be zero.
+
+   If size is greater than the existing buffer length, the initial content of
+   new bytes is undefined. If size is less than the existing buffer length,
+   this function may be a no-op; i.e., it may be impossible to shrink a
+   buffer’s actual allocation.
+
+   pointerful is as in ch_malloc(). If p is non-NULL, it must match the
+   original allocation, though this is not validated. */
+void *ch_realloc(void *p, size_t size, bool pointerful)
+{
+   void *p_new;
+
+   T_ (size > 0);
+
+   if (p == NULL)
+      p_new = ch_malloc(size, pointerful);  // no GC_REALLOC_ATOMIC()
+   else {
+#ifdef HAVE_GC
+      p_new = GC_REALLOC(p, size);
+#else
+      p_new = realloc(p, size);
+#endif
+   }
+
+   T_ (p_new);
+   return p_new;
+}
+
+/* Return a copy of s in a newly allocated, pointerless buffer. Cannot fail.
+
+   Note: Unlike strdup(3), ch_strdup() is only needed if you need to actually
+   modify the copy. It should not be used to simplify memory management. */
+char *ch_strdup(const char *s)
+{
+   char *dst;
+
+#ifdef HAVE_GC
+   dst = GC_STRDUP(s);
+#else
+   dst = strdup(s);
+#endif
+
+   T_ (dst);
+   return dst;
+}
+
+/* Like ch_asprintf(), but takes and consumes a va_list pointer. */
+char *ch_vasprintf(const char *fmt, va_list ap)
+{
+   va_list ap2;
+   int str_len;
+   char *str; // = ch_malloc(1024, false);
+
+   va_copy(ap2, ap);
+
+   T_ (0 <= (str_len = vsnprintf(NULL, 0, fmt, ap)));
+   str = ch_malloc(str_len + 1, false);
+   T_ (str_len == vsnprintf(str, str_len + 1, fmt, ap2));
+
+   va_end(ap2);
+
+   return str;
+}
+
+/* If linked with libgc, do a maximum-effort garbage collection; otherwise, do
+   nothing. Use when to tag memory logging. */
+void garbageinate(const char *when)
+{
+#ifdef HAVE_GC
+   GC_gcollect_and_unmap();
+   ch_memory_log(when);
+#endif
+}
+
+/* Convert a signed number of bytes to kilobytes (truncated) and return it. */
+ssize_t kB(ssize_t byte_ct)
+{
+   return byte_ct / 1024;
+}
diff --git a/bin/mem.h b/bin/mem.h
new file mode 100644
index 000000000..63279691d
--- /dev/null
+++ b/bin/mem.h
@@ -0,0 +1,25 @@
+/* Memory management routines. */
+
+#define _GNU_SOURCE
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/** Function prototypes **/
+
+char *ch_asprintf(const char *fmt, ...);
+pid_t ch_fork(void);
+void ch_free_noop(void *p);
+char *ch_getdelim(FILE *fp, char delim);
+void ch_memory_exit(void);
+void ch_memory_init(void);
+void ch_memory_log(const char *when);
+void *ch_malloc(size_t size, bool pointerful);
+void *ch_malloc_pointerful(size_t size);
+void *ch_malloc_zeroed(size_t size, bool pointerful);
+void *ch_realloc(void *p, size_t size, bool pointerful);
+char *ch_strdup(const char *src);
+char *ch_vasprintf(const char *fmt, va_list ap);
+void garbageinate(const char *when);
diff --git a/bin/ch_misc.c b/bin/misc.c
similarity index 57%
rename from bin/ch_misc.c
rename to bin/misc.c
index bdee7fa20..ee9d660c5 100644
--- a/bin/ch_misc.c
+++ b/bin/misc.c
@@ -1,6 +1,8 @@
 /* Copyright © Triad National Security, LLC, and others. */
 
 #define _GNU_SOURCE
+#include "config.h"
+
 #include <ctype.h>
 #include <dirent.h>
 #include <fcntl.h>
@@ -16,8 +18,8 @@
 #include <sys/statvfs.h>
 #include <unistd.h>
 
-#include "config.h"
-#include "ch_misc.h"
+#include "mem.h"
+#include "misc.h"
 
 
 /** Macros **/
@@ -32,10 +34,44 @@
 #define SUPP_GIDS_MAX 128
 
 
+/** Constants **/
+
+/* Text colors. Each code begins with the escape character (U+001B), written
+   here as the octal escape \033 so it cannot be lost or hidden by editors,
+   viewers, or other tools that mishandle raw control characters.
+
+   In principle, we should be using a library for this, e.g. terminfo(5).
+   However, moderately thorough web searching suggests that pretty much any
+   modern terminal will support 256-color ANSI codes, and this is way simpler
+   [1]. Probably should coordinate these colors with the Python code somehow.
+
+   [1]: https://stackoverflow.com/a/3219471 */
+static const char COLOUR_CYAN_DARK[] =  "\033[0;38;5;6m";
+static const char COLOUR_CYAN_LIGHT[] = "\033[0;38;5;14m";
+//static const char COLOUR_GRAY[] =       "\033[0;90m";
+static const char COLOUR_RED[] =        "\033[0;31m";
+static const char COLOUR_RED_BOLD[] =   "\033[1;31m";
+static const char COLOUR_RESET[] =      "\033[0m";
+static const char COLOUR_YELLOW[] =     "\033[0;33m";
+static const char *_LL_COLOURS[] = { COLOUR_RED_BOLD,     // fatal
+                                     COLOUR_RED_BOLD,     // stderr
+                                     COLOUR_RED,          // warning
+                                     COLOUR_YELLOW,       // info
+                                     COLOUR_CYAN_LIGHT,   // verbose
+                                     COLOUR_CYAN_DARK,    // debug
+                                     COLOUR_CYAN_DARK };  // trace
+/* Offset by 3 so we can index by verbosity, which can be negative. */
+static const char **LL_COLOURS = _LL_COLOURS + 3;
+
+
 /** External variables **/
 
-/* Level of chatter on stderr. */
-enum log_level verbose;
+/* If true, exit abnormally on fatal error. Set in ch-run.c during argument
+   parsing, so will always be default value before that. */
+bool abort_fatal = false;
+
+/* If true, use colored logging. Set in ch-run.c. */
+bool log_color_p = false;
 
 /* Path to host temporary directory. Set during command line processing. */
 char *host_tmp = NULL;
@@ -43,6 +79,9 @@ char *host_tmp = NULL;
 /* Username of invoking users. Set during command line processing. */
 char *username = NULL;
 
+/* Level of chatter on stderr. */
+enum log_level verbose;
+
 /* List of warnings to be re-printed on exit. This is a buffer of shared memory
    allocated by mmap(2), structured as a sequence of null-terminated character
    strings. Warnings that do not fit in this buffer will be lost, though we
@@ -56,6 +95,7 @@ char *warnings;
 size_t warnings_offset = 0;
 
 
+
 /** Function prototypes (private) **/
 
 void mkdir_overmount(const char *path, const char *scratch);
@@ -74,11 +114,12 @@ char *argv_to_string(char **argv)
    char *s = NULL;
 
    for (size_t i = 0; argv[i] != NULL; i++) {
-      char *argv_, *x;
+      char *argv_;
       bool quote_p = false;
 
       // Max length is escape every char plus two quotes and terminating zero.
-      T_ (argv_ = calloc(2 * strlen(argv[i]) + 3, 1));
+      // Initialize to zeroes so we don’t have to terminate string later.
+      argv_ = ch_malloc_zeroed(2 * strlen(argv[i]) + 3, false);
 
       // Copy to new string, escaping as we go. Note lots of fall-through. I'm
       // not sure where this list of shell meta-characters came from; I just
@@ -121,27 +162,19 @@ char *argv_to_string(char **argv)
          }
       }
 
-      if (quote_p) {
-         x = argv_;
-         T_ (1 <= asprintf(&argv_, "\"%s\"", argv_));
-         free(x);
-      }
-
-      if (i != 0) {
-         x = s;
-         s = cat(s, " ");
-         free(x);
-      }
-
-      x = s;
-      s = cat(s, argv_);
-      free(x);
-      free(argv_);
+      s = cats(5, s, i == 0 ? "" : " ",
+               quote_p ? "\"" : "", argv_, quote_p ? "\"" : "");
    }
 
    return s;
 }
 
+/* Return “yes” if b is true and “no” otherwise. */
+const char *bool_to_string(bool b)
+{
+   return !b ? "no" : "yes";
+}
+
 /* Iterate through buffer “buf” of size “s” consisting of null-terminated
    strings and return the number of strings in it. Key assumptions:
 
@@ -154,9 +187,9 @@ char *argv_to_string(char **argv)
       3. The buffer contains no empty strings.
 
    These assumptions are consistent with the construction of the “warnings”
-   shared memory buffer, which is the main justification for this function. Note
-   that under these assumptions, the final byte in the buffer is guaranteed to
-   be null. */
+   shared memory buffer, which is the main justification for this function.
+   Note that under these assumptions, the final byte in the buffer is
+   guaranteed to be null. */
 int buf_strings_count(char *buf, size_t size)
 {
    int count = 0;
@@ -182,49 +215,102 @@ bool buf_zero_p(void *buf, size_t size)
    return true;
 }
 
-/* Concatenate strings a and b, then return the result. */
+/* Concatenate strings a and b into a newly-allocated buffer and return a
+   pointer to this buffer. Either may be NULL, which cats() treats as "". */
 char *cat(const char *a, const char *b)
 {
-   char *ret;
-   if (a == NULL)
-      a = "";
-   if (b == NULL)
-       b = "";
-   T_ (asprintf(&ret, "%s%s", a, b) == strlen(a) + strlen(b));
-   return ret;
+   return cats(2, a, b);
 }
 
-/* Like scandir(3), but (1) filter excludes “.” and “..”, (2) results are not
-   sorted, and (3) cannot fail (exits with an error instead). */
-int dir_ls(const char *path, struct dirent ***namelist)
+/* Join argc strings, in argument order, into one newly allocated buffer and
+   return that buffer. A NULL argument contributes nothing, exactly like an
+   empty string; with argc of zero the result is the empty string. */
+char *cats(size_t argc, ...)
 {
-   int entry_ct;
+   char **parts;        // arguments, with NULL replaced by ""
+   size_t *part_lens;   // strlen() of each part, computed once
+   size_t total;        // bytes needed for the result, terminator included
+   char *out, *cursor;  // result buffer and the next write position in it
+   va_list ap;
 
-   entry_ct = scandir(path, namelist, dir_ls_filter, NULL);
-   Tf (entry_ct >= 0, "can't scan dir", path);
-   return entry_ct;
+   parts = ch_malloc(argc * sizeof(char *), true);
+   part_lens = ch_malloc(argc * sizeof(size_t), false);
+
+   // Pass 1: capture each argument, mapping NULL to the empty string, and
+   // add up the lengths.
+   total = 1;  // room for the terminator
+   va_start(ap, argc);
+   for (size_t k = 0; k < argc; k++) {
+      char *part = va_arg(ap, char *);
+      if (part != NULL) {
+         parts[k] = part;
+         part_lens[k] = strlen(part);
+      } else {
+         parts[k] = "";
+         part_lens[k] = 0;
+      }
+      total += part_lens[k];
+   }
+   va_end(ap);
+
+   // Pass 2: lay the parts end to end in the result buffer.
+   out = ch_malloc(total, false);
+   cursor = out;
+   for (size_t k = 0; k < argc; k++) {
+      memcpy(cursor, parts[k], part_lens[k]);
+      cursor += part_lens[k];
+   }
+   *cursor = '\0';  // cursor now sits on the last byte of the buffer
+
+   return out;
 }
 
-/* Return the number of entries in directory path, not including “.” and “..”;
-   i.e., the empty directory returns 0 despite them. */
-int dir_ls_count(const char *path)
-{
-   int ct;
-   struct dirent **namelist;
+/* Return a newly-allocated, null-terminated list of filenames in directory
+   path that match fnmatch(3)-pattern glob, excluding “.” and “..”. For a list
+   of everything, pass "*" for glob. Leading dots *do* match “*”.
 
-   ct = dir_ls(path, &namelist);
-   for (size_t i = 0; i < ct; i++)
-      free(namelist[i]);
-   free(namelist);
+   We use readdir(3) rather than scandir(3) because the latter allocates
+   memory with malloc(3). */
+char **dir_glob(const char *path, const char *glob)
+{
+   DIR *dp;
+   size_t i = 0;  // index of next free array element
+   size_t alloc_ct = 16;
+   char **entries = ch_malloc(alloc_ct * sizeof(char *), true);
+
+   Tf (dp = opendir(path), "can't open directory: %s", path);
+   while (true) {
+      struct dirent *entry;
+      int matchp;
+      errno = 0;  // readdir(3) returns NULL for both EOF and error
+      entry = readdir(dp);
+      if (entry == NULL) {
+         Zf (errno, "can’t read directory: %s", path);
+         break;  // EOF
+      }
+      if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+         continue;                    // “*” would match these; never list
+      matchp = fnmatch(glob, entry->d_name, FNM_EXTMATCH);
+      if (matchp != 0) {
+         T_ (matchp == FNM_NOMATCH);  // error?
+         continue;                    // no match, skip
+      }
+      if (i >= alloc_ct - 1) {        // keep one slot for the terminator
+         alloc_ct *= 2;
+         entries = ch_realloc(entries, alloc_ct * sizeof(char *), true);
+      }
+      entries[i++] = ch_strdup(entry->d_name);  // d_name is readdir(3)’s buffer
+   }
+   entries[i] = NULL;
+   Zf (closedir(dp), "can't close directory: %s", path);
 
-   return ct;
+   return entries;
 }
 
-/* scandir(3) filter that excludes “.” and “..”: Return 0 if e->d_name is one
-   of those strings, else 1. */
-int dir_ls_filter(const struct dirent *e)
+/* Return how many entries dir_glob(path, glob) yields, as an int. */
+int dir_glob_count(const char *path, const char *glob)
 {
-   return !(!strcmp(e->d_name, ".") || !strcmp(e->d_name, ".."));
+   return list_count(dir_glob(path, glob), sizeof(char *));
 }
 
 /* Read the file listing environment variables at path, with records separated
@@ -249,18 +335,14 @@ struct env_var *env_file_read(const char *path, int delim)
    vars = list_new(sizeof(struct env_var), 0);
    for (size_t line_no = 1; true; line_no++) {
       struct env_var var;
-      char *line = NULL;
-      size_t line_len = 0;  // don't care but required by getline(3)
+      char *line;
       errno = 0;
-      if (-1 == getdelim(&line, &line_len, delim, fp)) {
-         if (errno == 0)    // EOF
-            break;
-         else
-            Tf (0, "can't read: %s", path);
-      }
-      if (line[strlen(line) - 1] == '\n')  // rm newline if present
+      line = ch_getdelim(fp, delim);
+      if (line == NULL)  // EOF
+         break;
+      if (line[strlen(line) - 1] == (char)delim)  // rm delimiter if present
          line[strlen(line) - 1] = 0;
-      if (line[0] == 0)                    // skip blank lines
+      if (line[0] == '\0')                        // skip blank lines
          continue;
       var = env_var_parse(line, path, line_no);
       list_append((void **)&vars, &var, sizeof(var));
@@ -270,37 +352,54 @@ struct env_var *env_file_read(const char *path, int delim)
    return vars;
 }
 
+/* Look up environment variable name. Return its value if it is set;
+   otherwise, return value_default. */
+char *env_get(const char *name, char *value_default)
+{
+   char *found = getenv(name);
+   return (found != NULL) ? found : value_default;
+}
+
+
 /* Set environment variable name to value. If expand, then further expand
    variables in value marked with "$" as described in the man page. */
 void env_set(const char *name, const char *value, const bool expand)
 {
-   char *value_, *value_expanded;
-   bool first_written;
+   char *vwk = NULL;           // modifiable copy of value
 
    // Walk through value fragments separated by colon and expand variables.
-   T_ (value_ = strdup(value));
-   value_expanded = "";
-   first_written = false;
-   while (true) {                               // loop executes ≥ once
-      char *fgmt = strsep(&value_, ":");        // NULL -> no more items
-      if (fgmt == NULL)
-         break;
-      if (expand && fgmt[0] == '$' && fgmt[1] != 0) {
-         fgmt = getenv(fgmt + 1);               // NULL if unset
-         if (fgmt != NULL && fgmt[0] == 0)
-            fgmt = NULL;                        // convert empty to unset
-      }
-      if (fgmt != NULL) {                       // NULL -> omit from output
-         if (first_written)
-            value_expanded = cat(value_expanded, ":");
-         value_expanded = cat(value_expanded, fgmt);
-         first_written = true;
+   if (expand) {
+      char *vwk_cur;           // current location in vwk
+      char *vout = NULL;       // output (expanded) string
+      bool first_out = false;  // true after 1st output element written
+      vwk = ch_strdup(value);  // strsep(3) overwrites delimiters in place
+      vwk_cur = vwk;
+      while (true) {                            // loop executes ≥ once
+         char *elem = strsep(&vwk_cur, ":");    // NULL -> no more elements
+         if (elem == NULL)
+            break;
+         if (elem[0] == '$' && elem[1] != 0) {  // looks like $VARIABLE
+            elem = getenv(elem + 1);            // NULL if unset
+            if (elem != NULL && elem[0] == 0)   // set but empty
+               elem = NULL;                     // convert to unset
+         }
+         if (elem != NULL) {   // NULL -> omit from output list
+            vout = cats(3, vout, first_out ? ":" : "", elem);  // ":" between
+            first_out = true;
+         }
       }
+      value = (vout != NULL) ? vout : "";  // all elements omitted -> empty
    }
 
    // Save results.
-   VERBOSE("environment: %s=%s", name, value_expanded);
-   Z_ (setenv(name, value_expanded, 1));
+   DEBUG("environment: %s=%s", name, value);
+   Z_ (setenv(name, value, 1));
+}
+
+void envs_set(const struct env_var *vars, const bool expand)
+{
+   for (const struct env_var *v = vars; v->name != NULL; v++)  // NULL name ends
+      env_set(v->name, v->value, expand);
 }
 
 /* Remove variables matching glob from the environment. This is tricky,
@@ -310,12 +409,12 @@ void env_set(const char *name, const char *value, const bool expand)
    O(n^2) search until no matches remain.
 
    Our approach is O(n): we build up a copy of environ, skipping variables
-   that match the glob, and then assign environ to the copy. (This is a valid
-   thing to do [2].)
+   that match the glob, and then assign environ to the copy. This is a valid
+   thing to do [2].
 
    [1]: https://unix.stackexchange.com/a/302987
    [2]: http://man7.org/linux/man-pages/man3/exec.3p.html */
-void env_unset(const char *glob)
+void envs_unset(const char *glob)
 {
    char **new_environ = list_new(sizeof(char *), 0);
    for (size_t i = 0; environ[i] != NULL; i++) {
@@ -323,9 +422,9 @@ void env_unset(const char *glob)
       int matchp;
       split(&name, &value, environ[i], '=');
       T_ (name != NULL);          // environ entries must always have equals
-      matchp = fnmatch(glob, name, FNM_EXTMATCH); // extglobs if available
+      matchp = fnmatch(glob, name, FNM_EXTMATCH);  // extglobs if available
       if (matchp == 0) {
-         VERBOSE("environment: unset %s", name);
+         DEBUG("environment: unset %s", name);
       } else {
          T_ (matchp == FNM_NOMATCH);
          *(value - 1) = '=';  // rejoin line
@@ -343,17 +442,15 @@ struct env_var env_var_parse(const char *line, const char *path, size_t lineno)
 {
    char *name, *value, *where;
 
-   if (path == NULL) {
-      T_ (where = strdup(line));
-   } else {
-      T_ (1 <= asprintf(&where, "%s:%zu", path, lineno));
-   }
+   if (path == NULL)
+      where = ch_strdup(line);
+   else
+      where = ch_asprintf("%s:%zu", path, lineno);
 
    // Split line into variable name and value.
    split(&name, &value, line, '=');
    Te (name != NULL, "can't parse variable: no delimiter: %s", where);
    Te (name[0] != 0, "can't parse variable: empty name: %s", where);
-   free(where);  // for Tim
 
    // Strip leading and trailing single quotes from value, if both present.
    if (   strlen(value) >= 2
@@ -372,50 +469,116 @@ struct env_var env_var_parse(const char *line, const char *path, size_t lineno)
    list to the new location. *list can be NULL to initialize a new list.
    Return the new array size.
 
-   Note: ar must be cast, e.g. "list_append((void **)&foo, ...)".
+   Usage note: ar must be cast, e.g. "list_append((void **)&foo, ...)".
+
+   Implementation note: We could round up the new size to the next power of
+   two for allocation purposes, which would reduce the number of realloc()
+   that actually change the size. However, many allocators do this type of
+   thing internally already, and that seems a better place for it.
 
    Warning: This function relies on all pointers having the same
    representation, which is true on most modern machines but is not guaranteed
    by the standard [1]. We could instead return the new value of ar rather
    than using an out parameter, which would avoid the double pointer and
    associated non-portability but make it easy for callers to create dangling
-   pointers, i.e., after "a = list_append(b, ...)", b will dangle. That
-   problem could in turn be avoided by returning a *copy* of the array rather
-   than a modified array, but then the caller has to deal with the original
-   array itself. It seemed to me the present behavior was the best trade-off.
+   pointers, i.e., after “a = list_append(b, ...)”, b will be invalid. This
+   isn’t just about memory leaks but also the fact that b points to an invalid
+   buffer that likely *looks* valid.
 
    [1]: http://www.c-faq.com/ptrs/genericpp.html */
 void list_append(void **ar, void *new, size_t size)
 {
-   int ct;
+   size_t ct;
    T_ (new != NULL);
 
-   // count existing elements
-   if (*ar == NULL)
-      ct = 0;
-   else
-      for (ct = 0; !buf_zero_p((char *)*ar + ct*size, size); ct++)
-         ;
+   ct = list_count(*ar, size);                  // 0 if *ar is NULL
+   *ar = ch_realloc(*ar, (ct+2)*size, true);    // existing + new + terminator
+   memcpy((char *)*ar + ct*size, new, size);    // append new (no overlap)
+   memset((char *)*ar + (ct+1)*size, 0, size);  // set new terminator
+}
 
-   T_ (*ar = realloc(*ar, (ct+2)*size));        // existing + new + terminator
-   memcpy((char *)*ar + ct*size, new, size);    // append new (no overlap)
-   memset((char *)*ar + (ct+1)*size, 0, size);  // set new terminator
+/* Append the elements of list src to list *dst; *dst may be NULL (empty). */
+void list_cat(void **dst, void *src, size_t size)
+{
+   size_t ct_dst, ct_src;
+   T_ (src != NULL);
+
+   ct_dst = list_count(*dst, size);
+   ct_src = list_count(src, size);
+   *dst = ch_realloc(*dst, (ct_dst+ct_src+1)*size, true);  // both + terminator
+   memcpy((char *)*dst + ct_dst*size, src, ct_src*size);   // append src
+   memset((char *)*dst + (ct_dst+ct_src)*size, 0, size);   // new terminator
+}
+
+/* Count the elements, each size bytes long, in list ar. The all-zero
+   terminator is not counted, and a NULL list counts as empty. */
+size_t list_count(void *ar, size_t size)
+{
+   size_t n = 0;
+   char *elem = ar;  // walks the list one element at a time
+   if (ar != NULL)
+      while (!buf_zero_p(elem, size)) {
+         elem += size;
+         n++;
+      }
+   return n;
+}
 
 /* Return a pointer to a new, empty zero-terminated array containing elements
    of size size, with room for ct elements without re-allocation. The latter
    allows to pre-allocate an arbitrary number of slots in the list, which can
-   then be filled directly without testing the list's length for each one.
+   then be filled directly without testing the list’s length for each one.
    (The list is completely filled with zeros, so every position has a
    terminator after it.) */
 void *list_new(size_t size, size_t ct)
 {
    void *list;
-   T_ (list = calloc(ct+1, size));
+   T_ (size > 0);  // element size must be nonzero
+   T_ (list = ch_malloc_zeroed((ct+1) * size, true));  // ct slots + terminator
    return list;
 }
 
-/* If verbose, print uids and gids on stderr prefixed with where. */
+/* Split str into tokens delimited by delim (multiple adjacent delimiters are
+   treated as one) and return the tokens as a new list of strings. The tokens
+   point into a private, newly-allocated copy of str; str is not modified.
+
+   The function accepts a single delimiter, not multiple like strtok(3). */
+void *list_new_strings(char delim, const char *str)
+{
+   char **list;
+   char *str_, *tok_state;
+   char delims[] = { delim, '\0' };
+   size_t delim_ct = 0;
+
+   // Count delimiters so we can allocate the right size list initially,
+   // avoiding one realloc() per delimiter. Note this does not account for
+   // adjacent delimiters and thus may overcount tokens, possibly wasting a
+   // small amount of memory.
+   for (int i = 0; str[i] != '\0'; i++)
+      delim_ct += (str[i] == delim ? 1 : 0);
+
+   list = list_new(sizeof(char *), delim_ct + 1);  // (element size, count)
+
+   // Note: strtok_r(3)’s interface is rather awkward; see its man page.
+   str_ = ch_strdup(str);     // so we can modify it
+   tok_state = NULL;
+   for (int i = 0; true; i++) {
+      char *tok;
+      tok = strtok_r(str_, delims, &tok_state);
+      if (tok == NULL)
+         break;
+      T_ (i < delim_ct + 1);  // bounds check
+      list[i] = tok;
+      str_ = NULL;            // only pass actual string on first call
+   }
+
+   return list;
+}
+
+/* If verbose enough, print uids and gids on stderr prefixed with where.
+
+   FIXME: Should change to DEBUG(), but that will give the file/line within
+   this function, which we don’t want. */
 void log_ids(const char *func, int line)
 {
    uid_t ruid, euid, suid;
@@ -423,9 +586,11 @@ void log_ids(const char *func, int line)
    gid_t supp_gids[SUPP_GIDS_MAX];
    int supp_gid_ct;
 
-   if (verbose >= 3) {
+   if (verbose >= LL_TRACE + 1) {  // don’t bother b/c haven’t needed in ages
       Z_ (getresuid(&ruid, &euid, &suid));
       Z_ (getresgid(&rgid, &egid, &sgid));
+      if (log_color_p)
+         T_ (EOF != fputs(LL_COLOURS[LL_TRACE], stderr));
       fprintf(stderr, "%s %d: uids=%d,%d,%d, gids=%d,%d,%d + ", func, line,
               ruid, euid, suid, rgid, egid, sgid);
       supp_gid_ct = getgroups(SUPP_GIDS_MAX, supp_gids);
@@ -439,18 +604,48 @@ void log_ids(const char *func, int line)
          fprintf(stderr, "%d", supp_gids[i]);
       }
       fprintf(stderr, "\n");
+      if (log_color_p)
+         T_ (EOF != fputs(COLOUR_RESET, stderr));
+      Z_ (fflush(stderr));
    }
 }
 
-void test_logging(bool fail) {
-   TRACE("trace");
-   DEBUG("debug");
-   VERBOSE("verbose");
-   INFO("info");
-   WARNING("warning");
-   if (fail)
-      FATAL("the program failed inexplicably (\"log-fail\" specified)");
-   exit(0);
+/* Initialize logging: decide whether stderr output is colored according to
+   when, then, if test asks for it, emit sample messages at several levels
+   and exit. (ch-run(1)’s color synonyms are resolved by argument parsing.) */
+void logging_init(enum log_color_when when, enum log_test test)
+{
+   // Decide on color.
+   switch (when) {
+   case LL_COLOR_YES:
+      log_color_p = true;
+      break;
+   case LL_COLOR_NO:
+      log_color_p = false;
+      break;
+   case LL_COLOR_AUTO:
+      if (!isatty(fileno(stderr))) {
+         T_ (errno == ENOTTY);  // any other errno is a real failure
+         log_color_p = false;
+      } else
+         log_color_p = true;
+      break;
+   case LL_COLOR_NULL:
+      T_ (0);  // unreachable
+      break;
+   }
+
+   // Logging self-test; once started, it never returns to the caller.
+   if (test < LL_TEST_YES)
+      return;
+   TRACE("trace");
+   DEBUG("debug");
+   VERBOSE("verbose");
+   INFO("info");
+   WARNING("warning");
+   if (test >= LL_TEST_FATAL)
+      FATAL("the program failed inexplicably (\"log-fail\" specified)");
+   exit(0);
 }
 
 /* Create the directory at path, despite its parent not allowing write access,
@@ -466,12 +661,12 @@ void mkdir_overmount(const char *path, const char *scratch)
    char *parent, *path2, *over, *path_dst;
    char *orig_dir = ".orig";  // resisted calling this .weirdal
    int entry_ct;
-   struct dirent **entries;
+   char **entries;
 
    VERBOSE("making writeable via symlink ranch: %s", path);
-   path2 = strdup(path);
+   path2 = ch_strdup(path);
    parent = dirname(path2);
-   T_ (1 <= asprintf(&over, "%s/%d", scratch, dir_ls_count(scratch) + 1));
+   over = ch_asprintf("%s/%d", scratch, dir_glob_count(scratch, "*") + 1);
    path_dst = path_join(over, orig_dir);
 
    // bind-mounts
@@ -483,25 +678,16 @@ void mkdir_overmount(const char *path, const char *scratch)
        "can't bind-mount: %s- > %s", over, parent);
 
    // symlink ranch
-   entry_ct = dir_ls(path_dst, &entries);
+   entries = dir_glob(path_dst, "*");
+   entry_ct = list_count(entries, sizeof(entries[0]));
    DEBUG("existing entries: %d", entry_ct);
    for (int i = 0; i < entry_ct; i++) {
-      char * src = path_join(parent, entries[i]->d_name);
-      char * dst = path_join(orig_dir, entries[i]->d_name);
-
+      char * src = path_join(parent, entries[i]);
+      char * dst = path_join(orig_dir, entries[i]);
       Zf (symlink(dst, src), "can't symlink: %s -> %s", src, dst);
-
-      free(src);
-      free(dst);
-      free(entries[i]);
    }
-   free(entries);
 
    Zf (mkdir(path, 0755), "can't mkdir even after overmount: %s", path);
-
-   free(path_dst);
-   free(over);
-   free(path2);
 }
 
 /* Create directories in path under base. Exit with an error if anything goes
@@ -530,18 +716,17 @@ void mkdirs(const char *base, const char *path, char **denylist,
 
    TRACE("mkdirs: base: %s", basec);
    TRACE("mkdirs: path: %s", path);
-   for (size_t i = 0; denylist[i] != NULL; i++)
+   for (int i = 0; denylist[i] != NULL; i++)
       TRACE("mkdirs: deny: %s", denylist[i]);
 
-   pathw = cat(path, "");  // writeable copy
-   saveptr = NULL;         // avoid warning (#1048; see also strtok_r(3))
+   pathw = ch_strdup(path);  // writeable copy
+   saveptr = NULL;           // avoid warning (#1048; see also strtok_r(3))
    component = strtok_r(pathw, "/", &saveptr);
    nextc = basec;
    next = NULL;
    while (component != NULL) {
-      next = cat(nextc, "/");
-      next = cat(next, component);  // canonical except for last component
-      TRACE("mkdirs: next: %s", next)
+      next = path_join(nextc, component);  // canonical except for last
+      TRACE("mkdirs: next: %s", next);
       component = strtok_r(NULL, "/", &saveptr);  // next NULL if current last
       if (path_exists(next, &sb, false)) {
          if (S_ISLNK(sb.st_mode)) {
@@ -569,7 +754,7 @@ void mkdirs(const char *base, const char *path, char **denylist,
                Tf (0, "can't mkdir: %s", next);
          }
          nextc = next;  // canonical b/c we just created last component as dir
-         TRACE("mkdirs: created: %s", nextc)
+         TRACE("mkdirs: created: %s", nextc);
       }
    }
    TRACE("mkdirs: done");
@@ -587,7 +772,7 @@ void msg(enum log_level level, const char *file, int line, int errno_,
 }
 
 noreturn void msg_fatal(const char *file, int line, int errno_,
-                       const char *fmt, ...)
+                        const char *fmt, ...)
 {
    va_list ap;
 
@@ -595,52 +780,76 @@ noreturn void msg_fatal(const char *file, int line, int errno_,
    msgv(LL_FATAL, file, line, errno_, fmt, ap);
    va_end(ap);
 
-   exit(EXIT_FAILURE);
+   if (abort_fatal)
+      abort();
+   else
+      exit(EXIT_FAILURE);
 }
 
 /* va_list form of msg(). */
 void msgv(enum log_level level, const char *file, int line, int errno_,
           const char *fmt, va_list ap)
 {
-   char *message, *ap_msg;
-
-   if (level > verbose)
+   // note: all components contain appropriate leading/trailing space
+   char *text_formatted;  // caller’s message, formatted
+   char *level_prefix;    // e.g. "error: "; empty for most levels
+   char *errno_code;      // errno name via strerrorname_np(3), or empty
+   char *errno_desc;      // errno text via strerror(3), or empty
+   char *text_full;       // complete text but w/o color codes
+   const char * colour;          // ANSI codes for color
+   const char * colour_reset;    // ANSI codes to reset color
+
+   if (level > verbose)   // not verbose enough; do nothing
       return;
 
-   T_ (1 <= asprintf(&message, "%s[%d]: ",
-                     program_invocation_short_name, getpid()));
+   // Format caller message.
+   if (fmt == NULL)
+      text_formatted = "please report this bug";  // users should not see
+   else
+      text_formatted = ch_vasprintf(fmt, ap);
 
-   // Prefix for the more urgent levels.
+   // Prefix some of the levels.
    switch (level) {
    case LL_FATAL:
-      message = cat(message, "error: ");  // "fatal" too morbid for users
+      level_prefix = "error: ";   // "fatal" too morbid for users
       break;
    case LL_WARNING:
-      message = cat(message, "warning: ");
+      level_prefix = "warning: ";
       break;
    default:
+      level_prefix = "";
       break;
    }
 
-   // Default message if not specified. Users should not see this.
-   if (fmt == NULL)
-      fmt = "please report this bug";
-
-   T_ (1 <= vasprintf(&ap_msg, fmt, ap));
-   if (errno_) {
-      T_ (1 <= asprintf(&message, "%s%s: %s (%s:%d %d)", message, ap_msg,
-                        strerror(errno_), file, line, errno_));
+   // errno: both parts stay empty when errno_ is zero.
+   if (!errno_) {
+      errno_code = "";
+      errno_desc = "";
    } else {
-      T_ (1 <= asprintf(&message, "%s%s (%s:%d)", message, ap_msg, file, line));
+      errno_code = cat(" ", strerrorname_np(errno_));  // FIXME: non-portable
+      errno_desc = ch_asprintf(": %s", strerror(errno_));
    }
 
-   if (level == LL_WARNING) {
-      warnings_offset += string_append(warnings, message, WARNINGS_SIZE,
-                                       warnings_offset);
-   }
-   fprintf(stderr, "%s\n", message);
+   // Color: wrap the text in color codes only if colored logging is on.
+   if (log_color_p) {
+      colour = LL_COLOURS[level];  // indexable by negative levels too
+      colour_reset = COLOUR_RESET;
+   } else {
+      colour = "";
+      colour_reset = "";
+   };
+
+   // Format and print.
+   text_full = ch_asprintf("%s[%d]: %s%s%s (%s:%d%s)",
+                           program_invocation_short_name, getpid(),
+                           level_prefix, text_formatted, errno_desc,
+                           file, line, errno_code);
+   fprintf(stderr, "%s%s%s\n", colour, text_full, colour_reset);
    if (fflush(stderr))
-      abort();  // can't print an error b/c already trying to do that
+      abort();  // can’t print an error b/c already trying to do that
+   if (level == LL_WARNING)  // also save the uncolored text in warnings
+      warnings_offset += string_append(warnings, text_full,
+                                       WARNINGS_SIZE, warnings_offset);
 }
 
 /* Return true if the given path exists, false otherwise. On error, exit. If
@@ -669,27 +878,23 @@ bool path_exists(const char *path, struct stat *statbuf, bool follow_symlink)
 /* Concatenate paths a and b, then return the result. */
 char *path_join(const char *a, const char *b)
 {
-   char *ret;
-
    T_ (a != NULL);
    T_ (strlen(a) > 0);
    T_ (b != NULL);
    T_ (strlen(b) > 0);
 
-   T_ (asprintf(&ret, "%s/%s", a, b) == strlen(a) + strlen(b) + 1);
-
-   return ret;
+   return ch_asprintf("%s/%s", a, b);  // always puts a “/” between a and b
 }
 
 /* Return the mount flags of the file system containing path, suitable for
    passing to mount(2).
 
-   This is messy because, the flags we get from statvfs(3) are ST_* while the
+   This is messy because the flags we get from statvfs(3) are ST_* while the
    flags needed by mount(2) are MS_*. My glibc has a comment in bits/statvfs.h
-   that the ST_* "should be kept in sync with" the MS_* flags, and the values
+   that the ST_* “should be kept in sync with” the MS_* flags, and the values
    do seem to match, but there are additional undocumented flags in there.
-   Also, the kernel contains a test "unprivileged-remount-test.c" that
-   manually translates the flags. Thus, I wasn't comfortable simply passing
+   Also, the kernel contains a test “unprivileged-remount-test.c” that
+   manually translates the flags. Thus, I wasn’t comfortable simply passing
    the output of statvfs(3) to mount(2). */
 unsigned long path_mount_flags(const char *path)
 {
@@ -723,17 +928,14 @@ unsigned long path_mount_flags(const char *path)
           | (sv.f_flag & ST_SYNCHRONOUS ? MS_SYNCHRONOUS : 0);
 }
 
-/* Split path into dirname and basename. */
+/* Split path into dirname and basename. If dir and/or base is NULL, then skip
+   that output. */
 void path_split(const char *path, char **dir, char **base)
 {
-   char *path2;
-
-   T_ (path2 = strdup(path));
-   T_ (*dir = strdup(dirname(path2)));
-   free(path2);
-   T_ (path2 = strdup(path));
-   T_ (*base = strdup(basename(path2)));
-   free(path2);
+   if (dir != NULL)
+      *dir = dirname(ch_strdup(path));
+   if (base != NULL)
+      *base = basename(ch_strdup(path));
 }
 
 /* Return true if path is a subdirectory of base, false otherwise. Acts on the
@@ -778,7 +980,7 @@ char *realpath_(const char *path, bool fail_ok)
 
    if (pathc == NULL) {
       if (fail_ok) {
-         T_ (pathc = strdup(path));
+         pathc = ch_strdup(path);
       } else {
          Tf (false, "can't canonicalize: %s", path);
       }
@@ -798,32 +1000,23 @@ void replace_char(char *s, char old, char new)
 /* Split string str at first instance of delimiter del. Set *a to the part
    before del, and *b to the part after. Both can be empty; if no token is
    present, set both to NULL. Unlike strsep(3), str is unchanged; *a and *b
-   point into a new buffer allocated with malloc(3). This has two
-   implications: (1) the caller must free(3) *a but not *b, and (2) the parts
-   can be rejoined by setting *(*b-1) to del. The point here is to provide an
-   easier wrapper for strsep(3). */
+   point into a new buffer. Therefore, the parts can be rejoined by setting
+   *(*b-1) to del. The point here is to provide an easier wrapper for
+   strsep(3). */
 void split(char **a, char **b, const char *str, char del)
 {
-   char *tmp;
    char delstr[2] = { del, 0 };
    T_ (str != NULL);
-   tmp = strdup(str);
-   *b = tmp;
+   *b = ch_strdup(str);
    *a = strsep(b, delstr);
    if (*b == NULL)
       *a = NULL;
 }
 
-/* Report the version number. */
-void version(void)
-{
-   fprintf(stderr, "%s\n", VERSION);
-}
-
-/* Append null-terminated string “str” to the memory buffer “offset” bytes after
-   from the address pointed to by “addr”. Buffer length is “size” bytes. Return
-   the number of bytes written. If there isn’t enough room for the string, do
-   nothing and return zero. */
+/* Append null-terminated string “str” to the memory buffer “offset” bytes
+   after the address pointed to by “addr”. Buffer length is “size” bytes.
+   Return the number of bytes written. If there isn’t enough room for the
+   string, do nothing and return zero. */
 size_t string_append(char *addr, char *str, size_t size, size_t offset)
 {
    size_t written = strlen(str) + 1;
@@ -834,23 +1027,32 @@ size_t string_append(char *addr, char *str, size_t size, size_t offset)
    return written;
 }
 
+/* Report the version number. */
+void version(void)
+{
+   fprintf(stderr, "%s\n", VERSION);
+}
+
 /* Reprint messages stored in “warnings” memory buffer. */
 void warnings_reprint(void)
 {
    size_t offset = 0;
    int warn_ct = buf_strings_count(warnings, WARNINGS_SIZE);
 
-   if (warn_ct > 0)
-      fprintf(stderr, "%s[%d]: warning: reprinting first %d warning(s)\n",
-              program_invocation_short_name, getpid(), warn_ct);
-
-   while (   warnings[offset] != 0
-          || (offset < (WARNINGS_SIZE - 1) && warnings[offset+1] != 0)) {
-      fputs(warnings + offset, stderr);
-      fputc('\n', stderr);
-      offset += strlen(warnings + offset) + 1;
+   if (warn_ct > 0) {
+      if (log_color_p)
+         T_ (EOF != fputs(LL_COLOURS[LL_WARNING], stderr));
+      T_ (1 <= fprintf(stderr, "%s[%d]: reprinting first %d warning(s)\n",
+                       program_invocation_short_name, getpid(), warn_ct));
+      while (   warnings[offset] != 0
+             || (offset < (WARNINGS_SIZE - 1) && warnings[offset+1] != 0)) {
+         T_ (EOF != fputs(warnings + offset, stderr));
+         T_ (EOF != fputc('\n', stderr));
+         offset += strlen(warnings + offset) + 1;
+      }
+      if (log_color_p)
+         T_ (EOF != fputs(COLOUR_RESET, stderr));
+      if (fflush(stderr))
+         abort();  // can't print an error b/c already trying to do that
    }
-
-   if (fflush(stderr))
-      abort();  // can't print an error b/c already trying to do that
 }
diff --git a/bin/ch_misc.h b/bin/misc.h
similarity index 81%
rename from bin/ch_misc.h
rename to bin/misc.h
index f590a0890..2b0e16cd7 100644
--- a/bin/ch_misc.h
+++ b/bin/misc.h
@@ -5,9 +5,12 @@
    libraries that ch_core requires. */
 
 #define _GNU_SOURCE
+#pragma once
+
 #include <dirent.h>
 #include <errno.h>
 #include <stdbool.h>
+#include <stdlib.h>
 #include <sys/stat.h>
 
 
@@ -20,6 +23,11 @@
    and hopefully others support the following extension. */
 #define noreturn __attribute__ ((noreturn))
 
+/* Syslog facility and level we use. */
+#ifdef ENABLE_SYSLOG
+#define SYSLOG_PRI (LOG_USER|LOG_INFO)
+#endif
+
 /* Size of “warnings” buffer, in bytes. We want this to be big enough that we
    don’t need to worry about running out of room. */
 #define WARNINGS_SIZE (4*1024)
@@ -66,35 +74,25 @@
 #define Zf(x, ...) if (x)    msg_fatal(__FILE__, __LINE__, errno, __VA_ARGS__)
 #define Ze(x, ...) if (x)    msg_fatal(__FILE__, __LINE__, 0, __VA_ARGS__)
 
-#define FATAL(...)   msg_fatal(      __FILE__, __LINE__, 0, __VA_ARGS__);
-#define WARNING(...) msg(LL_WARNING, __FILE__, __LINE__, 0, __VA_ARGS__);
-#define INFO(...)    msg(LL_INFO,    __FILE__, __LINE__, 0, __VA_ARGS__);
-#define VERBOSE(...) msg(LL_VERBOSE, __FILE__, __LINE__, 0, __VA_ARGS__);
-#define DEBUG(...)   msg(LL_DEBUG,   __FILE__, __LINE__, 0, __VA_ARGS__);
-#define TRACE(...)   msg(LL_TRACE,   __FILE__, __LINE__, 0, __VA_ARGS__);
+#define FATAL(...)   msg_fatal(      __FILE__, __LINE__, 0, __VA_ARGS__)
+#define WARNING(...) msg(LL_WARNING, __FILE__, __LINE__, 0, __VA_ARGS__)
+#define INFO(...)    msg(LL_INFO,    __FILE__, __LINE__, 0, __VA_ARGS__)
+#define VERBOSE(...) msg(LL_VERBOSE, __FILE__, __LINE__, 0, __VA_ARGS__)
+#define DEBUG(...)   msg(LL_DEBUG,   __FILE__, __LINE__, 0, __VA_ARGS__)
+#define TRACE(...)   msg(LL_TRACE,   __FILE__, __LINE__, 0, __VA_ARGS__)
 
 
 /** Types **/
 
-enum env_action { ENV_END = 0,       // terminate list of environment changes
-                  ENV_SET_DEFAULT,   // set by /ch/environment within image
-                  ENV_SET_VARS,      // set by list of variables
-                  ENV_UNSET_GLOB };  // unset glob matches
+#ifndef HAVE_COMPARISON_FN_T
+typedef int (*comparison_fn_t) (const void *, const void *);
+#endif
 
 struct env_var {
    char *name;
    char *value;
 };
 
-struct env_delta {
-   enum env_action action;
-   union {
-      int delim;             // ENV_SET_DEFAULT
-      struct env_var *vars;  // ENV_SET_VARS
-      char *glob;            // ENV_UNSET_GLOB
-   } arg;
-};
-
 enum log_level { LL_FATAL =   -3,
                  LL_STDERR =  -2,
                  LL_WARNING = -1,
@@ -103,12 +101,23 @@ enum log_level { LL_FATAL =   -3,
                  LL_DEBUG =    2,
                  LL_TRACE =    3 };
 
+enum log_color_when { LL_COLOR_NULL = 0,
+                      LL_COLOR_AUTO,
+                      LL_COLOR_YES,
+                      LL_COLOR_NO };
+
+enum log_test { LL_TEST_NONE  = 0,
+                LL_TEST_YES   = 1,
+                LL_TEST_FATAL = 2 };
+
 
 /** External variables **/
 
-extern enum log_level verbose;
+extern bool abort_fatal;
+extern bool log_color_p;
 extern char *host_tmp;
 extern char *username;
+extern enum log_level verbose;
 extern char *warnings;
 extern size_t warnings_offset;
 
@@ -116,19 +125,26 @@ extern size_t warnings_offset;
 /** Function prototypes **/
 
 char *argv_to_string(char **argv);
+const char *bool_to_string(bool b);
 int buf_strings_count(char *str, size_t s);
 bool buf_zero_p(void *buf, size_t size);
 char *cat(const char *a, const char *b);
-int dir_ls(const char *path, struct dirent ***namelist);
-int dir_ls_count(const char *path);
-int dir_ls_filter(const struct dirent *e);
+char *cats(size_t argc, ...);
+char **dir_glob(const char *path, const char *glob);
+int dir_glob_count(const char *path, const char *glob);
 struct env_var *env_file_read(const char *path, int delim);
+char *env_get(const char *name, char *value_default);
 void env_set(const char *name, const char *value, const bool expand);
-void env_unset(const char *glob);
+void envs_set(const struct env_var *envs, const bool expand);
+void envs_unset(const char *glob);
 struct env_var env_var_parse(const char *line, const char *path, size_t lineno);
 void list_append(void **ar, void *new, size_t size);
+void list_cat(void **dst, void *src, size_t size);
+size_t list_count(void *ar, size_t size);
+void *list_new_strings(char delim, const char *s);
 void *list_new(size_t size, size_t ct);
 void log_ids(const char *func, int line);
+void logging_init(enum log_color_when when, enum log_test test);
 void test_logging(bool fail);
 void mkdirs(const char *base, const char *path, char **denylist,
             const char *scratch);
diff --git a/bin/seccomp.c b/bin/seccomp.c
new file mode 100644
index 000000000..bf620a0e3
--- /dev/null
+++ b/bin/seccomp.c
@@ -0,0 +1,261 @@
+/* Copyright © Triad National Security, LLC, and others.
+
+   This interface contains the seccomp filter for root emulation. */
+
+#define _GNU_SOURCE
+#include "config.h"
+
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "core.h"
+#include "hook.h"
+#include "mem.h"
+
+
+/** Macros **/
+
+/* On some distros (e.g., CentOS 7), some of the architecture numbers are
+   missing. The workaround is to use the numbers I have on Debian Bullseye.
+   The reason I (Reid) feel moderately comfortable doing this is how militant
+   Linux is about not changing the userspace API. */
+#ifndef AUDIT_ARCH_AARCH64
+#define AUDIT_ARCH_AARCH64 0xC00000B7u  // undeclared on CentOS 7
+#undef  AUDIT_ARCH_ARM                  // uses undeclared EM_ARM on CentOS 7
+#define AUDIT_ARCH_ARM     0x40000028u
+#endif
+
+/* Special values for seccomp tables. These must be negative to avoid clashing
+   with real syscall numbers (note zero is often a valid syscall number). */
+#define NR_NON -1  // syscall does not exist on architecture
+#define NR_END -2  // end of table
+
+/** Constants **/
+
+/* Architectures we support for seccomp. Order matches the table below. */
+int SECCOMP_ARCHS[] = { AUDIT_ARCH_AARCH64,   // arm64
+                        AUDIT_ARCH_ARM,       // arm32
+                        AUDIT_ARCH_I386,      // x86 (32-bit)
+                        AUDIT_ARCH_PPC64LE,   // PPC
+                        AUDIT_ARCH_S390X,     // s390x
+                        AUDIT_ARCH_X86_64,    // x86-64
+                        NR_END };
+
+/* System call numbers that we fake with seccomp (by doing nothing and
+   returning success). Some processors can execute multiple architectures
+   (e.g., 64-bit Intel CPUs can run both x86-64 and x86 code), and a process’
+   architecture can even change (if you execve(2) a binary of a different
+   architecture), so we can’t just use the build host’s architecture.
+
+   I haven’t figured out how to gather these system call numbers
+   automatically, so they are compiled from [1, 2, 3]. See also [4] for a more
+   general reference.
+
+   NOTE: The total number of faked syscalls (i.e., non-NR_NON entries below)
+   must be somewhat less than 256. I haven’t computed the exact limit. There
+   will be an assertion failure at runtime if this is exceeded.
+
+   WARNING: Keep this list consistent with the ch-image(1) man page!
+
+   [1]: https://chromium.googlesource.com/chromiumos/docs/+/HEAD/constants/syscalls.md#Cross_arch-Numbers
+   [2]: https://github.com/strace/strace/blob/v4.26/linux/powerpc64/syscallent.h
+   [3]: https://github.com/strace/strace/blob/v6.6/src/linux/s390x/syscallent.h
+   [4]: https://unix.stackexchange.com/questions/421750 */
+int FAKE_SYSCALL_NRS[][6] = {
+   // arm64   arm32   x86     PPC64   s390x   x86-64
+   // ------  ------  ------  ------  ------  ------
+   {      91,    185,    185,    184,    185,    126 },  // capset
+   {  NR_NON,    182,    182,    181,    212,     92 },  // chown
+   {  NR_NON,    212,    212, NR_NON, NR_NON, NR_NON },  // chown32
+   {      55,     95,     95,     95,    207,     93 },  // fchown
+   {  NR_NON,    207,    207, NR_NON, NR_NON, NR_NON },  // fchown32
+   {      54,    325,    298,    289,    291,    260 },  // fchownat
+   {  NR_NON,     16,     16,     16,    198,     94 },  // lchown
+   {  NR_NON,    198,    198, NR_NON, NR_NON, NR_NON },  // lchown32
+   {     104,    347,    283,    268,    277,    246 },  // kexec_load
+   {     152,    139,    139,    139,    216,    123 },  // setfsgid
+   {  NR_NON,    216,    216, NR_NON, NR_NON, NR_NON },  // setfsgid32
+   {     151,    138,    138,    138,    215,    122 },  // setfsuid
+   {  NR_NON,    215,    215, NR_NON, NR_NON, NR_NON },  // setfsuid32
+   {     144,     46,     46,     46,    214,    106 },  // setgid
+   {  NR_NON,    214,    214, NR_NON, NR_NON, NR_NON },  // setgid32
+   {     159,     81,     81,     81,    206,    116 },  // setgroups
+   {  NR_NON,    206,    206, NR_NON, NR_NON, NR_NON },  // setgroups32
+   {     143,     71,     71,     71,    204,    114 },  // setregid
+   {  NR_NON,    204,    204, NR_NON, NR_NON, NR_NON },  // setregid32
+   {     149,    170,    170,    169,    210,    119 },  // setresgid
+   {  NR_NON,    210,    210, NR_NON, NR_NON, NR_NON },  // setresgid32
+   {     147,    164,    164,    164,    208,    117 },  // setresuid
+   {  NR_NON,    208,    208, NR_NON, NR_NON, NR_NON },  // setresuid32
+   {     145,     70,     70,     70,    203,    113 },  // setreuid
+   {  NR_NON,    203,    203, NR_NON, NR_NON, NR_NON },  // setreuid32
+   {     146,     23,     23,     23,    213,    105 },  // setuid
+   {  NR_NON,    213,    213, NR_NON, NR_NON, NR_NON },  // setuid32
+   { NR_END }, // end
+};
+int FAKE_MKNOD_NRS[] =
+   {  NR_NON,     14,     14,     14,     14,    133 };
+int FAKE_MKNODAT_NRS[] =
+   {      33,    324,    297,    288,    290,    259 };
+
+
+/** Function prototypes (private) **/
+
+void iw(struct sock_fprog *p, int i,
+        uint16_t op, uint32_t k, uint8_t jt, uint8_t jf);
+
+
+/** Functions **/
+
+/* Prestart hook to set up the fake-syscall seccomp(2) filter. This computes
+   and installs a long-ish but fairly simple BPF program to implement the
+   filter. To understand this rather hairy language:
+
+     1. https://man7.org/training/download/secisol_seccomp_slides.pdf
+     2. https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html
+     3. https://elixir.bootlin.com/linux/latest/source/samples/seccomp */
+void hook_seccomp_install(struct container *c, void *d)
+{
+   int arch_ct = sizeof(SECCOMP_ARCHS)/sizeof(SECCOMP_ARCHS[0]) - 1;
+   int syscall_cts[arch_ct];
+   struct sock_fprog p = { 0 };
+   int ii, idx_allow, idx_fake, idx_mknod, idx_mknodat, idx_next_arch;
+   // Lengths of certain instruction groups. These are all obtained manually
+   // by counting below, violating DRY. We could automate these counts, but it
+   // seemed like the cost of extra buffers and code to do that would exceed
+   // that of maintaining the manual counts.
+   int ct_jump_start = 4;  // ld arch & syscall nr, arch test, end-of-arch jump
+   int ct_mknod_jump = 2;  // jump table handling for mknod(2) and mknodat(2)
+   int ct_mknod = 2;       // mknod(2) handling
+   int ct_mknodat = 6;     // mknodat(2) handling
+
+   // Count how many syscalls we are going to fake in the standard way. We
+   // need this to compute the right offsets for all the jumps.
+   for (int ai = 0; SECCOMP_ARCHS[ai] != NR_END; ai++) {
+      p.len += ct_jump_start + ct_mknod_jump;
+      syscall_cts[ai] = 0;
+      for (int si = 0; FAKE_SYSCALL_NRS[si][0] != NR_END; si++) {
+         bool syscall_p = FAKE_SYSCALL_NRS[si][ai] != NR_NON;
+         syscall_cts[ai] += syscall_p;
+         p.len += syscall_p;  // syscall jump table entry
+      }
+   }
+
+   // Initialize program buffer.
+   p.len += (  1             // return allow
+             + 1             // return fake success
+             + ct_mknod      // mknod(2) handling
+             + ct_mknodat);  // mknodat(2) handling
+   DEBUG("seccomp: filter program has %d instructions", p.len);
+   p.filter = ch_malloc(p.len * sizeof(struct sock_filter), false);
+
+   // Return call addresses. Allow needs to come first because we’ll jump to
+   // it for unknown architectures.
+   idx_allow =   p.len - 2 - ct_mknod - ct_mknodat;
+   idx_fake =    p.len - 1 - ct_mknod - ct_mknodat;
+   idx_mknod =   p.len     - ct_mknod - ct_mknodat;
+   idx_mknodat = p.len                - ct_mknodat;
+
+   // Build a jump table for each architecture. The gist is: if architecture
+   // matches, fall through into the jump table, otherwise jump to the next
+   // architecture (or ALLOW for the last architecture).
+   ii = 0;
+   idx_next_arch = -1;  // avoid warning on some compilers
+   for (int ai = 0; SECCOMP_ARCHS[ai] != NR_END; ai++) {
+      int jump;
+      idx_next_arch = ii + syscall_cts[ai] + ct_jump_start + ct_mknod_jump;
+      // load arch into accumulator
+      iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
+         offsetof(struct seccomp_data, arch), 0, 0);
+      // jump to next arch if arch doesn't match
+      jump = idx_next_arch - ii - 1;
+      T_ (jump <= 255);
+      iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, SECCOMP_ARCHS[ai], 0, jump);
+      // load syscall number into accumulator
+      iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
+         offsetof(struct seccomp_data, nr), 0, 0);
+      // jump table of syscalls
+      for (int si = 0; FAKE_SYSCALL_NRS[si][0] != NR_END; si++) {
+         int nr = FAKE_SYSCALL_NRS[si][ai];
+         if (nr != NR_NON) {
+            jump = idx_fake - ii - 1;
+            T_ (jump <= 255);
+            iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, nr, jump, 0);
+         }
+      }
+      // jump to mknod(2) handling (add even if syscall not implemented to
+      // make the instruction counts simpler)
+      jump = idx_mknod - ii - 1;
+      T_ (jump <= 255);
+      iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, FAKE_MKNOD_NRS[ai], jump, 0);
+      // jump to mknodat(2) handling
+      jump = idx_mknodat - ii - 1;
+      T_ (jump <= 255);
+      iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, FAKE_MKNODAT_NRS[ai], jump, 0);
+      // unfiltered syscall, jump to allow (limit of 255 doesn’t apply to JA)
+      jump = idx_allow - ii - 1;
+      iw(&p, ii++, BPF_JMP|BPF_JA, jump, 0, 0);
+   }
+   T_ (idx_next_arch == idx_allow);
+
+   // Returns. (Note that if we wanted a non-zero errno, we’d bitwise-or with
+   // SECCOMP_RET_ERRNO. But because fake success is errno == 0, we don’t need
+   // a no-op “| 0”.)
+   T_ (ii == idx_allow);
+   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ALLOW, 0, 0);
+   T_ (ii == idx_fake);
+   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ERRNO, 0, 0);
+
+   // mknod(2) handling. This just loads the file mode and jumps to the right
+   // place in the mknodat(2) handling.
+   T_ (ii == idx_mknod);
+   // load mode argument into accumulator
+   iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
+                offsetof(struct seccomp_data, args[1]), 0, 0);
+   // jump to mode test
+   iw(&p, ii++, BPF_JMP|BPF_JA, 1, 0, 0);
+
+   // mknodat(2) handling.
+   T_ (ii == idx_mknodat);
+   // load mode argument into accumulator
+   iw(&p, ii++, BPF_LD|BPF_W|BPF_ABS,
+                offsetof(struct seccomp_data, args[2]), 0, 0);
+   // jump to fake return if trying to create a device.
+   iw(&p, ii++, BPF_ALU|BPF_AND|BPF_K, S_IFMT, 0, 0);   // file type only
+   iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, S_IFCHR, 2, 0);
+   iw(&p, ii++, BPF_JMP|BPF_JEQ|BPF_K, S_IFBLK, 1, 0);
+   // returns
+   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ALLOW, 0, 0);
+   iw(&p, ii++, BPF_RET|BPF_K, SECCOMP_RET_ERRNO, 0, 0);
+
+   // Install filter. Use prctl(2) rather than seccomp(2) for slightly greater
+   // compatibility (Linux 3.5 rather than 3.17) and because there is a glibc
+   // wrapper.
+   T_ (ii == p.len);  // next instruction now one past the end of the buffer
+   Z_ (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &p));
+   DEBUG("seccomp: see contributor's guide to disassemble");
+
+   // Test filter. This will fail if the kernel executes the call (because we
+   // are not really privileged and the arguments are bogus) or succeed if
+   // filter handles it. We selected it over something more naturally in the
+   // filter, e.g. setuid(2), because (1) no container process should ever use
+   // it and (2) it’s unlikely to be emulated by a smarter filter in the
+   // future, i.e., it won’t silently start doing something.
+   Zf (syscall(SYS_kexec_load, 0, 0, NULL, 0),
+       "seccomp root emulation failed (is your architecture supported?)");
+}
+
+/* Helper: store one BPF instruction at index i of seccomp program p. */
+void iw(struct sock_fprog *p, int i,
+        uint16_t op, uint32_t k, uint8_t jt, uint8_t jf)
+{
+   p->filter[i] = (struct sock_filter){ .code=op, .jt=jt, .jf=jf, .k=k };
+}
+
diff --git a/bin/seccomp.h b/bin/seccomp.h
new file mode 100644
index 000000000..821a646ee
--- /dev/null
+++ b/bin/seccomp.h
@@ -0,0 +1,10 @@
+/* Copyright © Triad National Security, LLC, and others.
+
+   This interface contains the seccomp filter for root emulation. */
+
+#define _GNU_SOURCE
+#pragma once
+
+#include "core.h"
+
+void hook_seccomp_install(struct container *c, void *d);
diff --git a/configure.ac b/configure.ac
index f05b4cee0..121efd593 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,6 +74,74 @@ AC_CONFIG_FILES([Makefile
                  test/Makefile])
 
 
+### Our macros ######################################################
+
+# Macro to validate executable versions. Arguments:
+#
+#   $1  name of variable containing executable name or absolute path
+#   $2  minimum version
+#   $3  append to $1 to make shell pipeline to get actual version only
+#       (e.g., without program name)
+#
+# This macro is not able to determine if a program exists, only whether its
+# version is sufficient. ${!1} (i.e, the value of the variable whose name is
+# stored in $1) must be either empty, an absolute path to an executable, or
+# the name of a program in $PATH. A prior macro such as AX_WITH_PROG can be
+# used to ensure this condition.
+#
+# If ${!1} is an absolute path, and that file isn’t executable, error out. If
+# it’s something other than an absolute path, assume it’s the name of a
+# program in $PATH; if not, the behavior is undefined but not good (FIXME).
+#
+# Post-conditions:
+#
+#   1. If ${!1} is non-empty and the version reported by the program is
+#      greater than or equal to the minimum, ${!1} is unchanged. If ${!1} is
+#      empty or reported version is insufficient, ${!1} is the empty string.
+#      This lets you test version sufficiency by whether ${!1} is empty.
+#
+#   2. $1_VERSION_NOTE contains a brief explanatory note.
+#
+AC_DEFUN([CH_CHECK_VERSION], [
+  AS_VAR_PUSHDEF([prog], [$1])
+  AS_IF([test -n "$prog"], [
+    # ${!1} is non-empty
+    AS_CASE([$prog],
+      # absolute path; check if executable
+      [/*], [AC_MSG_CHECKING([if $prog is executable])
+             AS_IF([test -x "$prog"],
+              [AC_MSG_RESULT([ok])],
+              [AC_MSG_RESULT([no])
+               AC_MSG_ERROR([must be executable])])])
+    AC_MSG_CHECKING([if $prog version >= $2])
+    vact=$($prog $3)
+    AX_COMPARE_VERSION([$2], [le], [$vact], [
+      AC_SUBST([$1_VERSION_NOTE], ["ok ($vact)"])
+      AC_MSG_RESULT([ok ($vact)])
+    ], [
+      AC_SUBST([$1_VERSION_NOTE], ["too old ($vact)"])
+      AC_MSG_RESULT([too old ($vact)])
+      AS_UNSET([$1])
+    ])
+  ], [
+    # ${!1} is empty
+    AC_SUBST([$1_VERSION_NOTE], ["not found"])
+    AS_UNSET([$1])
+  ])
+  AS_VAR_POPDEF([prog])
+])
+
+# Macro to validate that $1 is a directory (or a symlink to one). If not, exit
+# with error, prefixed with $2.
+AC_DEFUN([CH_REQUIRE_DIR], [
+  AC_MSG_CHECKING([whether $1 is a directory])
+  AS_IF([test -d "$1"],
+        [AC_MSG_RESULT(yes)],
+        [AC_MSG_RESULT(no)
+         AC_MSG_ERROR([$2: not a directory: $1])])
+])
+
+
 ### Options ##################################################################
 
 # Note: Variables must match option, e.g. --disable-foo-bar => enable_foo_bar.
@@ -120,6 +188,7 @@ AC_ARG_ENABLE([test],
   AS_HELP_STRING([--disable-test], [test suite]),
   [], [enable_test=yes])
 
+# --with-seccomp
 AC_ARG_WITH([seccomp],
   AS_HELP_STRING([--with-seccomp=(yes|no)],
                  [support for --seccomp]))
@@ -139,6 +208,69 @@ AS_CASE([$with_seccomp],
   [*],    # anything else
     [AC_MSG_ERROR([invalid --with-seccomp arg: $with_seccomp])])
 
+# --with-gc
+AC_ARG_WITH([gc],
+  AS_HELP_STRING([--with-gc=@<:@yes|no@:>@],
+                 [enable conservative garbage collection with libgc]))
+AS_CASE([$with_gc],
+  [yes],
+    [want_gc=yes
+     need_gc=yes],
+  [no],
+    [want_gc=no
+     need_gc=no],
+  [''],
+    [want_gc=yes
+     need_gc=no],
+  [*],
+    [AC_MSG_ERROR([--with-gc: bad argument: $with_gc])])
+
+AC_ARG_WITH([gc-include],
+  AS_HELP_STRING([--with-gc-include=DIR],
+                 [directory containing gc.h (if not in defaults)]))
+AS_IF([test -n "$with_gc_include"],
+      [inc_gc=$with_gc_include
+       CH_REQUIRE_DIR([$inc_gc], [--with-gc-include])])
+
+AC_ARG_WITH([gc-lib],
+  AS_HELP_STRING([--with-gc-lib=DIR],
+                 [directory containing libgc.so (if not in defaults)]))
+AS_IF([test -n "$with_gc_lib"],
+      [lib_gc=$with_gc_lib
+       CH_REQUIRE_DIR([$lib_gc], [--with-gc-lib])])
+
+# --with-json
+AC_ARG_WITH([json],
+  AS_HELP_STRING([--with-json=@<:@yes|no@:>@],
+                 [enable JSON features by linking with libcjson]))
+AS_CASE([$with_json],
+  [yes],  # --with-json=yes or --with-json
+    [want_json=yes
+     need_json=yes],
+  [no],   # --with-json=no or --without-json
+    [want_json=no
+     need_json=no],
+  [''],   # neither --with-json nor --without-json specified
+    [want_json=yes
+     need_json=no],
+  [*],    # unknown argument
+    [AC_MSG_ERROR([--with-json: bad argument: $with_json])])
+
+AC_ARG_WITH([json-include],
+  AS_HELP_STRING([--with-json-include=DIR],
+                 [directory containing cJSON.h (if not in defaults)]))
+AS_IF([test -n "$with_json_include"],
+      [inc_json=$with_json_include
+       CH_REQUIRE_DIR([$inc_json], [--with-json-include])])
+
+AC_ARG_WITH([json-lib],
+  AS_HELP_STRING([--with-json-lib=DIR],
+                 [directory containing libcjson.so (if not in defaults)]))
+AS_IF([test -n "$with_json_lib"],
+      [lib_json=$with_json_lib
+       CH_REQUIRE_DIR([$lib_json], [--with-json-lib])])
+
+# --with-libsquashfuse
 AC_ARG_WITH([libsquashfuse],
   AS_HELP_STRING([--with-libsquashfuse=@<:@yes|no|PATH@:>@],
                  [whether to link with libsquashfuse]))
@@ -174,145 +306,49 @@ AC_ARG_WITH([sphinx-python],
     [sphinx_python=''])
 
 
-### Feature test macros ######################################################
-
-# Macro to validate executable versions. Arguments:
-#
-#   $1  name of variable containing executable name or absolute path
-#   $2  minimum version
-#   $3  append to $1 to make shell pipeline to get actual version only
-#       (e.g., without program name)
-#
-# This macro is not able to determine if a program exists, only whether its
-# version is sufficient. ${!1} (i.e, the value of the variable whose name is
-# stored in $1) must be either empty, an absolute path to an executable, or
-# the name of a program in $PATH. A prior macro such as AX_WITH_PROG can be
-# used to ensure this condition.
-#
-# If ${!1} is an absolute path, and that file isn’t executable, error out. If
-# it’s something other than an absolute path, assume it’s the name of a
-# program in $PATH; if not, the behavior is undefined but not good (FIXME).
-#
-# Post-conditions:
-#
-#   1. If ${!1} is non-empty and the version reported by the program is
-#      greater than or equal to the minimum, ${!1} is unchanged. If ${!1} is
-#      empty or reported version is insufficient, ${!1} is the empty string.
-#      This lets you test version sufficiency by whether ${!1} is empty.
-#
-#   2. $1_VERSION_NOTE contains a brief explanatory note.
-#
-AC_DEFUN([CH_CHECK_VERSION], [
-  AS_VAR_PUSHDEF([prog], [$1])
-  AS_IF([test -n "$prog"], [
-    # ${!1} is non-empty
-    AS_CASE([$prog],
-      # absolute path; check if executable
-      [/*], [AC_MSG_CHECKING([if $prog is executable])
-             AS_IF([test -e "$prog"],
-              [AC_MSG_RESULT([ok])],
-              [AC_MSG_RESULT([no])
-               AC_MSG_ERROR([must be executable])])])
-    AC_MSG_CHECKING([if $prog version >= $2])
-    vact=$($prog $3)
-    AX_COMPARE_VERSION([$2], [le], [$vact], [
-      AC_SUBST([$1_VERSION_NOTE], ["ok ($vact)"])
-      AC_MSG_RESULT([ok ($vact)])
-    ], [
-      AC_SUBST([$1_VERSION_NOTE], ["too old ($vact)"])
-      AC_MSG_RESULT([too old ($vact)])
-      AS_UNSET([$1])
-    ])
-  ], [
-    # ${!} is empty
-    AC_SUBST([$1_VERSION_NOTE], ["not found"])
-    AS_UNSET([$1])
-  ])
-  AS_VAR_POPDEF([prog])
-])
-
-
 ### C compiler ###############################################################
 
 # Need a C99 compiler. (See https://stackoverflow.com/a/28558338.)
 AC_PROG_CC
 
-# Set up CFLAGS.
-ch_cflags='-std=c99 -Wall'
-AS_IF([test -n "$lib_libsquashfuse"],
-      [ch_cflags="$ch_cflags -I$inc_libsquashfuse -L$lib_libsquashfuse"
-       # Without this, clang fails with “error: argument unused during
-       # compilation” on the -L. GCC ignores it.
-       ch_cflags="$ch_cflags -Wno-unused-command-line-argument"])
+# Set up CFLAGS. -Wno-unused-command-line-argument is for clang, which fails
+# with an error if -L is present for non-linking stages. It seemed easier to
+# add it unconditionally rather than maintain conditionals about which
+# compiler and which libraries.
+ch_cflags='-std=c99 -Wall -Wno-unused-command-line-argument'
 AS_IF([test $use_werror = yes],
       [ch_cflags="$ch_cflags -Werror"])
+AS_IF([test -n "$inc_gc"],                   # -L$lib_gc added below
+      [ch_cflags="$ch_cflags -I$inc_gc"])
+AS_IF([test -n "$inc_json"],                 # -L$lib_json added below
+      [ch_cflags="$ch_cflags -I$inc_json"])
+AS_IF([test -n "$lib_libsquashfuse"],
+      [ch_cflags="$ch_cflags -I$inc_libsquashfuse -L$lib_libsquashfuse"])
 
-AX_CHECK_COMPILE_FLAG([$ch_cflags], [
-  CFLAGS="$CFLAGS $ch_cflags"
-], [
-  AC_MSG_ERROR([no suitable C99 compiler found])
-])
+AX_CHECK_COMPILE_FLAG([$ch_cflags], [],
+                      [AC_MSG_ERROR([no suitable C99 compiler found])])
 AS_IF([test "$CC" = icc],
       [AC_MSG_ERROR([icc not supported (see PR @%:@481)])])
 
 
 ### ch-run required ##########################################################
 
-# Only ch-run needs any kind of interesting library stuff; this variable holds
-# the library arguments we need. This also requires us to use AC_CHECK_LIB
-# instead of the (recommended by docs) AC_SEARCH_LIBS, because that adds
-# things to LIBS, which we don’t want because it’s applied to all executables.
-CH_RUN_LIBS=
-
-# asprintf(3)
-#
-# You can do this with AC_CHECK_FUNC or AC_CHECK_FUNCS, but those macros call
-# the function with no arguments. This causes a warning for asprintf() for
-# some compilers (and I have no clue why others accept it); see issue #798.
-# Instead, try to build a small test program that calls asprintf() correctly.
-AC_MSG_CHECKING([for asprintf in libc])
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-    #define _GNU_SOURCE
-    #include <stdio.h>
-    #include <stdlib.h>
-
-    int main(void)
-    {
-       char *p;
-       if (asprintf(&p, "WEIRD AL YANKOVIC\n") >= 0)
-          free(p);
-       return 0;
-    }
-  ]])],
-  [AC_MSG_RESULT([yes])],
-  [AC_MSG_RESULT([no])
-   AC_MSG_ERROR([asprintf(3) not found; please report this bug])])
+# Note: We link both ch-run and ch-checkns with all the shared libraries,
+# despite the latter using much less, depending on the compiler to omit
+# libraries that aren’t actually used (gcc does this) or just not caring that
+# extra libraries are linked.
 
 # argp_parse(3), which is included with glibc but not other libc’s, e.g. musl.
-AC_MSG_CHECKING([for argp_parse in libc])
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
-    #include <argp.h>
-
-    int main(void)
-    {
-        argp_parse(0, 1, NULL, 0, 0, 0);
-        return 0;
-    }
-  ]])],
-  [AC_MSG_RESULT([yes])],  # built-in, no further action
-  [AC_MSG_RESULT([no])     # try external libargp
-   AC_CHECK_LIB(
-     [argp], [argp_parse],
-     [CH_RUN_LIBS="-largp $CH_RUN_LIBS"],
-     [AC_MSG_ERROR([argp_parse(3) not found; please report this bug])])])
+# In the latter case, we need an external libargp.
+AC_SEARCH_LIBS(argp_parse, argp, [],
+  [AC_MSG_ERROR([argp_parse(3) not found; please report this bug])])
 
 # pthreads; needed for “ch-run --join”.
 AX_PTHREAD
 
-# POSIX IPC lives in librt.
-AC_CHECK_LIB([rt], [shm_open], [CH_RUN_LIBS="-lrt $CH_RUN_LIBS"], [
-  AC_MSG_ERROR([shm_open(3) not found])
-])
+# POSIX IPC sometimes lives in librt.
+AC_SEARCH_LIBS(shm_open, rt, [],
+  [AC_MSG_ERROR([shm_open(3) not found; please report this bug])])
 
 # User namespaces
 AC_MSG_CHECKING([if in chroot])  # https://unix.stackexchange.com/a/14346
@@ -339,6 +375,9 @@ AC_RUN_IFELSE([AC_LANG_SOURCE([[
   [AC_MSG_ERROR([cross-compilation not supported])])
 AC_MSG_RESULT($have_userns)
 
+
+### ch-run optional ##########################################################
+
 # overlayfs
 AC_DEFUN([CH_OVERLAY_C], [[
   #define _GNU_SOURCE
@@ -421,9 +460,6 @@ AS_IF([test $enable_impolite_checks = yes],
                      [AC_MSG_ERROR([cross-compilation not supported])])])
 AC_MSG_RESULT($have_tmpfs_xattrs)
 
-
-### ch-run optional ##########################################################
-
 # FNM_EXTMATCH is a GNU extension to support extended globs in fnmatch(3).
 AC_CHECK_DECL(FNM_EXTMATCH,
               [have_fnm_extmatch=yes],
@@ -431,6 +467,54 @@ AC_CHECK_DECL(FNM_EXTMATCH,
               [[#define _GNU_SOURCE
                 #include <fnmatch.h>]])
 
+# libgc. Note that we don’t try to ensure the header we find matches the
+# library we find. Hopefully that’s not a problem.
+AS_IF([test $want_gc = yes], [
+  AC_SEARCH_LIBS(GC_malloc, gc,
+                 [have_libgc=yes
+                  AS_IF([test -n "$lib_gc"],
+                        [ch_ldflags="-Wl,-rpath=$lib_gc $ch_ldflags"])],
+                 [have_libgc=no])
+  AC_CHECK_HEADER([gc.h],
+                  [have_gc_h=yes],
+                  [have_gc_h=no])
+], [have_libgc=no
+    have_gc_h=no])
+# Error out if needed but not found.
+AS_IF([test $have_libgc = yes && test $have_gc_h = yes],
+      [have_gc=yes],
+      [have_gc=no])
+AS_IF([test $need_gc = yes && test $have_gc = no],
+      [AC_MSG_ERROR([--with-gc=yes but libgc not found])])
+
+# cJSON. Also do not check this header matches the library we find.
+AS_IF([test $want_json = yes], [
+  AC_SEARCH_LIBS(cJSON_ParseWithLength, cjson,
+                 [have_libcjson=yes
+                  AS_IF([test -n "$lib_json"],
+                        [ch_ldflags="-Wl,-rpath=$lib_json $ch_ldflags"])],
+                 [have_libcjson=no])
+  # The include file installs by default to “$PREFIX/include/cjson/cJSON.h”,
+  # but --with-json-include shouldn’t require a “cjson” subdirectory and it
+  # seemed impossible to document that concisely anyway. Therefore, try both
+  # and define a macro. Double quotes support bundling it with Charliecloud.
+  AC_CHECK_HEADER([cJSON.h],
+                  [have_cjson_h=yes
+                   cjson_h='"cJSON.h"'],
+                  [AC_CHECK_HEADER([cjson/cJSON.h],
+                                   [have_cjson_h=yes
+                                    cjson_h='"cjson/cJSON.h"'],
+                                   [cjson_h='not found'
+                                    have_cjson_h=no])])
+], [have_libcjson=no
+    have_cjson_h=no])
+# Error out if needed but not found.
+AS_IF([test $have_libcjson = yes && test $have_cjson_h = yes],
+      [have_json=yes],
+      [have_json=no])
+AS_IF([test $need_json = yes && test $have_json = no],
+      [AC_MSG_ERROR([--with-json=yes but cJSON not found])])
+
 # Should we build seccomp?
 AC_MSG_CHECKING([for seccomp filter support])
 AC_RUN_IFELSE([AC_LANG_SOURCE([[
@@ -501,18 +585,30 @@ AS_IF([test $want_libsquashfuse = yes], [
         [AC_MSG_ERROR([need pkg-config to find libfuse3; try --with-libsquashfuse=no or see issue @%:@1844])])
   AS_IF([pkg-config --exists fuse3], [
     have_libfuse3=yes
-    CFLAGS="$CFLAGS $(pkg-config --cflags fuse3)"
+    ch_cflags="$ch_cflags $(pkg-config --cflags fuse3)"
+    # add -lfuse3 to LIBS (we already know it is available)
+    AC_SEARCH_LIBS(fuse_session_new, fuse3, [],
+      [AC_MSG_ERROR([libfuse3 found but not found; please report this bug])])
     # libsquashfuse?
-    AC_CHECK_LIB([squashfuse_ll], [sqfs_ll_mount],
-      [have_libsquashfuse_ll=yes],
-      [have_libsquashfuse_ll=no])
-    # ll.h?
+    AC_SEARCH_LIBS(sqfs_ll_mount, squashfuse_ll,
+                   [have_libsquashfuse_ll=yes],
+                   [have_libsquashfuse_ll=no])
+    # ll.h? This check is hairy because AC_CHECK_HEADERS tries to actually
+    # compile a program that includes the header, but that won’t work for ll.h
+    # without the -I for fuse3 we got from pkg-config. We’re also advised not
+    # to change $CFLAGS within configure.ac [1]. I couldn’t figure out a way
+    # to get the -I into AC_CHECK_HEADER without changing $CFLAGS, so I just
+    # put it back to follow the advice as best I could.
+    # [1]: https://www.gnu.org/software/autoconf/manual/autoconf-2.65/html_node/Preset-Output-Variables.html
+    cflags_old=$CFLAGS
+    CFLAGS="$ch_cflags $CFLAGS"
     AC_CHECK_HEADER([squashfuse/ll.h],
       [have_ll_h=yes],
       [have_ll_h=no],
       [#define SQFS_CONFIG_H
        #define FUSE_USE_VERSION 32
-      ])  # see comment in ch_fuse.c regarding these defines
+      ])  # see comment in fuse.c regarding these defines
+    CFLAGS=$cflags_old
   ],
   [have_libfuse3=no])
 ])
@@ -524,9 +620,7 @@ AS_IF([   test $want_libsquashfuse = yes \
        && test $have_ll_h = yes],
       [have_libsquashfuse=yes
        AS_IF([test -n "$lib_libsquashfuse"],
-             [rpath_libsquashfuse=-Wl,-rpath=$lib_libsquashfuse],
-             [rpath_libsquashfuse=])
-       CH_RUN_LIBS="-lsquashfuse_ll -lfuse3 $rpath_libsquashfuse $CH_RUN_LIBS"],
+             [ch_ldflags="-Wl,-rpath=$lib_libsquashfuse $ch_ldflags"])],
       [have_libsquashfuse=no])
 AS_IF([   test $need_libsquashfuse = yes \
        && test $have_libsquashfuse = no],
@@ -764,14 +858,13 @@ CH_CHECK_VERSION([WGET], [$vmin_wget], [--version | head -1 | cut -d' ' -f3])
 #      the output Makefile. It *does not* create a Make variable.
 #
 #   4. AC_DEFINE(foo, value, comment) #define’s the preprocessor symbol foo to
-#      value in config.h. (Supposedly value and comment are optional but I got
-#      warnings doing that.) So this is how you make configure values
-#      available in C code (as macros, not variables). Typically you would
-#      define something or not (allowing #ifdef), rather than always define to
-#      true or false (which would require #if).
+#      value in config.h. (Supposedly, value and comment are optional but I
+#      got warnings doing that.) Importantly, value is not expanded. This is
+#      good for either defining or not defining a C macro; you can then use
+#      #ifdef to gate on that macro.
 #
-#   5. AC_DEFINE_UNQUOTES adds some extra transformations to the above. I
-#      didn’t quite follow.
+#   5. AC_DEFINE_UNQUOTED also expands value. This is good for defining a C
+#      macro to the actual value of some configure variable.
 #
 # Below are all the variables we want available outside configure.
 
@@ -783,21 +876,29 @@ AS_IF([test $enable_syslog = yes],
       [AC_DEFINE([ENABLE_SYSLOG], [1], [log to syslog])])
 AM_CONDITIONAL([ENABLE_TEST], [test $enable_test = yes])
 
-AC_SUBST([CH_RUN_LIBS])
-AC_SUBST([PYTHON_SHEBANG])
-AC_SUBST([SPHINX])
+AC_SUBST(AM_CFLAGS, [$ch_cflags])
+AC_SUBST(AM_LDFLAGS, [$ch_ldflags])
+AC_SUBST(PYTHON_SHEBANG)
+AC_SUBST(SPHINX)
 
-AS_IF([test $have_overlayfs = yes],
-      [AC_DEFINE([HAVE_OVERLAYFS], [1], [unprivileged overlayfs])])
-AS_IF([test $have_tmpfs_xattrs = yes],
-      [AC_DEFINE([HAVE_TMPFS_XATTRS], [1], [tmpfs user xattrs])])
 AS_IF([test $have_fnm_extmatch = yes],
       [AC_DEFINE([HAVE_FNM_EXTMATCH], [1], [extended globs supported])])
-AS_IF([test $have_seccomp = yes],
-      [AC_DEFINE([HAVE_SECCOMP], [1], [seccomp supported])])
+AS_IF([test $have_gc = yes],
+      [AC_DEFINE([HAVE_GC], [1], [enable garbage collection])])
+AM_CONDITIONAL([HAVE_JSON], [test $have_json = yes])
+AS_IF([test $have_json = yes],
+      [AC_DEFINE([HAVE_JSON], [1], [enable JSON features])
+       AC_DEFINE_UNQUOTED([CJSON_H], [$cjson_h], [cJSON.h location])])
 AM_CONDITIONAL([HAVE_LIBSQUASHFUSE], [test $have_libsquashfuse = yes])
 AS_IF([test $have_libsquashfuse = yes],
       [AC_DEFINE([HAVE_LIBSQUASHFUSE], [1], [link with libsquashfuse])])
+AS_IF([test $have_overlayfs = yes],
+      [AC_DEFINE([HAVE_OVERLAYFS], [1], [unprivileged overlayfs])])
+AS_IF([test $have_seccomp = yes],
+      [AC_DEFINE([HAVE_SECCOMP], [1], [seccomp supported])])
+AM_CONDITIONAL([HAVE_SECCOMP], [test $have_seccomp = yes])
+AS_IF([test $have_tmpfs_xattrs = yes],
+      [AC_DEFINE([HAVE_TMPFS_XATTRS], [1], [tmpfs user xattrs])])
 
 
 
@@ -811,6 +912,20 @@ AS_IF([   test $have_userns = yes],
       [have_ch_run=yes],
       [have_ch_run=no])
 
+AS_IF([   test $want_gc = yes],
+      [note_libgc=$have_libgc
+       note_gc_h=$have_gc_h],
+      [note_libgc='not tested'
+       note_gc_h='not tested'])
+
+AS_IF([   test $want_json = yes],
+      [note_libcjson=$have_libcjson
+       AS_IF([test $have_cjson_h = yes],
+             [note_cjson_h="yes, $cjson_h"],
+             [note_cjson_h=no])],
+      [note_libcjson='not tested'
+       note_cjson_h='not tested'])
+
 # image builders
 
 AS_IF([   test $enable_ch_image = yes \
@@ -946,10 +1061,22 @@ Building Charliecloud
     test suite ... ${enable_test}
 
   required:
-    C99 compiler ... ${CC} ${CFLAGS}
-
-  optional:
-    extended glob patterns in --unset-env ... ${have_fnm_extmatch}
+    C99 compiler ... ${CC}
+    \$CFLAGS ... ${ch_cflags}
+    \$LDFLAGS ... ${ch_ldflags}
+    library args ... ${LIBS}
+
+  extended glob patterns in --unset-env: ${have_fnm_extmatch}
+
+  garbage collection: ${have_gc}
+    enabled ... ${want_gc}
+    libgc ... ${note_libgc}
+    gc.h ... ${note_gc_h}
+
+  JSON features: ${have_json}
+    enabled ... ${want_json}
+    libcjson ... ${note_libcjson}
+    cJSON.h ... ${note_cjson_h}
 
   ch-run(1) internal SquashFS mounting: ${have_libsquashfuse}
     enabled ... ${want_libsquashfuse}
diff --git a/doc/cdi-nvidia.json b/doc/cdi-nvidia.json
new file mode 100644
index 000000000..0bc419d09
--- /dev/null
+++ b/doc/cdi-nvidia.json
@@ -0,0 +1,36 @@
+{
+  "cdiVersion": "0.5.0",
+  "kind": "nvidia.com/gpu",
+  "devices": [ {
+      "name": "foo",
+      "containerEdits": {
+        "deviceNodes": [ { "path": "/dev/nvidia0" },
+                         { "path": "/dev/dri/card0" } ],
+        "hooks": [ { "hookName": "createContainer",
+                     "path": "/usr/bin/nvidia-ctk",
+                     "args": [ "nvidia-ctk",
+                               "hook", "create-symlinks",
+                               "--link", "../card0::/dev/dri/by-path/pci-0000:07:00.0-card"
+                             ] } ] } } ],
+  "containerEdits": {
+    "env": [ "NVIDIA_VISIBLE_DEVICES=void" ],
+    "deviceNodes": [ { "path": "/dev/nvidia-modeset" },
+                     { "path": "/dev/nvidiactl" } ],
+    "mounts": [
+      { "hostPath": "/run/nvidia-fabricmanager/socket",
+        "containerPath": "/run/nvidia-fabricmanager/socket",
+        "options": [ "ro", "nosuid", "nodev", "bind", "noexec" ] },
+      { "hostPath": "/usr/bin/nvidia-smi",
+        "containerPath": "/usr/bin/nvidia-smi",
+        "options": [ "ro", "nosuid", "nodev", "bind" ] },
+      { "hostPath": "/usr/lib/x86_64-linux-gnu/libcuda.so.535.161.08",
+        "containerPath": "/usr/lib/x86_64-linux-gnu/libcuda.so.535.161.08",
+        "options": [ "ro", "nosuid", "nodev", "bind" ] } ],
+    "hooks": [
+      { "hookName": "createContainer",
+        "path": "/usr/bin/nvidia-ctk",
+        "args": [
+          "nvidia-ctk",
+          "hook", "update-ldcache",
+          "--folder", "/usr/lib/x86_64-linux-gnu" ] } ] }
+}
diff --git a/doc/ch-run.rst b/doc/ch-run.rst
index 2771078e4..8622a3b4e 100644
--- a/doc/ch-run.rst
+++ b/doc/ch-run.rst
@@ -56,6 +56,40 @@ mounting SquashFS images with FUSE.
   :code:`-c`, :code:`--cd=DIR`
     Initial working directory in container.
 
+  :code:`--cdi-dirs=PATHS`
+    Colon-separated list of directories to search for CDI JSON specifications.
+    Default: :code:`CH_RUN_CDI_DIRS` if set, otherwise
+    :code:`/etc/cdi:/var/run/cdi`.
+
+  :code:`--color[=WHEN]`
+    Color logging output by log level when :code:`WHEN`:
+
+       * By default, or if :code:`WHEN` is :code:`auto`, :code:`tty`, or
+         :code:`if-tty`: use color if standard error is a TTY; otherwise,
+         don’t use color.
+
+       * If :code:`WHEN` is :code:`yes`, :code:`always`, or :code:`force`; or
+         if :code:`--color` is specified without an argument: always use
+         color.
+
+       * If :code:`WHEN` is :code:`no`, :code:`never`, or :code:`none`: never
+         use color.
+
+    This uses ANSI color codes without checking any terminal databases, which
+    should work on all modern terminals.
+
+  :code:`-d`, :code:`--devices`
+    Inject all CDI devices for which a specification is found. Implies
+    :code:`--write-fake`.
+
+  :code:`--device=DEV`
+    Inject CDI device :code:`DEV`, either (1) a filename, if it starts with a
+    slash (:code:`/`) or dot (:code:`.`), e.g. :code:`/etc/cdi/nvidia.json`,
+    or (2) a CDI selector for a list of devices in a CDI specification file,
+    e.g. :code:`nvidia.com/gpu`. Specific devices may not be selected, e.g.
+    :code:`nvidia.com/gpu=1:0` is invalid (see below for why). Implies
+    :code:`--write-fake`. Can be repeated.
+
   :code:`--env-no-expand`
     Don’t expand variables when using :code:`--set-env`.
 
@@ -118,11 +152,6 @@ mounting SquashFS images with FUSE.
     This is intended for use by :code:`ch-image(1)` when building images; see
     that man page for a detailed discussion.
 
-  :code:`-t`, :code:`--private-tmp`
-    By default, the host’s :code:`/tmp` (or :code:`$TMPDIR` if set) is
-    bind-mounted at container :code:`/tmp`. If this is specified, a new
-    :code:`tmpfs` is mounted on the container’s :code:`/tmp` instead.
-
   :code:`--set-env`, :code:`--set-env=FILE`, :code:`--set-env=VAR=VALUE`
     Set environment variables with newline-separated file
     (:code:`/ch/environment` within the image if not specified) or on the
@@ -131,6 +160,11 @@ mounting SquashFS images with FUSE.
   :code:`--set-env0`, :code:`--set-env0=FILE`, :code:`--set-env0=VAR=VALUE`
     Like :code:`--set-env`, but file is null-byte separated.
 
+  :code:`-t`, :code:`--private-tmp`
+    By default, the host’s :code:`/tmp` (or :code:`$TMPDIR` if set) is
+    bind-mounted at container :code:`/tmp`. If this is specified, a new
+    :code:`tmpfs` is mounted on the container’s :code:`/tmp` instead.
+
   :code:`-u`, :code:`--uid=UID`
     Run as user :code:`UID` within container.
 
@@ -345,6 +379,7 @@ Caveats:
 * Many of the arguments given to the race losers, such as the image path and
   :code:`--bind`, will be ignored in favor of what was given to the winner.
 
+
 .. _ch-run_overlay:
 
 Writeable overlay with :code:`--write-fake`
@@ -375,39 +410,221 @@ requires kernel support. Specifically:
    and thus is not helpful for unprivileged containers.)
 
 
+Injecting host “devices” with Container Device Interface (CDI)
+==============================================================
+
+Overview of CDI
+---------------
+
+`Container Device Interface (CDI)
+<https://github.com/cncf-tags/container-device-interface/blob/main/SPEC.md>`_
+is an emerging `Cloud Native Computing Foundation (CNCF)
+<https://www.cncf.io/>`_ standard to specify how “devices” are made available
+to containers. Importantly, a CDI *device* is not a hardware gadget nor a
+device file but rather a set of container modifications to be done before
+invoking the user command. It’s intended to make devices (in the usual sense
+of hardware gadgets) available inside containers but is quite flexible. A CDI
+device can specify multiple device files, environment variables, mounts, and
+more. Christopher Desiniotis gave a good talk at Container Plumbing Days 2024
+introducing CDI (`slides
+<https://static.sched.com/hosted_files/containerplumbingdays2024/e0/CDI_%20The%20Future%20of%20Specialized%20Hardware%20in%20Containers.pdf>`_,
+`video <https://www.youtube.com/watch?v=MbWjw6AMMVs>`_).
+
+CDI devices are described in JSON *specification files*, which are declarative
+except they provide for arbitrary hook programs. However, Charliecloud treats
+them as fully declarative by interpreting hooks as a declarative statement
+rather than a program to be run (brittle, but works for now). This
+declarativeness has a significant advantage over OCI hooks, because we have a
+clear description of what needs to be done rather than needing to run opaque
+programs as hooks.
+
+Another advantage of CDI is that it’s largely orthogonal to OCI. While the
+specifications have a strong OCI framing, this is largely an artifact of the
+exposition style rather than a core notion.
+
+Here is an example spec file:
+
+.. literalinclude:: cdi-nvidia.json
+   :language: JSON
+
+This declares:
+
+#. A single CDI device called :code:`nvidia.com/gpu=foo`, comprising:
+
+   #. Two device files to be made available in the container,
+      :code:`/dev/nvidia0` and :code:`/dev/dri/card0`.
+
+   #. One symlink to create inside the container,
+      :code:`/dev/dri/by-path/pci-0000:07:00.0-card` → :code:`../card0`.
+
+#. A set of container changes to be made once regardless of which devices are
+   selected (this example has one, but real spec files have several),
+   comprising:
+
+   #. One environment variable to set, :code:`NVIDIA_VISIBLE_DEVICES`.
+
+   #. Two device files to be made available in the container,
+      :code:`/dev/nvidia-modeset` and :code:`/dev/nvidiactl`.
+
+   #. A socket (:code:`/run/nvidia-fabricmanager/socket`), executable
+      (:code:`nvidia-smi`), and shared library
+      (:code:`libcuda.so.535.161.08`) to be bind-mounted into the
+      container.
+
+   #. Run the *host* :code:`ldconfig` to update the *container* linker cache,
+      scanning only container directory :code:`/usr/lib/x86_64-linux-gnu`.
+
+Charliecloud’s CDI implementation
+---------------------------------
+
+Charliecloud has some differences from other container implementations in how
+this spec file is interpreted, but the results (working CDI devices) should be
+the same. These are:
+
+#. All CDI devices available to the user normally are also available in the
+   container. For example, some implementations allow
+   :code:`--device=nvidia.com/gpu=foo`, which puts only the GPU named
+   :code:`foo` in the container, but :code:`ch-run` accepts only
+   :code:`--device=nvidia.com/gpu` (and similarly in
+   :code:`CH_RUN_CDI_DEFAULT`). This is because the host :code:`/dev` is
+   bind-mounted into Charliecloud containers, so there is no need to deal with
+   individual device files.
+
+#. Hooks are interpreted declaratively rather than running the specified
+   program. This is because we have not yet encountered any hooks that are
+   both useful under Charliecloud and do a task that merits an external
+   program. See below for details on individual hooks.
+
+#. Only bind mounts are implemented, because unprivileged mount namespaces
+   can’t mount much that is meaningful, and we haven’t seen any other mount
+   types yet.
+
+#. Charliecloud minimizes the number of bind mounts to avoid bloating the
+   container filesystem tree. (The spec file for one of our not-that-large
+   systems declares 47 mounts!) We do this by bind-mounting each filesystem
+   represented in a host path once and then symlinking into it for the
+   declared bind mounts.
+
+Selecting devices
+-----------------
+
+:code:`ch-run` must do two things to make CDI devices available: (1) locate
+appropriate specification files and (2) select which kinds of CDI devices to
+inject. We assume further that the most common use case is to inject all
+available CDI devices. The design of Charliecloud’s CDI user interface follows
+from these principles.
+
+TL;DR: The intended most common usage is simply :code:`ch-run -d` to inject
+all available CDI devices, using prior configuration by users or admins.
+
+Available spec files are those in the colon-separated list of directories in
+:code:`--cdi-dirs=DIRS` if given, otherwise in :code:`CH_RUN_CDI_DIRS`,
+otherwise :code:`/etc/cdi:/var/run/cdi` as required by the standard.
+
+The option :code:`--devices` (plural) or :code:`-d` then injects all devices
+found in all spec files in these directories.
+
+Individual CDI device kinds can be selected with :code:`--device=DEV`
+(singular), where :code:`DEV` is a device identifier. If the identifier starts
+with slash (:code:`/`) or dot (:code:`.`), the identifier is a path to a JSON
+CDI spec file, and all devices in that file are injected (e.g.,
+:code:`--device=./foo.json`). Otherwise, it is a CDI device kind with no
+device name(s) (e.g., :code:`--device=nvidia.com/gpu`). The option can be
+repeated to inject multiple device kinds.
+
+Importantly, both :code:`--device` and :code:`--devices` imply
+:code:`--write-fake` (:code:`-W`) so the container image can be written.
+
 Environment variables
-=====================
+---------------------
 
-:code:`ch-run` leaves environment variables unchanged, i.e. the host
-environment is passed through unaltered, except:
+Injecting a CDI device may require setting environment variables, as declared
+in the spec file. These environment changes are executed in the order that
+CDI command line options appear on the command line relative to other
+user-specified environment options, e.g. :code:`--set-env` and
+:code:`--unset-env`. See :ref:`ch-run_environment-variables` below for
+details.
 
-* by default (:code:`--home` not specified), :code:`HOME` is set to
-  :code:`/root`, if it exists, and :code:`/` otherwise.
-* limited tweaks to avoid significant guest breakage;
-* user-set variables via :code:`--set-env`;
-* user-unset variables via :code:`--unset-env`; and
-* set :code:`CH_RUNNING`.
+Hooks
+------
 
-This section describes these features.
+Behavior summary
+~~~~~~~~~~~~~~~~
 
-The default tweaks happen first, then :code:`--set-env` and
-:code:`--unset-env` in the order specified on the command line, and then
-:code:`CH_RUNNING`. The two options can be repeated arbitrarily many times,
-e.g. to add/remove multiple variable sets or add only some variables in a
-file.
+Presently, CDI hooks fall into three categories for Charliecloud:
 
-Default behavior
-----------------
+#. **Known hooks that we need**, with behavior emulated internally (i.e., we do
+   what the hook does, adapted for Charliecloud, rather than running it).
 
-By default, :code:`ch-run` makes the following environment variable changes:
+#. **Known hooks that we don’t need**; we ignore these quietly (i.e., logged but
+   at a level hidden by default).
+
+#. **Unknown hooks.** We warn about these, because they need to be either moved
+   into one of the first two categories or actually run. (That is, we’re still
+   figuring out what’s needed for Charliecloud here.)
+
+The next two sections document known hooks.
+
+.. note::
+
+   `nVidia Container Toolkit
+   <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html>`_
+   CDI hooks can be spelled `either
+   <https://github.com/NVIDIA/nvidia-container-toolkit/issues/435>`_
+   :code:`nvidia-ctk hook` (two words) or :code:`nvidia-ctk-hook` (one word).
+   We treat the two spellings the same.
+
+Emulated hooks
+~~~~~~~~~~~~~~
+
+#. :code:`nvidia-ctk-hook update-ldcache`. This updates the container’s
+   linker cache (i.e., :code:`/etc/ld.so.cache`), `notably using
+   <https://github.com/cncf-tags/container-device-interface/issues/203#issuecomment-2117628618>`_
+   the *host’s* :code:`ldconfig`. For now at least, we instead use the
+   *container’s* :code:`ldconfig`, the reasoning being that (1) the
+   container’s linker updating its own cache is lower-risk compatibility-wise
+   and (2) it seems unlikely that an image would be compatible with nVidia
+   libraries and have a linker cache but no :code:`ldconfig` executable.
+
+   If the image has no :code:`ldconfig`, :code:`ch-run` exits with an error
+   and the container does not run. This indicates the assumption above is
+   false, so please report this error as a bug.
+
+Ignored hooks
+~~~~~~~~~~~~~
+
+#. :code:`nvidia-ctk-hook create-symlinks`. This creates one or more symlinks.
+   In our experience, the links created already exist in the host’s
+   :code:`/dev` or are created by :code:`ldconfig(8)`.
+
+#. :code:`nvidia-ctk-hook chmod`. This changes file permissions, but in
+   unprivileged Charliecloud containers, the invoking user will already have
+   access to all appropriate files.
 
-:code:`$CH_RUNNING`
-  Set to :code:`Weird Al Yankovic`. While a process can figure out that it’s
-  in an unprivileged container and what namespaces are active without this
-  hint, that can be messy, and there is no way to tell that it’s a
-  *Charliecloud* container specifically. This variable makes such a test
-  simple and well-defined. (**Note:** This variable is unaffected by
-  :code:`--unset-env`.)
+
+.. _ch-run_environment-variables:
+
+Environment variables
+=====================
+
+Unlike most other implementations, :code:`ch-run`’s baseline for the container
+environment is to pass through the host environment unaltered. From this
+starting point, the environment is altered in this order:
+
+#. :code:`$HOME`, :code:`$PATH`, and :code:`$TMPDIR` are adjusted to avoid
+   common breakage (see below).
+
+#. User-specified changes are executed in the order they appear on the command
+   line (i.e., :code:`-d`/:code:`--devices`, :code:`--device`,
+   :code:`--set-env`, and :code:`--unset-env`, some of which can appear
+   multiple times).
+
+#. :code:`$CH_RUNNING` is set.
+
+Built-in environment changes
+----------------------------
+
+Prior to user changes, i.e. can be altered by the user:
 
 :code:`$HOME`
   If :code:`--home` is specified, then your home directory is bind-mounted
@@ -418,13 +635,12 @@ By default, :code:`ch-run` makes the following environment variable changes:
   is unchanged.)
 
 :code:`$PATH`
-  Newer Linux distributions replace some root-level directories, such as
-  :code:`/bin`, with symlinks to their counterparts in :code:`/usr`.
-
-  Some of these distributions (e.g., Fedora 24) have also dropped :code:`/bin`
-  from the default :code:`$PATH`. This is a problem when the guest OS does
-  *not* have a merged :code:`/usr` (e.g., Debian 8 “Jessie”). Thus, we add
-  :code:`/bin` to :code:`$PATH` if it’s not already present.
+  We append :code:`/bin` to :code:`$PATH` if it’s not already present. This is
+  because newer Linux distributions replace some root-level directories, such
+  as :code:`/bin`, with symlinks to their counterparts in :code:`/usr`. Some
+  of these distributions (e.g., Fedora 24) have also dropped :code:`/bin` from
+  the default :code:`$PATH`. This is a problem when the guest OS does *not*
+  have a merged :code:`/usr` (e.g., Debian 8 “Jessie”).
 
   Further reading:
 
@@ -437,6 +653,15 @@ By default, :code:`ch-run` makes the following environment variable changes:
   made available in the guest at :code:`/tmp` unless :code:`--private-tmp` is
   given.
 
+After user changes, i.e. cannot be altered by the user with :code:`ch-run`:
+
+:code:`$CH_RUNNING`
+  Set to :code:`Weird Al Yankovic`. While a process can figure out that it’s
+  in an unprivileged container and what namespaces are active without this
+  hint, that can be messy, and there is no way to tell that it’s a
+  *Charliecloud* container specifically. This variable makes such a test
+  simple and well-defined.
+
 Setting variables with :code:`--set-env` or :code:`--set-env0`
 --------------------------------------------------------------
 
@@ -760,4 +985,6 @@ status is 1 regardless of the signal value.
 .. include:: ./see_also.rst
 
 ..  LocalWords:  mtune NEWROOT hugetlbfs UsrMerge fusermount mybox IMG HOSTPATH
-..  LocalWords:  noprofile norc SHLVL PWD kernelnewbies extglob
+..  LocalWords:  noprofile norc SHLVL PWD kernelnewbies extglob cdi AMMVs dri
+..  LocalWords:  Desiniotis declarativeness fabricmanager libglxserver ctk
+..  LocalWords:  libcuda ldcache
diff --git a/doc/dev.rst b/doc/dev.rst
index 4a3f53284..4c5f85236 100644
--- a/doc/dev.rst
+++ b/doc/dev.rst
@@ -941,6 +941,64 @@ characters.
 C code
 ------
 
+Memory management
+~~~~~~~~~~~~~~~~~
+
+*TL;DR:* Charliecloud does not free any memory. You can enable garbage
+collection with :code:`libgc` if you want, and this is the default, but it may
+not be necessary, i.e. simply leaking all allocated memory could still be
+smaller than the overhead of trying to clean up.
+
+*How-To:* (1) Use Charliecloud wrappers for all library functions that
+allocate memory, e.g. :code:`ch_malloc()` instead of :code:`malloc(3)`.
+Importantly, this includes things like :code:`strdup(3)` and
+:code:`asprintf(3)`. (2) Don’t call :code:`free(3)` or any other library
+functions that free memory.
+
+:code:`ch-run.c` has, since `very nearly the beginning
+<https://github.com/hpc/charliecloud/commit/b65e7c1>`_, carried the notice
+that it “does not bother to free memory allocations, since they are modest and
+the program is short-lived”. Explicit memory management is difficult and
+time-consuming, and it didn’t seem worth the effort.
+
+Eventually, we grew a `long-running process
+<https://github.com/hpc/charliecloud/releases/tag/v0.26>`_ to serve a
+SquashFUSE filesystem, and the short-lived justification became obsolete. The
+rough goal became: convert to proper memory management, freeing everything
+that we allocated. Various :code:`free(3)` crept in here and there, but a full
+refactor was never a priority.
+
+Then `PR #1919 <https://github.com/hpc/charliecloud/pull/1902>`_ came to be
+and grew in scope until it was a significant refactor. We tried to Do It Right
+on memory management everywhere this PR touched, and we did, until Reid got
+fed up writing comments about whose problem it was to free this or that and
+copying data simply so those comments could be tractable.
+
+So now we’re back full circle. Memory management is not worth Charliecloud
+developers’ time. We gleefully :code:`malloc(3)` and :code:`realloc(3)`
+without a care in the world, sinning every time. But now you have options. You
+can either:
+
+1. YOLO, i.e. simply never free anything, i.e. leak like a sieve. But
+   Charliecloud is still a small program and it’s unlikely to be an actual
+   problem. Our quick-and-dirty tests with a small “hello world” Alpine image
+   running :code:`true(1)` show a main :code:`ch-run` process using 350 KiB
+   just before it executes the user program, and the SquashFUSE process the
+   same just before forking and 1,600 KiB upon exit.
+
+2. Link with :code:`libgc`, i.e. the `Boehm-Demers-Weiser
+   <https://hboehm.info/gc/>`_ conservative garbage collector. The idea is
+   that garbage collection scans the stack, heap, and other pointer sources
+   for integers that *look* like pointers and assumes they *are* pointers.
+   Apparently it `works quite well <https://hboehm.info/gc/issues.html>`_ and
+   can even be faster than explicit memory management in some cases. The
+   quick-and-dirty tests show 900 KiB by the main process, and the SquashFUSE
+   process the same just before forking (after an explicit garbage collection)
+   and 2,200 KiB upon exit.
+
+:code:`ch-run` logs memory usage to syslog, and also stderr with :code:`-vv`,
+so you can analyze your specific situation.
+
 :code:`const`
 ~~~~~~~~~~~~~
 
@@ -1054,19 +1112,19 @@ computed, but it’s all in raw hex and hard to interpret, e.g.::
 
   $ ch-run --seccomp -vv alpine:3.17 -- true
   [...]
-  ch-run[62763]: seccomp: arch c00000b7: found 13 syscalls (ch_core.c:582)
-  ch-run[62763]: seccomp: arch 40000028: found 27 syscalls (ch_core.c:582)
+  ch-run[62763]: seccomp: arch c00000b7: found 13 syscalls (core.c:582)
+  ch-run[62763]: seccomp: arch 40000028: found 27 syscalls (core.c:582)
   [...]
-  ch-run[62763]: seccomp(2) program has 156 instructions (ch_core.c:591)
-  ch-run[62763]:    0: { op=20 k=       4 jt=  0 jf=  0 } (ch_core.c:423)
-  ch-run[62763]:    1: { op=15 k=c00000b7 jt=  0 jf= 17 } (ch_core.c:423)
-  ch-run[62763]:    2: { op=20 k=       0 jt=  0 jf=  0 } (ch_core.c:423)
-  ch-run[62763]:    3: { op=15 k=      5b jt=145 jf=  0 } (ch_core.c:423)
+  ch-run[62763]: seccomp(2) program has 156 instructions (core.c:591)
+  ch-run[62763]:    0: { op=20 k=       4 jt=  0 jf=  0 } (core.c:423)
+  ch-run[62763]:    1: { op=15 k=c00000b7 jt=  0 jf= 17 } (core.c:423)
+  ch-run[62763]:    2: { op=20 k=       0 jt=  0 jf=  0 } (core.c:423)
+  ch-run[62763]:    3: { op=15 k=      5b jt=145 jf=  0 } (core.c:423)
   [...]
-  ch-run[62763]:  154: { op= 6 k=7fff0000 jt=  0 jf=  0 } (ch_core.c:423)
-  ch-run[62763]:  155: { op= 6 k=   50000 jt=  0 jf=  0 } (ch_core.c:423)
-  ch-run[62763]: note: see FAQ to disassemble the above (ch_core.c:676)
-  ch-run[62763]: executing: true (ch_core.c:538)
+  ch-run[62763]:  154: { op= 6 k=7fff0000 jt=  0 jf=  0 } (core.c:423)
+  ch-run[62763]:  155: { op= 6 k=   50000 jt=  0 jf=  0 } (core.c:423)
+  ch-run[62763]: note: see FAQ to disassemble the above (core.c:676)
+  ch-run[62763]: executing: true (core.c:538)
 
 You can instead use `seccomp-tools
 <https://github.com/david942j/seccomp-tools>`_ to disassemble and pretty-print
diff --git a/doc/faq.rst b/doc/faq.rst
index 83ba73e8e..c30c29101 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -201,7 +201,7 @@ handling.
 For example::
 
   $ ch-run /var/tmp/hello -- /bin/echo foo
-  ch-run[154334]: error: can’t execve(2): /bin/echo: Permission denied (ch_core.c:387 13)
+  ch-run[154334]: error: can’t execve(2): /bin/echo: Permission denied (core.c:387 13)
 
 But :code:`/bin/echo` *does* have execute permission::
 
diff --git a/misc/gdb-backtrace b/misc/gdb-backtrace
new file mode 100755
index 000000000..faddbb1a1
--- /dev/null
+++ b/misc/gdb-backtrace
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# $1  executable
+# $2  core dump, or directory containing core dumps, in which case pick newest
+
+bin=$1
+core=$2
+
+if [[ -d $core ]]; then
+    # kludge but good enough for now (https://stackoverflow.com/q/1015678)
+    printf "$core is a directory\n" 1>&2
+    core=$core/$(ls -At $core | head -1)
+    printf "using $core\n" 1>&2
+fi
+
+gdb -batch $bin $core \
+    -ex 'set style enabled on' \
+    -ex 'set print pretty on' \
+    -ex 'set print frame-info source-and-location' \
+    -ex 'echo \n\n' \
+    -ex 'backtrace -full'