diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4
index c89ea204e83d..da035dba6183 100644
--- a/config/kernel-timer.m4
+++ b/config/kernel-timer.m4
@@ -1,8 +1,19 @@
 dnl #
-dnl # 6.2: timer_delete_sync introduced, del_timer_sync deprecated and made
-dnl # into a simple wrapper
+dnl # 6.2: timer_delete & timer_delete_sync introduced, del_timer &
+dnl #      del_timer_sync deprecated and made into a simple wrapper
 dnl # 6.15: del_timer_sync removed
 dnl #
+dnl # We test for them separately as they appear to have not always been
+dnl # backported together
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE], [
+	ZFS_LINUX_TEST_SRC([timer_delete], [
+		#include <linux/timer.h>
+	],[
+		struct timer_list *timer __attribute__((unused)) = NULL;
+		timer_delete(timer);
+	])
+])
 AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC], [
 	ZFS_LINUX_TEST_SRC([timer_delete_sync], [
 		#include <linux/timer.h>
@@ -12,6 +23,16 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC], [
 	])
 ])
 
+AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE], [
+	AC_MSG_CHECKING([whether timer_delete() is available])
+	ZFS_LINUX_TEST_RESULT([timer_delete], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_TIMER_DELETE, 1,
+		    [timer_delete is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
 AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE_SYNC], [
 	AC_MSG_CHECKING([whether timer_delete_sync() is available])
 	ZFS_LINUX_TEST_RESULT([timer_delete_sync], [
@@ -24,9 +45,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE_SYNC], [
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER], [
+	ZFS_AC_KERNEL_SRC_TIMER_DELETE
 	ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_TIMER], [
+	ZFS_AC_KERNEL_TIMER_DELETE
 	ZFS_AC_KERNEL_TIMER_DELETE_SYNC
 ])
diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h
index c9b2bc994c8c..fcb436ac1e3c 100644
--- a/include/os/linux/spl/sys/taskq.h
+++ b/include/os/linux/spl/sys/taskq.h
@@ -22,7 +22,7 @@
  * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  */
 /*
- * Copyright (c) 2024, Klara Inc.
+ * Copyright (c) 2024, 2025, Klara, Inc.
  * Copyright (c) 2024, Syneto
  */
 
@@ -134,6 +134,8 @@ typedef struct taskq {
 	wait_queue_head_t	tq_work_waitq;	/* new work waitq */
 	wait_queue_head_t	tq_wait_waitq;	/* wait waitq */
 	tq_lock_role_t		tq_lock_class;	/* class when taking tq_lock */
+	struct timer_list	tq_deadman;	/* deadman timer */
+	unsigned long		tq_deadman_at;	/* time of last deadman trip */
 	/* list node for the cpu hotplug callback */
 	struct hlist_node	tq_hp_cb_node;
 	boolean_t		tq_hp_support;
diff --git a/man/man4/spl.4 b/man/man4/spl.4
index 61dfe42e463d..8904e46c0bb6 100644
--- a/man/man4/spl.4
+++ b/man/man4/spl.4
@@ -14,8 +14,9 @@
 .\" Portions Copyright [yyyy] [name of copyright owner]
 .\"
 .\" Copyright 2013 Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
+.\" Copyright (c) 2025, Klara, Inc.
 .\"
-.Dd May 7, 2025
+.Dd November 12, 2025
 .Dt SPL 4
 .Os
 .
@@ -130,6 +131,14 @@ When not enabled, the thread is halted to facilitate further debugging.
 .Pp
 Set to a non-zero value to enable.
 .
+.It Sy spl_taskq_deadman_timeout Ns = Ns Sy 20 Pq uint
+Log a warning if a taskq has not made progress in N seconds.
+"Progress" here means a taskq thread has not picked up a new task in this
+time,
+or all threads have not completed in this time.
+This can be useful for deadlock debugging.
+Setting this value to 0 will disable this function.
+.
 .It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint
 Kick stuck taskq to spawn threads.
 When writing a non-zero value to it, it will scan all the taskqs.
diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c
index 092f090d934b..625cbb84be87 100644
--- a/module/os/linux/spl/spl-taskq.c
+++ b/module/os/linux/spl/spl-taskq.c
@@ -24,7 +24,7 @@
  * Solaris Porting Layer (SPL) Task Queue Implementation.
  */
 /*
- * Copyright (c) 2024, Klara Inc.
+ * Copyright (c) 2024, 2025, Klara, Inc.
  * Copyright (c) 2024, Syneto
  */
 
@@ -39,7 +39,14 @@
 #include <linux/cpuhotplug.h>
 #include <linux/mod_compat.h>
 
-/* Linux 6.2 renamed timer_delete_sync(); point it at its old name for those. */
+/*
+ * Linux 6.2 renamed del_timer()/del_timer_sync() to
+ * timer_delete()/timer_delete_sync(). For kernels before that, point the new
+ * names to the old.
+ */
+#ifndef HAVE_TIMER_DELETE
+#define	timer_delete(t)		del_timer(t)
+#endif
 #ifndef HAVE_TIMER_DELETE_SYNC
 #define	timer_delete_sync(t)	del_timer_sync(t)
 #endif
@@ -142,6 +149,11 @@ module_param(spl_taskq_thread_sequential, uint, 0644);
 MODULE_PARM_DESC(spl_taskq_thread_sequential,
 	"Create new taskq threads after N sequential tasks");
 
+static uint_t spl_taskq_deadman_timeout = 20;
+module_param(spl_taskq_deadman_timeout, uint, 0644);
+MODULE_PARM_DESC(spl_taskq_deadman_timeout,
+	"Log a warning if the taskq has not made progress in N seconds");
+
 /*
  * Global system-wide dynamic task queue available for all consumers. This
  * taskq is not intended for long-running tasks; instead, a dedicated taskq
@@ -357,6 +369,34 @@ task_expire(struct timer_list *tl)
 	task_expire_impl(t);
 }
 
+static void
+taskq_deadman(struct timer_list *tl)
+{
+	unsigned long irqflags;
+	taskq_t *tq = container_of(tl, taskq_t, tq_deadman);
+
+	spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
+	if (tq->tq_nactive == 0 || spl_taskq_deadman_timeout == 0) {
+		spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+		return;
+	}
+
+	unsigned long nqueued = 0;
+	struct list_head *pos;
+	list_for_each(pos, &tq->tq_pend_list)
+		nqueued++;
+	list_for_each(pos, &tq->tq_prio_list)
+		nqueued++;
+
+	printk(KERN_INFO "spl: taskq stuck for %us: %s.%d "
+	    "[%d/%d threads active, %lu tasks queued]\n",
+	    spl_taskq_deadman_timeout, tq->tq_name, tq->tq_instance,
+	    tq->tq_nthreads, tq->tq_nactive, nqueued);
+
+	tq->tq_deadman_at = jiffies;
+	spin_unlock_irqrestore(&tq->tq_lock, irqflags);
+}
+
 /*
  * Returns the lowest incomplete taskqid_t.  The taskqid_t may
  * be queued on the pending list, on the priority list, on the
@@ -1071,6 +1111,11 @@ taskq_thread(void *args)
 
 			taskq_insert_in_order(tq, tqt);
 			tq->tq_nactive++;
+
+			if (spl_taskq_deadman_timeout > 0)
+				mod_timer(&tq->tq_deadman,
+				    jiffies + spl_taskq_deadman_timeout * HZ);
+
 			spin_unlock_irqrestore(&tq->tq_lock, flags);
 
 			TQSTAT_INC(tq, threads_active);
@@ -1096,6 +1141,21 @@ taskq_thread(void *args)
 			list_del_init(&tqt->tqt_active_list);
 			tqt->tqt_task = NULL;
 
+			if (tq->tq_nactive == 0 ||
+			    spl_taskq_deadman_timeout == 0)
+				timer_delete(&tq->tq_deadman);
+
+			if (tq->tq_deadman_at > 0) {
+				unsigned long stuck_for =
+				    jiffies - tq->tq_deadman_at;
+				tq->tq_deadman_at = 0;
+
+				printk(KERN_INFO
+				    "spl: taskq resumed after %lus: %s.%d\n",
+				    stuck_for / HZ, tq->tq_name,
+				    tq->tq_instance);
+			}
+
 			/* For prealloc'd tasks, we don't free anything. */
 			if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
 				task_done(tq, t);
@@ -1375,6 +1435,9 @@ taskq_create(const char *name, int threads_arg, pri_t pri,
 	tq->tq_next_id = TASKQID_INITIAL;
 	tq->tq_lowest_id = TASKQID_INITIAL;
 	tq->lastspawnstop = jiffies;
+	timer_setup(&tq->tq_deadman, NULL, 0);
+	tq->tq_deadman.function = taskq_deadman;
+	tq->tq_deadman_at = 0;
 	INIT_LIST_HEAD(&tq->tq_free_list);
 	INIT_LIST_HEAD(&tq->tq_pend_list);
 	INIT_LIST_HEAD(&tq->tq_prio_list);