Skip to content

Commit 4f8b6f2

Browse files
committed
Merge tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - Add a dm-crypt optional "high_priority" flag that enables the crypt
   workqueues to use WQ_HIGHPRI.

 - Export dm-crypt workqueues via sysfs (by enabling WQ_SYSFS) to allow
   for improved visibility and controls over IO and crypt workqueues.

 - Fix dm-crypt to no longer constrain max_segment_size to PAGE_SIZE.
   This limit isn't needed given that the block core provides late bio
   splitting if bio exceeds underlying limits (e.g. max_segment_size).

 - Fix dm-crypt crypt_queue's use of WQ_UNBOUND to not use
   WQ_CPU_INTENSIVE because it is meaningless with WQ_UNBOUND.

 - Fix various issues with dm-delay target (ranging from a resource
   teardown fix, a fix for hung task when using kthread mode, and other
   improvements that followed from code inspection).

* tag 'for-6.10/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm-delay: remove timer_lock
  dm-delay: change locking to avoid contention
  dm-delay: fix max_delay calculations
  dm-delay: fix hung task introduced by kthread mode
  dm-delay: fix workqueue delay_timer race
  dm-crypt: don't set WQ_CPU_INTENSIVE for WQ_UNBOUND crypt_queue
  dm: use queue_limits_set
  dm-crypt: stop constraining max_segment_size to PAGE_SIZE
  dm-crypt: export sysfs of all workqueues
  dm-crypt: add the optional "high_priority" flag
2 parents 113d1dd + 8b21ac8 commit 4f8b6f2

File tree

4 files changed

+97
-68
lines changed

4 files changed

+97
-68
lines changed

Documentation/admin-guide/device-mapper/dm-crypt.rst

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,11 @@ same_cpu_crypt
113113
The default is to use an unbound workqueue so that encryption work
114114
is automatically balanced between available CPUs.
115115

116+
high_priority
117+
Set dm-crypt workqueues and the writer thread to high priority. This
118+
improves throughput and latency of dm-crypt while degrading general
119+
responsiveness of the system.
120+
116121
submit_from_crypt_cpus
117122
Disable offloading writes to a separate thread after encryption.
118123
There are some situations where offloading write bios from the

drivers/md/dm-crypt.c

Lines changed: 49 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@
4747

4848
#define DM_MSG_PREFIX "crypt"
4949

50+
static DEFINE_IDA(workqueue_ida);
51+
5052
/*
5153
* context holding the current state of a multi-part conversion
5254
*/
@@ -137,9 +139,9 @@ struct iv_elephant_private {
137139
* and encrypts / decrypts at the same time.
138140
*/
139141
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
140-
DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
141-
DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE,
142-
DM_CRYPT_WRITE_INLINE };
142+
DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY,
143+
DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE,
144+
DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE };
143145

144146
enum cipher_flags {
145147
CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */
@@ -184,6 +186,7 @@ struct crypt_config {
184186
struct crypto_aead **tfms_aead;
185187
} cipher_tfm;
186188
unsigned int tfms_count;
189+
int workqueue_id;
187190
unsigned long cipher_flags;
188191

189192
/*
@@ -1653,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
16531656

16541657
/*
16551658
* Generate a new unfragmented bio with the given size
1656-
* This should never violate the device limitations (but only because
1657-
* max_segment_size is being constrained to PAGE_SIZE).
1659+
* This should never violate the device limitations (but if it did then block
1660+
* core should split the bio as needed).
16581661
*
16591662
* This function may be called concurrently. If we allocate from the mempool
16601663
* concurrently, there is a possibility of deadlock. For example, if we have
@@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti)
27712774
if (cc->crypt_queue)
27722775
destroy_workqueue(cc->crypt_queue);
27732776

2777+
if (cc->workqueue_id)
2778+
ida_free(&workqueue_ida, cc->workqueue_id);
2779+
27742780
crypt_free_tfms(cc);
27752781

27762782
bioset_exit(&cc->bs);
@@ -3134,7 +3140,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
31343140
struct crypt_config *cc = ti->private;
31353141
struct dm_arg_set as;
31363142
static const struct dm_arg _args[] = {
3137-
{0, 8, "Invalid number of feature args"},
3143+
{0, 9, "Invalid number of feature args"},
31383144
};
31393145
unsigned int opt_params, val;
31403146
const char *opt_string, *sval;
@@ -3161,6 +3167,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
31613167

31623168
else if (!strcasecmp(opt_string, "same_cpu_crypt"))
31633169
set_bit(DM_CRYPT_SAME_CPU, &cc->flags);
3170+
else if (!strcasecmp(opt_string, "high_priority"))
3171+
set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
31643172

31653173
else if (!strcasecmp(opt_string, "submit_from_crypt_cpus"))
31663174
set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
@@ -3230,8 +3238,9 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
32303238
{
32313239
struct crypt_config *cc;
32323240
const char *devname = dm_table_device_name(ti->table);
3233-
int key_size;
3241+
int key_size, wq_id;
32343242
unsigned int align_mask;
3243+
unsigned int common_wq_flags;
32353244
unsigned long long tmpll;
32363245
int ret;
32373246
size_t iv_size_padding, additional_req_size;
@@ -3398,20 +3407,38 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
33983407
cc->tag_pool_max_sectors <<= cc->sector_shift;
33993408
}
34003409

3410+
wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL);
3411+
if (wq_id < 0) {
3412+
ti->error = "Couldn't get workqueue id";
3413+
ret = wq_id;
3414+
goto bad;
3415+
}
3416+
cc->workqueue_id = wq_id;
3417+
34013418
ret = -ENOMEM;
3402-
cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
3419+
common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS;
3420+
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
3421+
common_wq_flags |= WQ_HIGHPRI;
3422+
3423+
cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id);
34033424
if (!cc->io_queue) {
34043425
ti->error = "Couldn't create kcryptd io queue";
34053426
goto bad;
34063427
}
34073428

3408-
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
3409-
cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
3410-
1, devname);
3411-
else
3412-
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
3413-
WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
3414-
num_online_cpus(), devname);
3429+
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) {
3430+
cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
3431+
common_wq_flags | WQ_CPU_INTENSIVE,
3432+
1, devname, wq_id);
3433+
} else {
3434+
/*
3435+
* While crypt_queue is certainly CPU intensive, the use of
3436+
* WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND.
3437+
*/
3438+
cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d",
3439+
common_wq_flags | WQ_UNBOUND,
3440+
num_online_cpus(), devname, wq_id);
3441+
}
34153442
if (!cc->crypt_queue) {
34163443
ti->error = "Couldn't create kcryptd queue";
34173444
goto bad;
@@ -3427,6 +3454,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
34273454
ti->error = "Couldn't spawn write thread";
34283455
goto bad;
34293456
}
3457+
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
3458+
set_user_nice(cc->write_thread, MIN_NICE);
34303459

34313460
ti->num_flush_bios = 1;
34323461
ti->limit_swap_bios = true;
@@ -3547,6 +3576,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
35473576

35483577
num_feature_args += !!ti->num_discard_bios;
35493578
num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags);
3579+
num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags);
35503580
num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags);
35513581
num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags);
35523582
num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags);
@@ -3560,6 +3590,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
35603590
DMEMIT(" allow_discards");
35613591
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
35623592
DMEMIT(" same_cpu_crypt");
3593+
if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags))
3594+
DMEMIT(" high_priority");
35633595
if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags))
35643596
DMEMIT(" submit_from_crypt_cpus");
35653597
if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags))
@@ -3579,6 +3611,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type,
35793611
DMEMIT_TARGET_NAME_VERSION(ti->type);
35803612
DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n');
35813613
DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n');
3614+
DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n');
35823615
DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ?
35833616
'y' : 'n');
35843617
DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ?
@@ -3688,14 +3721,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
36883721
{
36893722
struct crypt_config *cc = ti->private;
36903723

3691-
/*
3692-
* Unfortunate constraint that is required to avoid the potential
3693-
* for exceeding underlying device's max_segments limits -- due to
3694-
* crypt_alloc_buffer() possibly allocating pages for the encryption
3695-
* bio that are not as physically contiguous as the original bio.
3696-
*/
3697-
limits->max_segment_size = PAGE_SIZE;
3698-
36993724
limits->logical_block_size =
37003725
max_t(unsigned int, limits->logical_block_size, cc->sector_size);
37013726
limits->physical_block_size =
@@ -3706,7 +3731,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
37063731

37073732
static struct target_type crypt_target = {
37083733
.name = "crypt",
3709-
.version = {1, 25, 0},
3734+
.version = {1, 26, 0},
37103735
.module = THIS_MODULE,
37113736
.ctr = crypt_ctr,
37123737
.dtr = crypt_dtr,

drivers/md/dm-delay.c

Lines changed: 31 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@ struct delay_class {
2828

2929
struct delay_c {
3030
struct timer_list delay_timer;
31-
struct mutex timer_lock;
31+
struct mutex process_bios_lock; /* hold while removing bios to be processed from list */
32+
spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */
3233
struct workqueue_struct *kdelayd_wq;
3334
struct work_struct flush_expired_bios;
3435
struct list_head delayed_bios;
@@ -49,8 +50,6 @@ struct dm_delay_info {
4950
unsigned long expires;
5051
};
5152

52-
static DEFINE_MUTEX(delayed_bios_lock);
53-
5453
static void handle_delayed_timer(struct timer_list *t)
5554
{
5655
struct delay_c *dc = from_timer(dc, t, delay_timer);
@@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t)
6059

6160
static void queue_timeout(struct delay_c *dc, unsigned long expires)
6261
{
63-
mutex_lock(&dc->timer_lock);
64-
65-
if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
66-
mod_timer(&dc->delay_timer, expires);
67-
68-
mutex_unlock(&dc->timer_lock);
62+
timer_reduce(&dc->delay_timer, expires);
6963
}
7064

7165
static inline bool delay_is_fast(struct delay_c *dc)
@@ -89,12 +83,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
8983
{
9084
struct dm_delay_info *delayed, *next;
9185
struct bio_list flush_bio_list;
86+
LIST_HEAD(local_list);
9287
unsigned long next_expires = 0;
9388
bool start_timer = false;
9489
bio_list_init(&flush_bio_list);
9590

96-
mutex_lock(&delayed_bios_lock);
97-
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
91+
mutex_lock(&dc->process_bios_lock);
92+
spin_lock(&dc->delayed_bios_lock);
93+
list_replace_init(&dc->delayed_bios, &local_list);
94+
spin_unlock(&dc->delayed_bios_lock);
95+
list_for_each_entry_safe(delayed, next, &local_list, list) {
9896
cond_resched();
9997
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
10098
struct bio *bio = dm_bio_from_per_bio_data(delayed,
@@ -114,7 +112,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
114112
}
115113
}
116114
}
117-
mutex_unlock(&delayed_bios_lock);
115+
spin_lock(&dc->delayed_bios_lock);
116+
list_splice(&local_list, &dc->delayed_bios);
117+
spin_unlock(&dc->delayed_bios_lock);
118+
mutex_unlock(&dc->process_bios_lock);
118119

119120
if (start_timer)
120121
queue_timeout(dc, next_expires);
@@ -128,13 +129,13 @@ static int flush_worker_fn(void *data)
128129

129130
while (!kthread_should_stop()) {
130131
flush_delayed_bios(dc, false);
131-
mutex_lock(&delayed_bios_lock);
132+
spin_lock(&dc->delayed_bios_lock);
132133
if (unlikely(list_empty(&dc->delayed_bios))) {
133134
set_current_state(TASK_INTERRUPTIBLE);
134-
mutex_unlock(&delayed_bios_lock);
135+
spin_unlock(&dc->delayed_bios_lock);
135136
schedule();
136137
} else {
137-
mutex_unlock(&delayed_bios_lock);
138+
spin_unlock(&dc->delayed_bios_lock);
138139
cond_resched();
139140
}
140141
}
@@ -154,8 +155,10 @@ static void delay_dtr(struct dm_target *ti)
154155
{
155156
struct delay_c *dc = ti->private;
156157

157-
if (dc->kdelayd_wq)
158+
if (dc->kdelayd_wq) {
159+
timer_shutdown_sync(&dc->delay_timer);
158160
destroy_workqueue(dc->kdelayd_wq);
161+
}
159162

160163
if (dc->read.dev)
161164
dm_put_device(ti, dc->read.dev);
@@ -166,7 +169,7 @@ static void delay_dtr(struct dm_target *ti)
166169
if (dc->worker)
167170
kthread_stop(dc->worker);
168171

169-
mutex_destroy(&dc->timer_lock);
172+
mutex_destroy(&dc->process_bios_lock);
170173

171174
kfree(dc);
172175
}
@@ -224,7 +227,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
224227

225228
ti->private = dc;
226229
INIT_LIST_HEAD(&dc->delayed_bios);
227-
mutex_init(&dc->timer_lock);
230+
mutex_init(&dc->process_bios_lock);
231+
spin_lock_init(&dc->delayed_bios_lock);
228232
dc->may_delay = true;
229233
dc->argc = argc;
230234

@@ -240,19 +244,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
240244
ret = delay_class_ctr(ti, &dc->flush, argv);
241245
if (ret)
242246
goto bad;
243-
max_delay = max(max_delay, dc->write.delay);
244-
max_delay = max(max_delay, dc->flush.delay);
245247
goto out;
246248
}
247249

248250
ret = delay_class_ctr(ti, &dc->write, argv + 3);
249251
if (ret)
250252
goto bad;
253+
max_delay = max(max_delay, dc->write.delay);
254+
251255
if (argc == 6) {
252256
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
253257
if (ret)
254258
goto bad;
255-
max_delay = max(max_delay, dc->flush.delay);
256259
goto out;
257260
}
258261

@@ -267,8 +270,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
267270
* In case of small requested delays, use kthread instead of
268271
* timers and workqueue to achieve better latency.
269272
*/
270-
dc->worker = kthread_create(&flush_worker_fn, dc,
271-
"dm-delay-flush-worker");
273+
dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker");
272274
if (IS_ERR(dc->worker)) {
273275
ret = PTR_ERR(dc->worker);
274276
dc->worker = NULL;
@@ -309,14 +311,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
309311
delayed->context = dc;
310312
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
311313

312-
mutex_lock(&delayed_bios_lock);
314+
spin_lock(&dc->delayed_bios_lock);
313315
if (unlikely(!dc->may_delay)) {
314-
mutex_unlock(&delayed_bios_lock);
316+
spin_unlock(&dc->delayed_bios_lock);
315317
return DM_MAPIO_REMAPPED;
316318
}
317319
c->ops++;
318320
list_add_tail(&delayed->list, &dc->delayed_bios);
319-
mutex_unlock(&delayed_bios_lock);
321+
spin_unlock(&dc->delayed_bios_lock);
320322

321323
if (delay_is_fast(dc))
322324
wake_up_process(dc->worker);
@@ -330,12 +332,12 @@ static void delay_presuspend(struct dm_target *ti)
330332
{
331333
struct delay_c *dc = ti->private;
332334

333-
mutex_lock(&delayed_bios_lock);
335+
spin_lock(&dc->delayed_bios_lock);
334336
dc->may_delay = false;
335-
mutex_unlock(&delayed_bios_lock);
337+
spin_unlock(&dc->delayed_bios_lock);
336338

337339
if (!delay_is_fast(dc))
338-
del_timer_sync(&dc->delay_timer);
340+
timer_delete(&dc->delay_timer);
339341
flush_delayed_bios(dc, true);
340342
}
341343

0 commit comments

Comments (0)