Skip to content

Commit 6c83d15

Browse files
committed
btrfs: add new ioctl to wait for cleaned subvolumes
Add a new unprivileged ioctl that will let the command 'btrfs subvolume sync' work without the (privileged) SEARCH_TREE ioctl. There are several modes of operation, where the most common ones are to wait on a specific subvolume or all currently queued for cleaning. This is utilized e.g. in backup applications that delete subvolumes and wait until they're cleaned to check for remaining space. The other modes are for flexibility, e.g. for monitoring or checkpoints in the queue of deleted subvolumes, again without the need to use SEARCH_TREE. Notes: - waiting is interruptible, the state is re-checked every 1 second and this interval is not configurable - repeated calls to the ioctl see a different state, so this is inherently racy when using e.g. the count or peek first/last Use cases: - a subvolume A was deleted, wait for cleaning (WAIT_FOR_ONE) - a bunch of subvolumes were deleted, wait for all (WAIT_FOR_QUEUED or PEEK_LAST + WAIT_FOR_ONE) - count how many are queued (not blocking), for monitoring purposes - report progress (PEEK_FIRST), may miss some if cleaning is quick - own waiting in user space (PEEK_LAST until it's 0) Signed-off-by: David Sterba <[email protected]>
1 parent 5599f39 commit 6c83d15

File tree

2 files changed

+153
-0
lines changed

2 files changed

+153
-0
lines changed

fs/btrfs/ioctl.c

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5027,6 +5027,132 @@ int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
50275027
return -EINVAL;
50285028
}
50295029

5030+
/*
 * Handle BTRFS_IOC_SUBVOL_SYNC_WAIT: inspect the queue of deleted subvolumes
 * (fs_info->dead_roots) or wait until one of them is gone.
 *
 * @fs_info: filesystem the ioctl was issued on
 * @argp:    user pointer to struct btrfs_ioctl_subvol_wait, read on entry and
 *           written back by the query modes and after a completed wait
 *
 * Modes (args.mode):
 * - WAIT_FOR_QUEUED: take the subvolume id from the tail of dead_roots, then
 *   continue as WAIT_FOR_ONE
 * - WAIT_FOR_ONE: re-check once per second until args.subvolid can no longer
 *   be found in fs_roots_radix
 * - COUNT: store the number of entries of dead_roots in args.count
 * - PEEK_FIRST: store the id of the tail entry of dead_roots in args.subvolid,
 *   or 0 if the list is empty
 * - PEEK_LAST: store the id of the head entry of dead_roots in args.subvolid,
 *   or 0 if the list is empty
 *
 * Return: 0 on success, otherwise
 *   -EFAULT    copying the arguments from or to user space failed
 *   -EINVAL    unknown mode, or subvolid outside of the accepted range
 *   -ENOENT    WAIT_FOR_QUEUED on an empty queue, or the subvolume was not
 *              found on the first lookup (already cleaned or never existed)
 *   -EEXIST    the subvolume exists and has references, i.e. is not deleted
 *   -EOVERFLOW subvolid does not fit the radix tree key (32bit hosts)
 *   -EINTR     interrupted while taking subvol_sem, or the sleep ended early
 */
static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *argp)
{
	struct btrfs_root *root;
	struct btrfs_ioctl_subvol_wait args = { 0 };
	signed long sched_ret;
	int refs;
	u64 root_flags;
	bool wait_for_deletion = false;
	bool found = false;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;

	switch (args.mode) {
	case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED:
		/*
		 * Pick the entry at the tail of the queue and wait for it, the
		 * id passed in by the caller is replaced.
		 *
		 * NOTE(review): this is the same entry PEEK_FIRST reports.
		 * Confirm against the code that adds to and removes from
		 * dead_roots that the tail really is the entry cleaned last,
		 * otherwise this does not wait for all queued subvolumes.
		 */
		spin_lock(&fs_info->trans_lock);
		if (!list_empty(&fs_info->dead_roots)) {
			root = list_last_entry(&fs_info->dead_roots,
					       struct btrfs_root, root_list);
			args.subvolid = btrfs_root_id(root);
			found = true;
		}
		spin_unlock(&fs_info->trans_lock);
		if (!found)
			return -ENOENT;

		fallthrough;
	case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE:
		/* Only 0 or an id from the free objectid range is accepted. */
		if ((0 < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) ||
		    BTRFS_LAST_FREE_OBJECTID < args.subvolid)
			return -EINVAL;
		break;
	case BTRFS_SUBVOL_SYNC_COUNT:
		spin_lock(&fs_info->trans_lock);
		args.count = list_count_nodes(&fs_info->dead_roots);
		spin_unlock(&fs_info->trans_lock);
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	case BTRFS_SUBVOL_SYNC_PEEK_FIRST:
		spin_lock(&fs_info->trans_lock);
		/* Last in the list was deleted first. */
		if (!list_empty(&fs_info->dead_roots)) {
			root = list_last_entry(&fs_info->dead_roots,
					       struct btrfs_root, root_list);
			args.subvolid = btrfs_root_id(root);
		} else {
			args.subvolid = 0;
		}
		spin_unlock(&fs_info->trans_lock);
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	case BTRFS_SUBVOL_SYNC_PEEK_LAST:
		spin_lock(&fs_info->trans_lock);
		/* First in the list was deleted last. */
		if (!list_empty(&fs_info->dead_roots)) {
			root = list_first_entry(&fs_info->dead_roots,
						struct btrfs_root, root_list);
			args.subvolid = btrfs_root_id(root);
		} else {
			args.subvolid = 0;
		}
		spin_unlock(&fs_info->trans_lock);
		if (copy_to_user(argp, &args, sizeof(args)))
			return -EFAULT;
		return 0;
	default:
		return -EINVAL;
	}

	/* 32bit limitation: fs_roots_radix key is not wide enough. */
	if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX)
		return -EOVERFLOW;

	while (1) {
		/* Wait for the specific one. */
		if (down_read_interruptible(&fs_info->subvol_sem) == -EINTR)
			return -EINTR;
		refs = 0;
		root_flags = 0;
		spin_lock(&fs_info->fs_roots_radix_lock);
		root = radix_tree_lookup(&fs_info->fs_roots_radix,
					 (unsigned long)args.subvolid);
		if (root) {
			spin_lock(&root->root_item_lock);
			refs = btrfs_root_refs(&root->root_item);
			root_flags = btrfs_root_flags(&root->root_item);
			spin_unlock(&root->root_item_lock);
		}
		spin_unlock(&fs_info->fs_roots_radix_lock);
		up_read(&fs_info->subvol_sem);

		/*
		 * From here on the root pointer is only compared against NULL,
		 * it is not dereferenced outside of the locks above.
		 */
		if (!root) {
			/*
			 * Subvolume not found on the first try (deleted or
			 * never existed).
			 */
			if (!wait_for_deletion)
				return -ENOENT;

			/*
			 * We've waited and now the subvolume is gone. Return
			 * the one we waited for as the last one.
			 */
			if (copy_to_user(argp, &args, sizeof(args)))
				return -EFAULT;
			return 0;
		}

		/* Subvolume not deleted at all. */
		if (refs > 0)
			return -EEXIST;

		/* Still queued for cleaning, sleep for a second and re-check. */
		wait_for_deletion = true;
		ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD);
		sched_ret = schedule_timeout_interruptible(HZ);
		/* Early wake up or error. */
		if (sched_ret != 0)
			return -EINTR;
	}
}
5155+
50305156
long btrfs_ioctl(struct file *file, unsigned int
50315157
cmd, unsigned long arg)
50325158
{
@@ -5178,6 +5304,8 @@ long btrfs_ioctl(struct file *file, unsigned int
51785304
case BTRFS_IOC_ENCODED_WRITE_32:
51795305
return btrfs_ioctl_encoded_write(file, argp, true);
51805306
#endif
5307+
case BTRFS_IOC_SUBVOL_SYNC_WAIT:
5308+
return btrfs_ioctl_subvol_sync(fs_info, argp);
51815309
}
51825310

51835311
return -ENOTTY;

include/uapi/linux/btrfs.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,29 @@ struct btrfs_ioctl_encoded_io_args {
10491049
#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
10501050
#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
10511051

1052+
/*
1053+
* Wait for subvolume cleaning process. This queries the kernel queue and it
1054+
* can change between the calls.
1055+
*
1056+
* - FOR_ONE - specify the subvolid
1057+
* - FOR_QUEUED - wait for all currently queued
1058+
* - COUNT - count number of queued
1059+
* - PEEK_FIRST - read which is the first in the queue (to be cleaned or being
1060+
* cleaned already), or 0 if the queue is empty
1061+
* - PEEK_LAST - read the last subvolid in the queue, or 0 if the queue is empty
1062+
*/
1063+
struct btrfs_ioctl_subvol_wait {
1064+
__u64 subvolid;
1065+
__u32 mode;
1066+
__u32 count;
1067+
};
1068+
1069+
#define BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE (0)
1070+
#define BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED (1)
1071+
#define BTRFS_SUBVOL_SYNC_COUNT (2)
1072+
#define BTRFS_SUBVOL_SYNC_PEEK_FIRST (3)
1073+
#define BTRFS_SUBVOL_SYNC_PEEK_LAST (4)
1074+
10521075
/* Error codes as returned by the kernel */
10531076
enum btrfs_err_code {
10541077
BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -1181,6 +1204,8 @@ enum btrfs_err_code {
11811204
struct btrfs_ioctl_encoded_io_args)
11821205
#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
11831206
struct btrfs_ioctl_encoded_io_args)
1207+
/*
 * Wait for, or query, the queue of deleted subvolumes; the argument is
 * struct btrfs_ioctl_subvol_wait.
 *
 * NOTE(review): the handler also copies the structure back to user space
 * (COUNT, PEEK_* and a finished wait), which the _IOW direction does not
 * express. The encoded number is ABI, so confirm before switching to _IOWR.
 */
#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \
					struct btrfs_ioctl_subvol_wait)
11841209

11851210
#ifdef __cplusplus
11861211
}

0 commit comments

Comments
 (0)