Skip to content

Commit 5d28743

Browse files
kakrakreijack
andcommitted
btrfs: add allocator_hint mode
When this mode is enabled, the chunk allocation policy is modified as follows.

Each disk may have a different tag:
- BTRFS_DEV_ALLOCATION_PREFERRED_METADATA
- BTRFS_DEV_ALLOCATION_METADATA_ONLY
- BTRFS_DEV_ALLOCATION_DATA_ONLY
- BTRFS_DEV_ALLOCATION_PREFERRED_DATA (default)

Where:
- ALLOCATION_PREFERRED_X means that this disk is preferred for the X chunk type (the other type may be allowed when space is low).
- ALLOCATION_X_ONLY means that the disk is used *only* for the X chunk type. This also means that it is a preferred choice.

Each time the allocator allocates a chunk of type X, it first takes the disks tagged as ALLOCATION_X_ONLY or ALLOCATION_PREFERRED_X. If the space is not enough, it also uses the disks tagged as ALLOCATION_METADATA_ONLY. If the space is still not enough, it also uses the other disks, with the exception of the ones marked as ALLOCATION_PREFERRED_Y, where Y is the other type of chunk (i.e. not X).

Co-authored-by: Goffredo Baroncelli <kreijack@inwind.it>
Signed-off-by: Kai Krakow <kai@kaishome.de>
1 parent 42e48fd commit 5d28743

File tree

2 files changed

+107
-1
lines changed

2 files changed

+107
-1
lines changed

fs/btrfs/volumes.c

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,21 @@ enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags
184184
return BTRFS_BG_FLAG_TO_INDEX(profile);
185185
}
186186

187+
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
188+
#define BTRFS_DEV_ALLOCATION_MASK ((1ULL << \
189+
BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) - 1)
190+
#define BTRFS_DEV_ALLOCATION_MASK_COUNT (1ULL << \
191+
BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT)
192+
193+
static const char alloc_hint_map[BTRFS_DEV_ALLOCATION_MASK_COUNT] = {
194+
[BTRFS_DEV_ALLOCATION_DATA_ONLY] = -1,
195+
[BTRFS_DEV_ALLOCATION_PREFERRED_DATA] = 0,
196+
[BTRFS_DEV_ALLOCATION_PREFERRED_METADATA] = 1,
197+
[BTRFS_DEV_ALLOCATION_METADATA_ONLY] = 2,
198+
/* the other values are set to 0 */
199+
};
200+
#endif
201+
187202
const char *btrfs_bg_type_to_raid_name(u64 flags)
188203
{
189204
const int index = btrfs_bg_flags_to_raid_index(flags);
@@ -5089,13 +5104,20 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
50895104
}
50905105

50915106
/*
5092-
* sort the devices in descending order by max_avail, total_avail
5107+
* sort the devices in descending order by alloc_hint (optional),
5108+
* max_avail, total_avail
50935109
*/
50945110
static int btrfs_cmp_device_info(const void *a, const void *b)
50955111
{
50965112
const struct btrfs_device_info *di_a = a;
50975113
const struct btrfs_device_info *di_b = b;
50985114

5115+
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
5116+
if (di_a->alloc_hint > di_b->alloc_hint)
5117+
return -1;
5118+
if (di_a->alloc_hint < di_b->alloc_hint)
5119+
return 1;
5120+
#endif
50995121
if (di_a->max_avail > di_b->max_avail)
51005122
return -1;
51015123
if (di_a->max_avail < di_b->max_avail)
@@ -5303,16 +5325,97 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
53035325
devices_info[ndevs].max_avail = max_avail;
53045326
devices_info[ndevs].total_avail = total_avail;
53055327
devices_info[ndevs].dev = device;
5328+
5329+
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
5330+
if ((ctl->type & BTRFS_BLOCK_GROUP_DATA) &&
5331+
(ctl->type & BTRFS_BLOCK_GROUP_METADATA)) {
5332+
/*
5333+
* if mixed bg set all the alloc_hint
5334+
* fields to the same value, so the sorting
5335+
* is not affected
5336+
*/
5337+
devices_info[ndevs].alloc_hint = 0;
5338+
} else if (ctl->type & BTRFS_BLOCK_GROUP_DATA) {
5339+
int hint = device->type & BTRFS_DEV_ALLOCATION_MASK;
5340+
5341+
/*
5342+
* skip BTRFS_DEV_ALLOCATION_METADATA_ONLY disks
5343+
*/
5344+
if (hint == BTRFS_DEV_ALLOCATION_METADATA_ONLY)
5345+
continue;
5346+
/*
5347+
* if a data chunk must be allocated,
5348+
* sort also by hint (data disk
5349+
* higher priority)
5350+
*/
5351+
devices_info[ndevs].alloc_hint = -alloc_hint_map[hint];
5352+
} else { /* BTRFS_BLOCK_GROUP_METADATA */
5353+
int hint = device->type & BTRFS_DEV_ALLOCATION_MASK;
5354+
5355+
/*
5356+
* skip BTRFS_DEV_ALLOCATION_DATA_ONLY disks
5357+
*/
5358+
if (hint == BTRFS_DEV_ALLOCATION_DATA_ONLY)
5359+
continue;
5360+
/*
5361+
* if a metadata chunk must be allocated,
5362+
* sort also by hint (metadata hint
5363+
* higher priority)
5364+
*/
5365+
devices_info[ndevs].alloc_hint = alloc_hint_map[hint];
5366+
}
5367+
#endif
5368+
53065369
++ndevs;
53075370
}
53085371
ctl->ndevs = ndevs;
53095372

5373+
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
5374+
/*
5375+
* no devices available
5376+
*/
5377+
if (!ndevs)
5378+
return 0;
5379+
#endif
5380+
53105381
/*
53115382
* now sort the devices by hole size / available space
53125383
*/
53135384
sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
53145385
btrfs_cmp_device_info, NULL);
53155386

5387+
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
5388+
/*
5389+
* select the minimum set of disks grouped by hint that
5390+
* can host the chunk
5391+
*/
5392+
ndevs = 0;
5393+
while (ndevs < ctl->ndevs) {
5394+
int hint = devices_info[ndevs++].alloc_hint;
5395+
while (ndevs < ctl->ndevs &&
5396+
devices_info[ndevs].alloc_hint == hint)
5397+
ndevs++;
5398+
if (ndevs >= ctl->devs_min)
5399+
break;
5400+
}
5401+
5402+
BUG_ON(ndevs > ctl->ndevs);
5403+
ctl->ndevs = ndevs;
5404+
5405+
/*
5406+
* the next layers require the devices_info ordered by
5407+
* max_avail. If we are returning two (or more) different
5408+
* groups of alloc_hint, this is not always true. So sort
5409+
* them again.
5410+
*/
5411+
5412+
for (int i = 0 ; i < ndevs ; i++)
5413+
devices_info[i].alloc_hint = 0;
5414+
5415+
sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
5416+
btrfs_cmp_device_info, NULL);
5417+
#endif
5418+
53165419
return 0;
53175420
}
53185421

fs/btrfs/volumes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,9 @@ struct btrfs_device_info {
599599
u64 dev_offset;
600600
u64 max_avail;
601601
u64 total_avail;
602+
#ifdef CONFIG_BTRFS_ALLOCATOR_HINTS
603+
int alloc_hint;
604+
#endif
602605
};
603606

604607
struct btrfs_raid_attr {

0 commit comments

Comments
 (0)