Skip to content

Commit f3e2e53

Browse files
yhrcmaiolino
authored and committed
xfs: add inode to zone caching for data placement
Placing data from the same file in the same zone is a great heuristic for reducing write amplification and we do this already - but only for sequential writes. To support placing data in the same way for random writes, reuse the xfs mru cache to map inodes to open zones on first write. If a mapping is present, use the open zone for data placement for this file until the zone is full. Signed-off-by: Hans Holmberg <[email protected]> Reviewed-by: Darrick J. Wong <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]> Signed-off-by: Carlos Maiolino <[email protected]>
1 parent 70b95cb commit f3e2e53

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

fs/xfs/xfs_mount.h

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -236,6 +236,7 @@ typedef struct xfs_mount {
236236
bool m_update_sb; /* sb needs update in mount */
237237
unsigned int m_max_open_zones;
238238
unsigned int m_zonegc_low_space;
239+
struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */
239240

240241
/* max_atomic_write mount option value */
241242
unsigned long long m_awu_max_bytes;

fs/xfs/xfs_zone_alloc.c

Lines changed: 109 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -24,6 +24,7 @@
2424
#include "xfs_zone_priv.h"
2525
#include "xfs_zones.h"
2626
#include "xfs_trace.h"
27+
#include "xfs_mru_cache.h"
2728

2829
void
2930
xfs_open_zone_put(
@@ -796,6 +797,100 @@ xfs_submit_zoned_bio(
796797
submit_bio(&ioend->io_bio);
797798
}
798799

800+
/*
801+
* Cache the last zone written to for an inode so that it is considered first
802+
* for subsequent writes.
803+
*/
804+
struct xfs_zone_cache_item {
805+
struct xfs_mru_cache_elem mru;
806+
struct xfs_open_zone *oz;
807+
};
808+
809+
static inline struct xfs_zone_cache_item *
810+
xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
811+
{
812+
return container_of(mru, struct xfs_zone_cache_item, mru);
813+
}
814+
815+
static void
816+
xfs_zone_cache_free_func(
817+
void *data,
818+
struct xfs_mru_cache_elem *mru)
819+
{
820+
struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
821+
822+
xfs_open_zone_put(item->oz);
823+
kfree(item);
824+
}
825+
826+
/*
827+
* Check if we have a cached last open zone available for the inode and
828+
* if yes return a reference to it.
829+
*/
830+
static struct xfs_open_zone *
831+
xfs_cached_zone(
832+
struct xfs_mount *mp,
833+
struct xfs_inode *ip)
834+
{
835+
struct xfs_mru_cache_elem *mru;
836+
struct xfs_open_zone *oz;
837+
838+
mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
839+
if (!mru)
840+
return NULL;
841+
oz = xfs_zone_cache_item(mru)->oz;
842+
if (oz) {
843+
/*
844+
* GC only steals open zones at mount time, so no GC zones
845+
* should end up in the cache.
846+
*/
847+
ASSERT(!oz->oz_is_gc);
848+
ASSERT(atomic_read(&oz->oz_ref) > 0);
849+
atomic_inc(&oz->oz_ref);
850+
}
851+
xfs_mru_cache_done(mp->m_zone_cache);
852+
return oz;
853+
}
854+
855+
/*
856+
* Update the last used zone cache for a given inode.
857+
*
858+
* The caller must have a reference on the open zone.
859+
*/
860+
static void
861+
xfs_zone_cache_create_association(
862+
struct xfs_inode *ip,
863+
struct xfs_open_zone *oz)
864+
{
865+
struct xfs_mount *mp = ip->i_mount;
866+
struct xfs_zone_cache_item *item = NULL;
867+
struct xfs_mru_cache_elem *mru;
868+
869+
ASSERT(atomic_read(&oz->oz_ref) > 0);
870+
atomic_inc(&oz->oz_ref);
871+
872+
mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
873+
if (mru) {
874+
/*
875+
* If we have an association already, update it to point to the
876+
* new zone.
877+
*/
878+
item = xfs_zone_cache_item(mru);
879+
xfs_open_zone_put(item->oz);
880+
item->oz = oz;
881+
xfs_mru_cache_done(mp->m_zone_cache);
882+
return;
883+
}
884+
885+
item = kmalloc(sizeof(*item), GFP_KERNEL);
886+
if (!item) {
887+
xfs_open_zone_put(oz);
888+
return;
889+
}
890+
item->oz = oz;
891+
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
892+
}
893+
799894
void
800895
xfs_zone_alloc_and_submit(
801896
struct iomap_ioend *ioend,
@@ -819,11 +914,16 @@ xfs_zone_alloc_and_submit(
819914
*/
820915
if (!*oz && ioend->io_offset)
821916
*oz = xfs_last_used_zone(ioend);
917+
if (!*oz)
918+
*oz = xfs_cached_zone(mp, ip);
919+
822920
if (!*oz) {
823921
select_zone:
824922
*oz = xfs_select_zone(mp, write_hint, pack_tight);
825923
if (!*oz)
826924
goto out_error;
925+
926+
xfs_zone_cache_create_association(ip, *oz);
827927
}
828928

829929
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -1211,6 +1311,14 @@ xfs_mount_zones(
12111311
error = xfs_zone_gc_mount(mp);
12121312
if (error)
12131313
goto out_free_zone_info;
1314+
1315+
/*
1316+
* Set up a mru cache to track inode to open zone for data placement
1317+
* purposes. The magic values for group count and life time is the
1318+
* same as the defaults for file streams, which seems sane enough.
1319+
*/
1320+
xfs_mru_cache_create(&mp->m_zone_cache, mp,
1321+
5000, 10, xfs_zone_cache_free_func);
12141322
return 0;
12151323

12161324
out_free_zone_info:
@@ -1224,4 +1332,5 @@ xfs_unmount_zones(
12241332
{
12251333
xfs_zone_gc_unmount(mp);
12261334
xfs_free_zone_info(mp->m_zone_info);
1335+
xfs_mru_cache_destroy(mp->m_zone_cache);
12271336
}

0 commit comments

Comments
 (0)