Skip to content

Commit a5b665d

Browse files
authored
DDT: Switch to using wmsums for lookup stats
ddt_lookup() is very busy code that runs under a highly congested global lock. Anything we can save here is very important. Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Rob Norris <[email protected]> Signed-off-by: Alexander Motin <[email protected]> Closes #17980
1 parent 48f33c1 commit a5b665d

File tree

2 files changed

+99
-8
lines changed

2 files changed

+99
-8
lines changed

include/sys/ddt.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <sys/fs/zfs.h>
3434
#include <sys/zio.h>
3535
#include <sys/dmu.h>
36+
#include <sys/wmsum.h>
3637

3738
#ifdef __cplusplus
3839
extern "C" {
@@ -296,6 +297,20 @@ typedef struct {
296297

297298
kstat_t *ddt_ksp; /* kstats context */
298299

300+
/* wmsums for hot-path lookup counters */
301+
wmsum_t ddt_kstat_dds_lookup;
302+
wmsum_t ddt_kstat_dds_lookup_live_hit;
303+
wmsum_t ddt_kstat_dds_lookup_live_wait;
304+
wmsum_t ddt_kstat_dds_lookup_live_miss;
305+
wmsum_t ddt_kstat_dds_lookup_existing;
306+
wmsum_t ddt_kstat_dds_lookup_new;
307+
wmsum_t ddt_kstat_dds_lookup_log_hit;
308+
wmsum_t ddt_kstat_dds_lookup_log_active_hit;
309+
wmsum_t ddt_kstat_dds_lookup_log_flushing_hit;
310+
wmsum_t ddt_kstat_dds_lookup_log_miss;
311+
wmsum_t ddt_kstat_dds_lookup_stored_hit;
312+
wmsum_t ddt_kstat_dds_lookup_stored_miss;
313+
299314
enum zio_checksum ddt_checksum; /* checksum algorithm in use */
300315
spa_t *ddt_spa; /* pool this ddt is on */
301316
objset_t *ddt_os; /* ddt objset (always MOS) */

module/zfs/ddt.c

Lines changed: 84 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -362,20 +362,26 @@ static const ddt_kstats_t ddt_kstats_template = {
362362
};
363363

364364
#ifdef _KERNEL
365+
/*
366+
* Hot-path lookup counters use wmsums to avoid cache line bouncing.
367+
* DDT_KSTAT_BUMP: Increment a wmsum counter (lookup stats).
368+
*
369+
* Sync-only counters use direct kstat assignment (no atomics needed).
370+
* DDT_KSTAT_SET: Set a value (log entry counts, rates).
371+
* DDT_KSTAT_SUB: Subtract from a value (decrement log entry counts).
372+
* DDT_KSTAT_ZERO: Zero a value (clear log entry counts).
373+
*/
365374
#define _DDT_KSTAT_STAT(ddt, stat) \
366375
&((ddt_kstats_t *)(ddt)->ddt_ksp->ks_data)->stat.value.ui64
367376
#define DDT_KSTAT_BUMP(ddt, stat) \
368-
do { atomic_inc_64(_DDT_KSTAT_STAT(ddt, stat)); } while (0)
369-
#define DDT_KSTAT_ADD(ddt, stat, val) \
370-
do { atomic_add_64(_DDT_KSTAT_STAT(ddt, stat), val); } while (0)
377+
wmsum_add(&(ddt)->ddt_kstat_##stat, 1)
371378
#define DDT_KSTAT_SUB(ddt, stat, val) \
372-
do { atomic_sub_64(_DDT_KSTAT_STAT(ddt, stat), val); } while (0)
379+
do { *_DDT_KSTAT_STAT(ddt, stat) -= (val); } while (0)
373380
#define DDT_KSTAT_SET(ddt, stat, val) \
374-
do { atomic_store_64(_DDT_KSTAT_STAT(ddt, stat), val); } while (0)
381+
do { *_DDT_KSTAT_STAT(ddt, stat) = (val); } while (0)
375382
#define DDT_KSTAT_ZERO(ddt, stat) DDT_KSTAT_SET(ddt, stat, 0)
376383
#else
377384
#define DDT_KSTAT_BUMP(ddt, stat) do {} while (0)
378-
#define DDT_KSTAT_ADD(ddt, stat, val) do {} while (0)
379385
#define DDT_KSTAT_SUB(ddt, stat, val) do {} while (0)
380386
#define DDT_KSTAT_SET(ddt, stat, val) do {} while (0)
381387
#define DDT_KSTAT_ZERO(ddt, stat) do {} while (0)
@@ -783,7 +789,7 @@ ddt_class_start(void)
783789
{
784790
uint64_t start = gethrestime_sec();
785791

786-
if (ddt_prune_artificial_age) {
792+
if (unlikely(ddt_prune_artificial_age)) {
787793
/*
788794
debug aid -- simulate a wider distribution
789795
* so we don't have to wait for an aged DDT
@@ -1171,7 +1177,7 @@ ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t verify)
11711177

11721178
ASSERT(MUTEX_HELD(&ddt->ddt_lock));
11731179

1174-
if (ddt->ddt_version == DDT_VERSION_UNCONFIGURED) {
1180+
if (unlikely(ddt->ddt_version == DDT_VERSION_UNCONFIGURED)) {
11751181
/*
11761182
* This is the first use of this DDT since the pool was
11771183
* created; finish getting it ready for use.
@@ -1594,19 +1600,75 @@ ddt_configure(ddt_t *ddt, boolean_t new)
15941600
return (0);
15951601
}
15961602

1603+
/*
 * kstat update callback for a DDT's statistics (installed as ks_update in
 * ddt_table_alloc_kstats(), with ks_private pointing at the ddt_t).
 *
 * Copies the current wmsum_value() of every lookup counter kept in the
 * ddt_t into the matching field of the ddt_kstats_t held in ks_data.
 *
 * Returns 0 on success, or EACCES (via SET_ERROR) when rw is KSTAT_WRITE.
 */
static int
1604+
ddt_kstat_update(kstat_t *ksp, int rw)
1605+
{
1606+
ddt_t *ddt = ksp->ks_private;
1607+
ddt_kstats_t *dds = ksp->ks_data;
1608+
1609+
/* Write requests are rejected; nothing is updated in that case. */
if (rw == KSTAT_WRITE)
1610+
return (SET_ERROR(EACCES));
1611+
1612+
/*
 * Aggregate wmsum counters for lookup stats: each kstat field below is
 * overwritten with the value of the wmsum of the same name.
 */
1613+
dds->dds_lookup.value.ui64 =
1614+
wmsum_value(&ddt->ddt_kstat_dds_lookup);
1615+
dds->dds_lookup_live_hit.value.ui64 =
1616+
wmsum_value(&ddt->ddt_kstat_dds_lookup_live_hit);
1617+
dds->dds_lookup_live_wait.value.ui64 =
1618+
wmsum_value(&ddt->ddt_kstat_dds_lookup_live_wait);
1619+
dds->dds_lookup_live_miss.value.ui64 =
1620+
wmsum_value(&ddt->ddt_kstat_dds_lookup_live_miss);
1621+
dds->dds_lookup_existing.value.ui64 =
1622+
wmsum_value(&ddt->ddt_kstat_dds_lookup_existing);
1623+
dds->dds_lookup_new.value.ui64 =
1624+
wmsum_value(&ddt->ddt_kstat_dds_lookup_new);
1625+
dds->dds_lookup_log_hit.value.ui64 =
1626+
wmsum_value(&ddt->ddt_kstat_dds_lookup_log_hit);
1627+
dds->dds_lookup_log_active_hit.value.ui64 =
1628+
wmsum_value(&ddt->ddt_kstat_dds_lookup_log_active_hit);
1629+
dds->dds_lookup_log_flushing_hit.value.ui64 =
1630+
wmsum_value(&ddt->ddt_kstat_dds_lookup_log_flushing_hit);
1631+
dds->dds_lookup_log_miss.value.ui64 =
1632+
wmsum_value(&ddt->ddt_kstat_dds_lookup_log_miss);
1633+
dds->dds_lookup_stored_hit.value.ui64 =
1634+
wmsum_value(&ddt->ddt_kstat_dds_lookup_stored_hit);
1635+
dds->dds_lookup_stored_miss.value.ui64 =
1636+
wmsum_value(&ddt->ddt_kstat_dds_lookup_stored_miss);
1637+
1638+
/* Sync-only counters are already set directly in kstats */
1639+
1640+
return (0);
1641+
}
1642+
15971643
static void
15981644
ddt_table_alloc_kstats(ddt_t *ddt)
15991645
{
16001646
char *mod = kmem_asprintf("zfs/%s", spa_name(ddt->ddt_spa));
16011647
char *name = kmem_asprintf("ddt_stats_%s",
16021648
zio_checksum_table[ddt->ddt_checksum].ci_name);
16031649

1650+
/* Initialize wmsums for lookup counters */
1651+
wmsum_init(&ddt->ddt_kstat_dds_lookup, 0);
1652+
wmsum_init(&ddt->ddt_kstat_dds_lookup_live_hit, 0);
1653+
wmsum_init(&ddt->ddt_kstat_dds_lookup_live_wait, 0);
1654+
wmsum_init(&ddt->ddt_kstat_dds_lookup_live_miss, 0);
1655+
wmsum_init(&ddt->ddt_kstat_dds_lookup_existing, 0);
1656+
wmsum_init(&ddt->ddt_kstat_dds_lookup_new, 0);
1657+
wmsum_init(&ddt->ddt_kstat_dds_lookup_log_hit, 0);
1658+
wmsum_init(&ddt->ddt_kstat_dds_lookup_log_active_hit, 0);
1659+
wmsum_init(&ddt->ddt_kstat_dds_lookup_log_flushing_hit, 0);
1660+
wmsum_init(&ddt->ddt_kstat_dds_lookup_log_miss, 0);
1661+
wmsum_init(&ddt->ddt_kstat_dds_lookup_stored_hit, 0);
1662+
wmsum_init(&ddt->ddt_kstat_dds_lookup_stored_miss, 0);
1663+
16041664
ddt->ddt_ksp = kstat_create(mod, 0, name, "misc", KSTAT_TYPE_NAMED,
16051665
sizeof (ddt_kstats_t) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
16061666
if (ddt->ddt_ksp != NULL) {
16071667
ddt_kstats_t *dds = kmem_alloc(sizeof (ddt_kstats_t), KM_SLEEP);
16081668
memcpy(dds, &ddt_kstats_template, sizeof (ddt_kstats_t));
16091669
ddt->ddt_ksp->ks_data = dds;
1670+
ddt->ddt_ksp->ks_update = ddt_kstat_update;
1671+
ddt->ddt_ksp->ks_private = ddt;
16101672
kstat_install(ddt->ddt_ksp);
16111673
}
16121674

@@ -1648,6 +1710,20 @@ ddt_table_free(ddt_t *ddt)
16481710
kstat_delete(ddt->ddt_ksp);
16491711
}
16501712

1713+
/* Cleanup wmsums for lookup counters */
1714+
wmsum_fini(&ddt->ddt_kstat_dds_lookup);
1715+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_live_hit);
1716+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_live_wait);
1717+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_live_miss);
1718+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_existing);
1719+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_new);
1720+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_log_hit);
1721+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_log_active_hit);
1722+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_log_flushing_hit);
1723+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_log_miss);
1724+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_stored_hit);
1725+
wmsum_fini(&ddt->ddt_kstat_dds_lookup_stored_miss);
1726+
16511727
ddt_log_free(ddt);
16521728
ASSERT0(avl_numnodes(&ddt->ddt_tree));
16531729
ASSERT0(avl_numnodes(&ddt->ddt_repair_tree));

0 commit comments

Comments
 (0)