Skip to content

Commit 894edd0

Browse files
authored
Add TXG timestamp database
This feature enables tracking of when TXGs are committed to disk, providing an estimated timestamp for each TXG. With this information, it becomes possible to perform scrubs based on specific date ranges, improving the granularity of data management and recovery operations. Reviewed-by: Tony Hutter <[email protected]> Reviewed-by: Brian Behlendorf <[email protected]> Reviewed-by: Alexander Motin <[email protected]> Reviewed-by: Paul Dagnelie <[email protected]> Signed-off-by: Mariusz Zaborski <[email protected]> Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Closes #16853
1 parent c3496b5 commit 894edd0

File tree

21 files changed

+736
-10
lines changed

21 files changed

+736
-10
lines changed

cmd/zpool/zpool_main.c

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -513,8 +513,8 @@ get_usage(zpool_help_t idx)
513513
return (gettext("\tinitialize [-c | -s | -u] [-w] <-a | <pool> "
514514
"[<device> ...]>\n"));
515515
case HELP_SCRUB:
516-
return (gettext("\tscrub [-e | -s | -p | -C] [-w] <-a | "
517-
"<pool> [<pool> ...]>\n"));
516+
return (gettext("\tscrub [-e | -s | -p | -C | -E | -S] [-w] "
517+
"<-a | <pool> [<pool> ...]>\n"));
518518
case HELP_RESILVER:
519519
return (gettext("\tresilver <pool> ...\n"));
520520
case HELP_TRIM:
@@ -8359,6 +8359,8 @@ zpool_do_reopen(int argc, char **argv)
83598359
/*
 * Per-invocation state handed to the scrub/resilver pool callback.
 * The date fields hold seconds as returned by date_string_to_sec();
 * a value of 0 means the corresponding option was not given.
 */
typedef struct scrub_cbdata {
83608360
int cb_type;	/* scan function, e.g. POOL_SCAN_SCRUB or POOL_SCAN_RESILVER */
83618361
pool_scrub_cmd_t cb_scrub_cmd;	/* scrub command, POOL_SCRUB_NORMAL by default */
8362+
time_t cb_date_start;	/* start of the date range (-S), 0 if unset */
8363+
time_t cb_date_end;	/* end of the date range (-E, rounded up), 0 if unset */
83628364
} scrub_cbdata_t;
83638365

83648366
static boolean_t
@@ -8402,8 +8404,8 @@ scrub_callback(zpool_handle_t *zhp, void *data)
84028404
return (1);
84038405
}
84048406

8405-
err = zpool_scan(zhp, cb->cb_type, cb->cb_scrub_cmd);
8406-
8407+
err = zpool_scan_range(zhp, cb->cb_type, cb->cb_scrub_cmd,
8408+
cb->cb_date_start, cb->cb_date_end);
84078409
if (err == 0 && zpool_has_checkpoint(zhp) &&
84088410
cb->cb_type == POOL_SCAN_SCRUB) {
84098411
(void) printf(gettext("warning: will not scrub state that "
@@ -8421,10 +8423,34 @@ wait_callback(zpool_handle_t *zhp, void *data)
84218423
return (zpool_wait(zhp, *act));
84228424
}
84238425

8426+
static time_t
8427+
date_string_to_sec(const char *timestr, boolean_t rounding)
8428+
{
8429+
struct tm tm = {0};
8430+
int adjustment = rounding ? 1 : 0;
8431+
8432+
/* Allow mktime to determine timezone. */
8433+
tm.tm_isdst = -1;
8434+
8435+
if (strptime(timestr, "%Y-%m-%d %H:%M", &tm) == NULL) {
8436+
if (strptime(timestr, "%Y-%m-%d", &tm) == NULL) {
8437+
fprintf(stderr, gettext("Failed to parse the date.\n"));
8438+
usage(B_FALSE);
8439+
}
8440+
adjustment *= 24 * 60 * 60;
8441+
} else {
8442+
adjustment *= 60;
8443+
}
8444+
8445+
return (mktime(&tm) + adjustment);
8446+
}
8447+
84248448
/*
8425-
* zpool scrub [-e | -s | -p | -C] [-w] <pool> ...
8449+
* zpool scrub [-e | -s | -p | -C | -E | -S] [-w] <pool> ...
84268450
*
84278451
* -e Only scrub blocks in the error log.
8452+
* -E End date of scrub.
8453+
* -S Start date of scrub.
84288454
* -s Stop. Stops any in-progress scrub.
84298455
* -p Pause. Pause in-progress scrub.
84308456
* -w Wait. Blocks until scrub has completed.
@@ -8440,6 +8466,7 @@ zpool_do_scrub(int argc, char **argv)
84408466

84418467
cb.cb_type = POOL_SCAN_SCRUB;
84428468
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
8469+
cb.cb_date_start = cb.cb_date_end = 0;
84438470

84448471
boolean_t is_error_scrub = B_FALSE;
84458472
boolean_t is_pause = B_FALSE;
@@ -8448,17 +8475,27 @@ zpool_do_scrub(int argc, char **argv)
84488475
boolean_t scrub_all = B_FALSE;
84498476

84508477
/* check options */
8451-
while ((c = getopt(argc, argv, "aspweC")) != -1) {
8478+
while ((c = getopt(argc, argv, "aspweCE:S:")) != -1) {
84528479
switch (c) {
84538480
case 'a':
84548481
scrub_all = B_TRUE;
84558482
break;
84568483
case 'e':
84578484
is_error_scrub = B_TRUE;
84588485
break;
8486+
case 'E':
8487+
/*
8488+
* Round the date. It's better to scrub more data than
8489+
* less. This also makes the date inclusive.
8490+
*/
8491+
cb.cb_date_end = date_string_to_sec(optarg, B_TRUE);
8492+
break;
84598493
case 's':
84608494
is_stop = B_TRUE;
84618495
break;
8496+
case 'S':
8497+
cb.cb_date_start = date_string_to_sec(optarg, B_FALSE);
8498+
break;
84628499
case 'p':
84638500
is_pause = B_TRUE;
84648501
break;
@@ -8506,6 +8543,19 @@ zpool_do_scrub(int argc, char **argv)
85068543
}
85078544
}
85088545

8546+
if ((cb.cb_date_start != 0 || cb.cb_date_end != 0) &&
8547+
cb.cb_scrub_cmd != POOL_SCRUB_NORMAL) {
8548+
(void) fprintf(stderr, gettext("invalid option combination: "
8549+
"start/end date is available only with normal scrub\n"));
8550+
usage(B_FALSE);
8551+
}
8552+
if (cb.cb_date_start != 0 && cb.cb_date_end != 0 &&
8553+
cb.cb_date_start > cb.cb_date_end) {
8554+
(void) fprintf(stderr, gettext("invalid arguments: "
8555+
"end date has to be later than start date\n"));
8556+
usage(B_FALSE);
8557+
}
8558+
85098559
if (wait && (cb.cb_type == POOL_SCAN_NONE ||
85108560
cb.cb_scrub_cmd == POOL_SCRUB_PAUSE)) {
85118561
(void) fprintf(stderr, gettext("invalid option combination: "
@@ -8546,6 +8596,7 @@ zpool_do_resilver(int argc, char **argv)
85468596

85478597
cb.cb_type = POOL_SCAN_RESILVER;
85488598
cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
8599+
cb.cb_date_start = cb.cb_date_end = 0;
85498600

85508601
/* check options */
85518602
while ((c = getopt(argc, argv, "")) != -1) {

include/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ COMMON_H = \
1010
cityhash.h \
1111
zfeature_common.h \
1212
zfs_comutil.h \
13+
zfs_crrd.h \
1314
zfs_deleg.h \
1415
zfs_fletcher.h \
1516
zfs_namecheck.h \

include/libzfs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,8 @@ typedef struct initialize_cbdata {
302302
* Functions to manipulate pool and vdev state
303303
*/
304304
_LIBZFS_H int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
305+
_LIBZFS_H int zpool_scan_range(zpool_handle_t *, pool_scan_func_t,
306+
pool_scrub_cmd_t, time_t, time_t);
305307
_LIBZFS_H int zpool_initialize_one(zpool_handle_t *, void *);
306308
_LIBZFS_H int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
307309
nvlist_t *);

include/sys/dmu.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,9 @@ typedef struct dmu_buf {
414414
#define DMU_POOL_ZPOOL_CHECKPOINT "com.delphix:zpool_checkpoint"
415415
#define DMU_POOL_LOG_SPACEMAP_ZAP "com.delphix:log_spacemap_zap"
416416
#define DMU_POOL_DELETED_CLONES "com.delphix:deleted_clones"
417+
#define DMU_POOL_TXG_LOG_TIME_MINUTES "com.klaraystems:txg_log_time:minutes"
418+
#define DMU_POOL_TXG_LOG_TIME_DAYS "com.klaraystems:txg_log_time:days"
419+
#define DMU_POOL_TXG_LOG_TIME_MONTHS "com.klaraystems:txg_log_time:months"
417420

418421
/*
419422
* Allocate an object from this objset. The range of object numbers

include/sys/spa_impl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@
5555
#include <sys/dsl_deadlist.h>
5656
#include <zfeature_common.h>
5757

58+
#include "zfs_crrd.h"
59+
5860
#ifdef __cplusplus
5961
extern "C" {
6062
#endif
@@ -344,6 +346,12 @@ struct spa {
344346
spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
345347
zthr_t *spa_checkpoint_discard_zthr;
346348

349+
kmutex_t spa_txg_log_time_lock; /* for spa_txg_log_time */
350+
dbrrd_t spa_txg_log_time;
351+
uint64_t spa_last_noted_txg;
352+
uint64_t spa_last_noted_txg_time;
353+
uint64_t spa_last_flush_txg_time;
354+
347355
space_map_t *spa_syncing_log_sm; /* current log space map */
348356
avl_tree_t spa_sm_logs_by_txg;
349357
kmutex_t spa_flushed_ms_lock; /* for metaslabs_by_flushed */

include/zfs_crrd.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// SPDX-License-Identifier: CDDL-1.0
2+
/*
3+
* CDDL HEADER START
4+
*
5+
* The contents of this file are subject to the terms of the
6+
* Common Development and Distribution License (the "License").
7+
* You may not use this file except in compliance with the License.
8+
*
9+
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10+
* or https://opensource.org/licenses/CDDL-1.0.
11+
* See the License for the specific language governing permissions
12+
* and limitations under the License.
13+
*
14+
* When distributing Covered Code, include this CDDL HEADER in each
15+
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16+
* If applicable, add the following below this CDDL HEADER, with the
17+
* fields enclosed by brackets "[]" replaced with your own identifying
18+
* information: Portions Copyright [yyyy] [name of copyright owner]
19+
*
20+
* CDDL HEADER END
21+
*/
22+
/*
23+
* Copyright (c) 2024 Klara Inc.
24+
*
25+
* This software was developed by
26+
* Mariusz Zaborski <[email protected]>
27+
* Fred Weigel <[email protected]>
28+
* under sponsorship from Wasabi Technology, Inc. and Klara Inc.
29+
*/
30+
31+
#ifndef _CRRD_H_
32+
#define _CRRD_H_
33+
34+
#define RRD_MAX_ENTRIES 256
35+
36+
#define RRD_ENTRY_SIZE sizeof (uint64_t)
37+
#define RRD_STRUCT_ELEM (sizeof (rrd_t) / RRD_ENTRY_SIZE)
38+
39+
typedef enum {
40+
DBRRD_FLOOR,
41+
DBRRD_CEILING
42+
} dbrrd_rounding_t;
43+
44+
typedef struct {
45+
uint64_t rrdd_time;
46+
uint64_t rrdd_txg;
47+
} rrd_data_t;
48+
49+
typedef struct {
50+
uint64_t rrd_head; /* head (beginning) */
51+
uint64_t rrd_tail; /* tail (end) */
52+
uint64_t rrd_length;
53+
54+
rrd_data_t rrd_entries[RRD_MAX_ENTRIES];
55+
} rrd_t;
56+
57+
typedef struct {
58+
rrd_t dbr_minutes;
59+
rrd_t dbr_days;
60+
rrd_t dbr_months;
61+
} dbrrd_t;
62+
63+
size_t rrd_len(rrd_t *rrd);
64+
65+
const rrd_data_t *rrd_entry(rrd_t *r, size_t i);
66+
rrd_data_t *rrd_tail_entry(rrd_t *rrd);
67+
uint64_t rrd_tail(rrd_t *rrd);
68+
uint64_t rrd_get(rrd_t *rrd, size_t i);
69+
70+
void rrd_add(rrd_t *rrd, hrtime_t time, uint64_t txg);
71+
72+
void dbrrd_add(dbrrd_t *db, hrtime_t time, uint64_t txg);
73+
uint64_t dbrrd_query(dbrrd_t *r, hrtime_t tv, dbrrd_rounding_t rouding);
74+
75+
#endif

lib/libzfs/libzfs.abi

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,7 @@
574574
<elf-symbol name='zpool_reguid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
575575
<elf-symbol name='zpool_reopen_one' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
576576
<elf-symbol name='zpool_scan' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
577+
<elf-symbol name='zpool_scan_range' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
577578
<elf-symbol name='zpool_search_import' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
578579
<elf-symbol name='zpool_set_bootenv' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
579580
<elf-symbol name='zpool_set_guid' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@@ -6946,6 +6947,14 @@
69466947
<parameter type-id='b51cf3c2' name='cmd'/>
69476948
<return type-id='95e97e5e'/>
69486949
</function-decl>
6950+
<function-decl name='zpool_scan_range' mangled-name='zpool_scan_range' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_scan_range'>
6951+
<parameter type-id='4c81de99' name='zhp'/>
6952+
<parameter type-id='7313fbe2' name='func'/>
6953+
<parameter type-id='b51cf3c2' name='cmd'/>
6954+
<parameter type-id='c9d12d66' name='date_start'/>
6955+
<parameter type-id='c9d12d66' name='date_end'/>
6956+
<return type-id='95e97e5e'/>
6957+
</function-decl>
69496958
<function-decl name='zpool_find_vdev_by_physpath' mangled-name='zpool_find_vdev_by_physpath' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zpool_find_vdev_by_physpath'>
69506959
<parameter type-id='4c81de99' name='zhp'/>
69516960
<parameter type-id='80f4b756' name='ppath'/>

lib/libzfs/libzfs_pool.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2773,7 +2773,13 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
27732773
* Scan the pool.
27742774
*/
27752775
int
2776-
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
2776+
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd) {
2777+
return (zpool_scan_range(zhp, func, cmd, 0, 0));
2778+
}
2779+
2780+
int
2781+
zpool_scan_range(zpool_handle_t *zhp, pool_scan_func_t func,
2782+
pool_scrub_cmd_t cmd, time_t date_start, time_t date_end)
27772783
{
27782784
char errbuf[ERRBUFLEN];
27792785
int err;
@@ -2782,6 +2788,8 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
27822788
nvlist_t *args = fnvlist_alloc();
27832789
fnvlist_add_uint64(args, "scan_type", (uint64_t)func);
27842790
fnvlist_add_uint64(args, "scan_command", (uint64_t)cmd);
2791+
fnvlist_add_uint64(args, "scan_date_start", (uint64_t)date_start);
2792+
fnvlist_add_uint64(args, "scan_date_end", (uint64_t)date_end);
27852793

27862794
err = lzc_scrub(ZFS_IOC_POOL_SCRUB, zhp->zpool_name, args, NULL);
27872795
fnvlist_free(args);

lib/libzpool/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ nodist_libzpool_la_SOURCES = \
177177
module/zfs/zfs_byteswap.c \
178178
module/zfs/zfs_chksum.c \
179179
module/zfs/zfs_debug_common.c \
180+
module/zfs/zfs_crrd.c \
180181
module/zfs/zfs_fm.c \
181182
module/zfs/zfs_fuid.c \
182183
module/zfs/zfs_ratelimit.c \

man/man4/zfs.4

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2246,6 +2246,21 @@ Defer frees starting in this pass.
22462246
Maximum memory used for prefetching a checkpoint's space map on each
22472247
vdev while discarding the checkpoint.
22482248
.
2249+
.It Sy zfs_spa_note_txg_time Ns = Ns Sy 600 Pq uint
2250+
This parameter defines, in seconds, how often the TXG time database will record
2251+
a new TXG if it has changed.
2252+
After the specified time interval has passed, and if the TXG number has changed,
2253+
the new value is recorded in the database.
2254+
These timestamps can later be used for more granular operations, such as
2255+
scrubbing.
2256+
.
2257+
.It Sy zfs_spa_flush_txg_time Ns = Ns Sy 600 Pq uint
2258+
This parameter defines, in seconds, how often ZFS will flush
2259+
the TXG time database to disk.
2260+
It ensures that the data is actually written to persistent storage, which helps
2261+
preserve the database in case of unexpected shutdown.
2262+
The database is also automatically flushed during the export sequence.
2263+
.
22492264
.It Sy zfs_special_class_metadata_reserve_pct Ns = Ns Sy 25 Ns % Pq uint
22502265
Only allow small data blocks to be allocated on the special and dedup vdev
22512266
types when the available free space percentage on these vdevs exceeds this

0 commit comments

Comments
 (0)