diff --git a/NEWS.md b/NEWS.md
index 96608cf9..815a5f90 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,16 @@
 
 ## Changes in this version:
 
+### ENHANCEMENTS:
+
+- Added `--no-i-r-skip-unchanged` option to provide accurate progress reporting
+  for resumed transfers. This option pre-scans the destination to identify
+  unchanged files, skips them from processing, and adjusts `stats.total_size`
+  accordingly. This fixes the issue where interrupted transfers show incorrect
+  progress percentages (e.g., 1% to 80% instead of 0% to 100%) when resumed.
+  The option works for all transfer types (local, local→remote, remote→local,
+  daemon) and implies `--no-i-r`.
+
 ### BUG FIXES:
 
 - ...
diff --git a/generator.c b/generator.c
index b56fa569..a67f58be 100644
--- a/generator.c
+++ b/generator.c
@@ -30,9 +30,12 @@ extern int stdout_format_has_i;
 extern int logfile_format_has_i;
 extern int am_root;
 extern int am_server;
+extern int am_sender;
 extern int am_daemon;
 extern int inc_recurse;
+extern int no_i_r_skip_unchanged;
 extern int relative_paths;
+extern struct stats stats;
 extern int implied_dirs;
 extern int keep_dirlinks;
 extern int write_devices;
@@ -1242,6 +1245,13 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
 		return;
 	}
 
+	if (!F_IS_ACTIVE(file)) {
+		/* File was marked as inactive (unchanged) during pre-scan */
+		if (DEBUG_GTE(GENR, 2))
+			rprintf(FINFO, "skipping inactive file: %s\n", fname);
+		return;
+	}
+
 	maybe_ATTRS_ACCURATE_TIME = always_checksum ? ATTRS_ACCURATE_TIME : 0;
 
 	if (skip_dir) {
@@ -2223,6 +2233,90 @@ void check_for_finished_files(int itemizing, enum logcode code, int check_redo)
 	}
 }
 
+/* Pre-scan the file list to mark unchanged files and adjust stats.total_size.
+ * This allows accurate progress reporting on resumed transfers.
+ *
+ * local_name: when non-NULL it is copied verbatim as the destination path of
+ * every entry examined; otherwise f_name() supplies the path for each entry.
+ *
+ * Returns immediately unless no_i_r_skip_unchanged is set and cur_flist is
+ * non-NULL.  Every active regular-file entry whose destination exists, is a
+ * regular file, and passes quick_check_ok() has its length subtracted from
+ * stats.total_size and is cleared, so recv_generator() later returns early
+ * for it.  On return stats.num_files is the count of entries left active and
+ * stats.num_skipped_files is the count of entries cleared.
+ *
+ * NOTE(review): cleared entries get no further generator processing, so
+ * presumably attribute-only differences (permissions, ownership) are never
+ * applied to skipped files -- confirm that this is intended. */
+void prescan_for_unchanged(const char *local_name)
+{
+	int i, active_count = 0, skipped_count = 0;
+	char fbuf[MAXPATHLEN];
+	STRUCT_STAT st;
+
+	/* Only prescan if feature is enabled */
+	if (!no_i_r_skip_unchanged || !cur_flist)
+		return;
+
+	if (DEBUG_GTE(GENR, 1))
+		rprintf(FINFO, "pre-scanning for unchanged files\n");
+
+	for (i = 0; i < cur_flist->used; i++) {
+		struct file_struct *file = cur_flist->files[i];
+		enum filetype ftype;
+
+		if (!file || !F_IS_ACTIVE(file))
+			continue;
+
+		ftype = get_file_type(file->mode);
+
+		/* Only check regular files */
+		if (ftype != FT_REG) {
+			active_count++;
+			continue;
+		}
+
+		/* Construct destination path */
+		if (local_name)
+			strlcpy(fbuf, local_name, sizeof fbuf);
+		else
+			f_name(file, fbuf);
+
+		/* Stat the destination itself rather than a symlink's target.
+		 * A missing destination, or one that is not a regular file,
+		 * can never count as an unchanged copy and must stay active.
+		 * NOTE(review): assumes link_stat() lstat()s the path; confirm. */
+		if (link_stat(fbuf, &st, 0) < 0 || !S_ISREG(st.st_mode)) {
+			active_count++;
+			continue;
+		}
+
+		/* Check if file is unchanged */
+		if (quick_check_ok(ftype, fbuf, file, &st)) {
+			if (DEBUG_GTE(GENR, 2))
+				rprintf(FINFO, "skipping unchanged: %s\n", fbuf);
+
+			/* Subtract from total size for accurate progress */
+			stats.total_size -= F_LENGTH(file);
+
+			/* Mark as inactive to remove from file list */
+			clear_file(file);
+			skipped_count++;
+		} else {
+			active_count++;
+		}
+	}
+
+	/* Update stats to reflect only active files for progress display */
+	stats.num_files = active_count;
+	stats.num_skipped_files = skipped_count;
+
+	if (DEBUG_GTE(GENR, 1))
+		rprintf(FINFO, "skipped %d unchanged files, %d active, adjusted size: %.2f GB\n",
+			skipped_count, active_count, (double)stats.total_size / 1024 / 1024 / 1024);
+}
+
 void generate_files(int f_out, const char *local_name)
 {
 	int i, ndx, next_loopchk = 0;
@@ -2278,6 +2372,7 @@ void generate_files(int f_out, const char *local_name)
 	}
 
 	dflt_perms = (ACCESSPERMS & ~orig_umask);
+	stats.current_active_index = 0;
 
 	do {
 #ifdef SUPPORT_HARD_LINKS
diff --git a/io.c b/io.c
index 436bed45..38b1b7b4 100644
--- a/io.c
+++ b/io.c
@@ -1511,6 +1511,15 @@ static void read_a_msg(void)
raw_read_buf((char*)&stats.total_read, sizeof stats.total_read);
 		iobuf.in_multiplexed = 1;
 		break;
+	case MSG_FLIST_COUNT: /* 12 bytes: SIVAL() skipped count, then SIVAL64() total size */
+		if (msg_bytes != 12 || !am_sender)
+			goto invalid_msg;
+		stats.num_skipped_files = raw_read_int();
+		stats.num_files -= stats.num_skipped_files;
+		stats.total_size = (uint32)raw_read_int(); /* decode, never copy raw: peer byte order may differ */
+		stats.total_size |= (int64)(uint32)raw_read_int() << 32; /* NOTE(review): assumes SIVAL64() writes the low word first */
+		iobuf.in_multiplexed = 1;
+		break;
 	case MSG_REDO:
 		if (msg_bytes != 4 || !am_generator)
 			goto invalid_msg;
diff --git a/main.c b/main.c
index 9d764e16..5dd59673 100644
--- a/main.c
+++ b/main.c
@@ -40,6 +40,7 @@ extern int am_server;
 extern int am_sender;
 extern int am_daemon;
 extern int inc_recurse;
+extern int no_i_r_skip_unchanged;
 extern int blocking_io;
 extern int always_checksum;
 extern int remove_source_files;
@@ -1029,6 +1030,18 @@ static int do_recv(int f_in, int f_out, char *local_name)
 
 	io_flush(FULL_FLUSH);
 
+	/* Pre-scan for unchanged files before forking so both processes get updated stats */
+	prescan_for_unchanged(local_name);
+
+	/* Send adjusted stats to sender for accurate progress (local→remote transfers) */
+	if (no_i_r_skip_unchanged && stats.num_skipped_files > 0) {
+		char buf[12];
+		SIVAL(buf, 0, stats.num_skipped_files);
+		SIVAL64(buf, 4, stats.total_size);
+		send_msg(MSG_FLIST_COUNT, buf, 12, 0);
+		io_flush(NORMAL_FLUSH);
+	}
+
 	if ((pid = do_fork()) == -1) {
 		rsyserr(FERROR, errno, "fork failed in do_recv");
 		exit_cleanup(RERR_IPC);
diff --git a/options.c b/options.c
index 74b39bf6..f31a5311 100644
--- a/options.c
+++ b/options.c
@@ -112,6 +112,7 @@ int human_readable = 1;
 int recurse = 0;
 int mkpath_dest_arg = 0;
 int allow_inc_recurse = 1;
+int no_i_r_skip_unchanged = 0;
 int xfer_dirs = -1;
 int am_daemon = 0;
 int connect_timeout = 0;
@@ -585,7 +586,7 @@ enum {OPT_SERVER = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
       OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_BLOCK_SIZE,
       OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT, OPT_STDERR,
       OPT_OLD_COMPRESS, OPT_NEW_COMPRESS, OPT_NO_COMPRESS, OPT_OLD_ARGS,
-      OPT_STOP_AFTER, OPT_STOP_AT,
+      OPT_STOP_AFTER, OPT_STOP_AT, OPT_NO_I_R_SKIP_UNCHANGED,
       OPT_REFUSED_BASE = 9000};
 
 static struct poptOption long_options[] = {
@@ -616,6 +617,9 @@ static struct poptOption long_options[] = {
   {"no-inc-recursive", 0, POPT_ARG_VAL, &allow_inc_recurse, 0, 0, 0 },
   {"i-r", 0, POPT_ARG_VAL, &allow_inc_recurse, 1, 0, 0 },
   {"no-i-r", 0, POPT_ARG_VAL, &allow_inc_recurse, 0, 0, 0 },
+  {"no-inc-recursive-skip-unchanged", 0, POPT_ARG_NONE, 0, OPT_NO_I_R_SKIP_UNCHANGED, 0, 0 },
+  {"no-i-r-skip-unchanged", 0, POPT_ARG_NONE, 0, OPT_NO_I_R_SKIP_UNCHANGED, 0, 0 },
+  {"no-i-r-s-u", 0, POPT_ARG_NONE, 0, OPT_NO_I_R_SKIP_UNCHANGED, 0, 0 },
   {"dirs", 'd', POPT_ARG_VAL, &xfer_dirs, 2, 0, 0 },
   {"no-dirs", 0, POPT_ARG_VAL, &xfer_dirs, 0, 0, 0 },
   {"no-d", 0, POPT_ARG_VAL, &xfer_dirs, 0, 0, 0 },
@@ -1900,6 +1904,11 @@ int parse_arguments(int *argc_p, const char ***argv_p)
 			break;
 #endif
 
+		case OPT_NO_I_R_SKIP_UNCHANGED:
+			no_i_r_skip_unchanged = 1;
+			allow_inc_recurse = 0;
+			break;
+
 		case OPT_STDERR: {
 			int len;
 			arg = poptGetOptArg(pc);
@@ -2987,6 +2996,9 @@ void server_options(char **args, int *argc_p)
 	if (mkpath_dest_arg && am_sender)
 		args[ac++] = "--mkpath";
 
+	if (no_i_r_skip_unchanged)
+		args[ac++] = "--no-i-r-skip-unchanged";
+
 	if (ac > MAX_SERVER_ARGS) { /* Not possible... */
 		rprintf(FERROR, "argc overflow in server_options().\n");
 		exit_cleanup(RERR_MALLOC);
diff --git a/progress.c b/progress.c
index 87207fbf..d5f70bf2 100644
--- a/progress.c
+++ b/progress.c
@@ -78,7 +78,7 @@ static void rprint_progress(OFF_T ofs, OFF_T size, struct timeval *now, int is_l
 		int len = snprintf(eol, sizeof eol,
 			" (xfr#%d, %s-chk=%d/%d)\n",
 			stats.xferred_files, flist_eof ?
"to" : "ir", - stats.num_files - current_file_index - 1, + stats.num_files - stats.current_active_index, stats.num_files); if (INFO_GTE(PROGRESS, 2)) { static int last_len = 0; @@ -153,6 +153,10 @@ void set_current_file_index(struct file_struct *file, int ndx) else current_file_index = ndx; current_file_index -= cur_flist->flist_num; + + /* Track active file index for accurate progress with --no-i-r-skip-unchanged */ + if (file && F_IS_ACTIVE(file)) + stats.current_active_index++; } void instant_progress(const char *fname) diff --git a/rsync.1.md b/rsync.1.md index 2b4b7508..e00e1351 100644 --- a/rsync.1.md +++ b/rsync.1.md @@ -909,6 +909,43 @@ expand it. before it begins to transfer files. See [`--inc-recursive`](#opt) for more info. +0. `--no-inc-recursive-skip-unchanged`, `--no-i-r-skip-unchanged`, `--no-i-r-s-u` + + This option combines [`--no-i-r`](#opt) with pre-scanning to skip unchanged + files, providing accurate progress reporting for resumed transfers. When + using [`--info=progress2`](#opt), interrupted transfers that are resumed + normally show incorrect progress percentages because `stats.total_size` + includes already-transferred files. This option pre-scans the destination + during generator initialization, marks unchanged files for skipping, and + adjusts `stats.total_size` accordingly, resulting in accurate 0% to 100% + progress reporting. + + The pre-scan uses the same comparison logic as normal rsync operations + (checking size, mtime, checksums if [`--checksum`](#opt) is used, etc.). + Files determined to be unchanged are completely skipped from processing, + reducing both CPU and I/O overhead while fixing progress reporting. + + This option works for all transfer types: local-to-local, local-to-remote, + remote-to-local, and daemon transfers. Because the generator runs on the + receiver side and has access to destination files in all scenarios, the + feature functions correctly regardless of transfer direction. 
+ + This option implies [`--no-i-r`](#opt), so it requires the full file list + to be available before processing begins. The performance overhead is + minimal since the pre-scan performs the same stat operations that would + occur anyway during normal generator operation, just earlier in the pipeline. + + Example use cases: + + - Resuming interrupted transfers with accurate progress: + `rsync -av --no-i-r-s-u --info=progress2 src/ host:dest/` + + - Large synchronization with mostly unchanged files: + `rsync -av --no-inc-recursive-skip-unchanged /data/ /backup/` + + - Remote-to-local transfer with progress tracking: + `rsync -av --no-i-r-skip-unchanged --info=progress2 host:src/ dest/` + 0. `--relative`, `-R` Use relative paths. This means that the full path names specified on the diff --git a/rsync.h b/rsync.h index 479ac484..50f8d1e8 100644 --- a/rsync.h +++ b/rsync.h @@ -269,6 +269,7 @@ enum msgcode { MSG_LOG=FLOG, MSG_CLIENT=FCLIENT, /* sibling logging */ MSG_REDO=9, /* reprocess indicated flist index */ MSG_STATS=10, /* message has stats data for generator */ + MSG_FLIST_COUNT=11, /* receiver sends adjusted stats to sender after prescan */ MSG_IO_ERROR=22,/* the sending side had an I/O error */ MSG_IO_TIMEOUT=33,/* tell client about a daemon's timeout value */ MSG_NOOP=42, /* a do-nothing message (legacy protocol-30 only) */ @@ -1044,6 +1045,8 @@ struct stats { int created_files, created_dirs, created_symlinks, created_devices, created_specials; int deleted_files, deleted_dirs, deleted_symlinks, deleted_devices, deleted_specials; int xferred_files; + int num_skipped_files; /* files marked as unchanged by --no-i-r-skip-unchanged */ + int current_active_index; /* current index among active files (excluding skipped) */ }; struct chmod_mode_struct;