Skip to content

Commit 8533df9

Browse files
committed
fix(dwarfsextract): progress didn't work with patterns (fixes gh #316)
1 parent 6394184 commit 8533df9

File tree

5 files changed

+123
-37
lines changed

5 files changed

+123
-37
lines changed

include/dwarfs/utility/filesystem_extractor.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@
2929
#pragma once
3030

3131
#include <filesystem>
32-
#include <functional>
3332
#include <memory>
33+
#include <optional>
3434
#include <ostream>
3535
#include <string>
3636
#include <string_view>
@@ -57,11 +57,16 @@ struct filesystem_extractor_archive_format;
5757
struct filesystem_extractor_options {
5858
size_t max_queued_bytes{static_cast<size_t>(512) << 20};
5959
bool continue_on_error{false};
60-
std::function<void(std::string_view, uint64_t, uint64_t)> progress;
60+
bool enable_progress{true};
6161
};
6262

6363
class filesystem_extractor {
6464
public:
65+
struct progress_info {
66+
uint64_t extracted_bytes{0};
67+
std::optional<uint64_t> total_bytes{};
68+
};
69+
6570
filesystem_extractor(logger& lgr, os_access const& os,
6671
std::shared_ptr<file_access const> fa = nullptr);
6772

@@ -96,6 +101,8 @@ class filesystem_extractor {
96101
return impl_->extract(fs, matcher, opts);
97102
}
98103

104+
progress_info get_progress() const { return impl_->get_progress(); }
105+
99106
class impl {
100107
public:
101108
virtual ~impl() = default;
@@ -111,6 +118,7 @@ class filesystem_extractor {
111118
virtual bool
112119
extract(reader::filesystem_v2_lite const& fs, glob_matcher const* matcher,
113120
filesystem_extractor_options const& opts) = 0;
121+
virtual progress_info get_progress() const = 0;
114122
};
115123

116124
private:

src/utility/filesystem_extractor.cpp

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,21 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
254254
extract(reader::filesystem_v2_lite const& fs, glob_matcher const* matcher,
255255
filesystem_extractor_options const& opts) override;
256256

257+
filesystem_extractor::progress_info get_progress() const override {
258+
filesystem_extractor::progress_info pi;
259+
260+
{
261+
std::lock_guard lock(bytes_total_mx_);
262+
if (bytes_total_) {
263+
pi.total_bytes = *bytes_total_;
264+
}
265+
}
266+
267+
pi.extracted_bytes = bytes_written_.load();
268+
269+
return pi;
270+
}
271+
257272
private:
258273
static la_ssize_t on_stream_write(struct archive* /*a*/, void* client_data,
259274
void const* buffer, size_t length) {
@@ -372,6 +387,9 @@ class filesystem_extractor_ final : public filesystem_extractor::impl {
372387
std::array<int, 2> pipefd_{-1, -1};
373388
std::unique_ptr<std::thread> iot_;
374389
sparse_file_mode sparse_mode_{sparse_file_mode::auto_detect};
390+
std::atomic<uint64_t> bytes_written_{0};
391+
std::mutex mutable bytes_total_mx_;
392+
std::optional<uint64_t> bytes_total_{};
375393
};
376394

377395
template <typename LoggerPolicy>
@@ -420,18 +438,12 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
420438
});
421439
}
422440

423-
vfs_stat vfs;
424-
fs.statvfs(&vfs);
425-
426441
std::atomic<size_t> hard_error{0};
427442
std::atomic<size_t> soft_error{0};
428-
std::atomic<uint64_t> bytes_written{0};
429-
uint64_t const bytes_total{vfs.blocks};
430443

431444
auto do_archive = [&](worker_ptr aptr, std::shared_ptr<::archive_entry> ae,
432445
reader::inode_view const& entry) {
433-
bool added{false};
434-
446+
// hard links will have size 0
435447
if (auto const size = ::archive_entry_size(ae.get());
436448
entry.is_regular_file() && size > 0) {
437449
reader::detail::file_reader fr(fs, entry);
@@ -449,8 +461,7 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
449461
aptr->add_job<struct archive*>(
450462
[this, &hard_error, &soft_error, &opts, extents = std::move(extents),
451463
ranges = fr.read_sequential(data_ranges, sem, opts.max_queued_bytes),
452-
ae = std::move(ae), size, &sparse_mode, &bytes_written,
453-
bytes_total](struct archive* a) mutable {
464+
ae = std::move(ae), size, &sparse_mode](struct archive* a) mutable {
454465
try {
455466
assert(ae);
456467

@@ -521,9 +532,8 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
521532
extents.erase(extents.begin());
522533
}
523534

524-
if (opts.progress) {
525-
bytes_written += r.size();
526-
opts.progress(path, bytes_written, bytes_total);
535+
if (opts.enable_progress) {
536+
bytes_written_ += r.size();
527537
}
528538
}
529539

@@ -541,11 +551,7 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
541551
}
542552
}
543553
});
544-
545-
added = true;
546-
}
547-
548-
if (!added) {
554+
} else {
549555
aptr->add_job<struct archive*>(
550556
[this, ae = std::move(ae), &hard_error](struct archive* a) {
551557
try {
@@ -567,11 +573,22 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
567573
std::unordered_set<std::string> matched_dirs;
568574

569575
if (matcher) {
576+
std::unordered_set<uint32_t> seen_hardlinks;
577+
uint64_t data_size{0};
578+
570579
// Collect all directories that contain matching files to make sure
571580
// we descend into them during the extraction walk below.
572581
fs.walk([&](auto entry) {
573-
if (!entry.inode().is_directory()) {
582+
auto inode = entry.inode();
583+
if (!inode.is_directory()) {
574584
if (matcher->match(entry.unix_path())) {
585+
if (opts.enable_progress) {
586+
auto stat = fs.getattr(inode);
587+
if (stat.nlink() == 1 ||
588+
seen_hardlinks.insert(inode.inode_num()).second) {
589+
data_size += stat.allocated_size();
590+
}
591+
}
575592
while (auto parent = entry.parent()) {
576593
if (!matched_dirs.insert(parent->unix_path()).second) {
577594
break;
@@ -581,6 +598,16 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
581598
}
582599
}
583600
});
601+
602+
if (opts.enable_progress) {
603+
std::lock_guard lock(bytes_total_mx_);
604+
bytes_total_.emplace(data_size);
605+
}
606+
} else if (opts.enable_progress) {
607+
vfs_stat vfs;
608+
fs.statvfs(&vfs);
609+
std::lock_guard lock(bytes_total_mx_);
610+
bytes_total_.emplace(vfs.blocks);
584611
}
585612

586613
auto do_archive_entry = [&](auto const& entry) {
@@ -642,8 +669,8 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
642669
if (ec) {
643670
LOG_ERROR << "readlink() failed: " << ec.message();
644671
}
645-
if (opts.progress) {
646-
bytes_written += link.size();
672+
if (opts.enable_progress) {
673+
bytes_written_ += link.size();
647674
}
648675
#ifdef _WIN32
649676
std::filesystem::path linkpath(string_to_u8string(link));
@@ -720,7 +747,7 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
720747
return false;
721748
}
722749

723-
LOG_INFO << "extraction finished without errors";
750+
LOG_VERBOSE << "extraction finished without errors";
724751

725752
return true;
726753
}

test/tool_main_perfmon_test.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ TEST(dwarfsextract_test, perfmon) {
4343
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.readv_future]"));
4444
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.getattr]"));
4545
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.readlink_ec]"));
46-
EXPECT_THAT(errs, ::testing::HasSubstr("[filesystem_v2.statvfs]"));
4746
EXPECT_THAT(errs, ::testing::HasSubstr("[inode_reader_v2.readv_future]"));
4847
static std::regex const perfmon_re{R"(\[filesystem_v2\.getattr\])"
4948
R"(\s+samples:\s+\d+)"

test/tools_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1952,7 +1952,7 @@ TEST(tools_test, dwarfsextract_progress) {
19521952
#endif
19531953

19541954
ASSERT_TRUE(out);
1955-
EXPECT_GT(out->size(), 100) << *out;
1955+
EXPECT_GT(out->size(), 1) << *out;
19561956
#ifdef _WIN32
19571957
EXPECT_THAT(*out, ::testing::EndsWith("100%\r\n"));
19581958
#else

tools/src/dwarfsextract_main.cpp

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* SPDX-License-Identifier: MIT
2727
*/
2828

29+
#include <chrono>
2930
#include <exception>
3031
#include <iostream>
3132
#include <string>
@@ -52,6 +53,7 @@
5253
#include <dwarfs_tool_manpage.h>
5354

5455
namespace po = boost::program_options;
56+
using namespace std::chrono_literals;
5557

5658
namespace dwarfs::tool {
5759

@@ -65,6 +67,49 @@ constexpr std::string_view kDash{"-"};
6567
#endif
6668
#endif
6769

70+
class progress_thread {
71+
public:
72+
using fn_type = std::function<void(bool last)>;
73+
74+
progress_thread(std::chrono::nanoseconds interval, fn_type fn)
75+
: thread_([this, interval, fn] { this->run(interval, fn); }) {}
76+
77+
~progress_thread() { stop(); }
78+
79+
void stop() {
80+
try {
81+
{
82+
std::lock_guard lock(running_mx_);
83+
if (!running_) {
84+
return;
85+
}
86+
running_ = false;
87+
}
88+
cond_.notify_all();
89+
thread_.join();
90+
} catch (...) {
91+
DWARFS_PANIC(
92+
fmt::format("exception thrown in writer_progress destructor: {}",
93+
exception_str(std::current_exception())));
94+
}
95+
}
96+
97+
private:
98+
void run(std::chrono::nanoseconds interval, fn_type fn) {
99+
std::unique_lock lock(running_mx_);
100+
do {
101+
fn(false);
102+
cond_.wait_for(lock, interval);
103+
} while (running_);
104+
fn(true);
105+
}
106+
107+
mutable std::mutex running_mx_;
108+
bool running_{true};
109+
std::condition_variable cond_;
110+
std::thread thread_;
111+
};
112+
68113
} // namespace
69114

70115
int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
@@ -247,26 +292,33 @@ int dwarfsextract_main(int argc, sys_char** argv, iolayer const& iol) {
247292

248293
fsx_opts.max_queued_bytes = fsopts.block_cache.max_bytes;
249294
fsx_opts.continue_on_error = continue_on_error;
250-
int prog{-1};
295+
fsx_opts.enable_progress = stdout_progress;
296+
297+
std::optional<progress_thread> prog;
298+
251299
if (stdout_progress) {
252-
fsx_opts.progress = [&prog, &iol](std::string_view, uint64_t extracted,
253-
uint64_t total) {
254-
int p = 100 * extracted / total;
255-
if (p > prog) {
256-
prog = p;
257-
iol.out << "\r" << prog << "%";
258-
iol.out.flush();
259-
}
260-
if (extracted == total) {
261-
iol.out << "\n";
300+
prog.emplace(40ms, [&fsx, &iol](bool last) {
301+
if (!last) {
302+
auto const info = fsx.get_progress();
303+
int pct = 0;
304+
if (info.total_bytes) {
305+
pct = static_cast<int>(100 * info.extracted_bytes /
306+
info.total_bytes.value());
307+
}
308+
iol.out << "\r" << pct << "%";
309+
} else {
310+
iol.out << "\r100%\n";
262311
}
263-
};
312+
iol.out.flush();
313+
});
264314
}
265315

266316
rv = fsx.extract(fs, matcher.get(), fsx_opts) ? 0 : 2;
267317

268318
fsx.close();
269319

320+
prog.reset();
321+
270322
if (perfmon) {
271323
perfmon->summarize(iol.err);
272324
}

0 commit comments

Comments
 (0)