Skip to content

Commit 4f02244

Browse files
committed
fix(filesystem_extractor): handle sparse files/hardlinks progress
1 parent 1066f22 commit 4f02244

File tree

3 files changed

+165
-17
lines changed

3 files changed

+165
-17
lines changed

src/utility/filesystem_extractor.cpp

Lines changed: 57 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,32 @@ sparse_file_mode get_sparse_file_mode_for_format(int format) {
102102
return sparse_file_mode::no_sparse;
103103
}
104104

105+
bool format_supports_hardlinks(int const format) {
106+
switch (format) {
107+
case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
108+
case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
109+
return true;
110+
111+
default:
112+
break;
113+
}
114+
115+
int const fmtbase = format & ARCHIVE_FORMAT_BASE_MASK;
116+
117+
switch (fmtbase) {
118+
case ARCHIVE_FORMAT_ISO9660:
119+
case ARCHIVE_FORMAT_SHAR:
120+
case ARCHIVE_FORMAT_TAR:
121+
case ARCHIVE_FORMAT_XAR:
122+
return true;
123+
124+
default:
125+
break;
126+
}
127+
128+
return false;
129+
}
130+
105131
la_ssize_t write_range_data(sparse_file_mode mode, struct archive* a,
106132
void const* data, size_t size, la_int64_t offset) {
107133
if (mode == sparse_file_mode::sparse_disk) {
@@ -399,17 +425,22 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
399425
DWARFS_CHECK(a_, "filesystem not opened");
400426

401427
auto sparse_mode = sparse_mode_;
428+
bool supports_hardlinks{true};
402429

403430
auto lr = ::archive_entry_linkresolver_new();
404431

405432
scope_exit free_resolver{[&] { ::archive_entry_linkresolver_free(lr); }};
406433

407434
if (auto fmt = ::archive_format(a_.get())) {
435+
LOG_DEBUG << "setting link resolver strategy for format " << fmt;
436+
408437
::archive_entry_linkresolver_set_strategy(lr, fmt);
409438

410439
if (sparse_mode == sparse_file_mode::auto_detect) {
411440
sparse_mode = get_sparse_file_mode_for_format(fmt);
412441
}
442+
443+
supports_hardlinks = format_supports_hardlinks(fmt);
413444
}
414445

415446
::archive_entry* spare = nullptr;
@@ -589,11 +620,14 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
589620

590621
if (opts.enable_progress) {
591622
auto stat = fs.getattr(inode);
592-
if (stat.nlink() == 1 ||
623+
if (!supports_hardlinks || stat.nlink() == 1 ||
593624
seen_hardlinks.insert(inode.inode_num()).second) {
594-
data_size += stat.allocated_size();
625+
data_size += sparse_mode == sparse_file_mode::sparse_disk
626+
? stat.allocated_size()
627+
: stat.size();
595628
}
596629
}
630+
597631
while (auto parent = entry.parent()) {
598632
if (!matched_dirs.insert(parent->unix_path()).second) {
599633
break;
@@ -618,8 +652,19 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
618652
} else if (opts.enable_progress) {
619653
vfs_stat vfs;
620654
fs.statvfs(&vfs);
655+
656+
uint64_t data_size;
657+
658+
if (sparse_mode == sparse_file_mode::sparse_disk) {
659+
data_size = vfs.total_allocated_fs_size;
660+
} else if (supports_hardlinks) {
661+
data_size = vfs.total_fs_size;
662+
} else {
663+
data_size = vfs.total_fs_size + vfs.total_hardlink_size;
664+
}
665+
621666
std::lock_guard lock(bytes_total_mx_);
622-
bytes_total_.emplace(vfs.blocks);
667+
bytes_total_.emplace(data_size);
623668
}
624669

625670
if (opts.enable_progress) {
@@ -766,11 +811,6 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
766811
archiver->wait();
767812
reg_archiver->wait();
768813

769-
if (opts.enable_progress) {
770-
LOG_DEBUG << "progress: " << bytes_written_.load() << "/"
771-
<< bytes_total_.value() << " bytes written";
772-
}
773-
774814
if (hard_error) {
775815
DWARFS_THROW(runtime_error, "extraction aborted");
776816
}
@@ -799,6 +839,15 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
799839
}
800840
}
801841

842+
if (opts.enable_progress) {
843+
DWARFS_CHECK(bytes_written_.load() == bytes_total_.value(),
844+
fmt::format("progress mismatch: {} (written) != {} (total)",
845+
bytes_written_.load(), bytes_total_.value()));
846+
847+
LOG_DEBUG << "progress: " << bytes_written_.load() << "/"
848+
<< bytes_total_.value() << " bytes written";
849+
}
850+
802851
if (soft_error > 0) {
803852
LOG_ERROR << "extraction finished with " << soft_error << " error(s)";
804853
return false;

test/compat_test.cpp

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
#include <dwarfs/checksum.h>
4343
#include <dwarfs/config.h>
4444
#include <dwarfs/file_stat.h>
45+
#include <dwarfs/file_util.h>
46+
#include <dwarfs/glob_matcher.h>
4547
#include <dwarfs/logger.h>
4648
#include <dwarfs/reader/filesystem_options.h>
4749
#include <dwarfs/reader/filesystem_v2.h>
@@ -1161,7 +1163,14 @@ void check_compat(logger& lgr [[maybe_unused]], reader::filesystem_v2 const& fs,
11611163
std::ostringstream oss;
11621164

11631165
EXPECT_NO_THROW(ext.open_stream(oss, {.name = "mtree"}));
1164-
EXPECT_NO_THROW(ext.extract(fs));
1166+
EXPECT_NO_THROW(ext.extract(fs, {.enable_progress = true}));
1167+
1168+
{
1169+
auto const pi = ext.get_progress();
1170+
ASSERT_TRUE(pi.total_bytes.has_value());
1171+
EXPECT_EQ(pi.total_bytes.value(), pi.extracted_bytes);
1172+
}
1173+
11651174
EXPECT_NO_THROW(ext.close());
11661175

11671176
ref_entries.erase("");
@@ -1270,6 +1279,72 @@ void check_compat(logger& lgr [[maybe_unused]], reader::filesystem_v2 const& fs,
12701279
}
12711280
}
12721281

1282+
void check_extract_format(logger& lgr, test::os_access_mock& os,
1283+
reader::filesystem_v2 const& fs,
1284+
std::optional<std::string_view> format = std::nullopt,
1285+
glob_matcher const* matcher = nullptr) {
1286+
utility::filesystem_extractor fsx(lgr, os);
1287+
1288+
auto do_extract = [&] {
1289+
auto const context = fmt::format("{}{}", format.value_or("disk"),
1290+
matcher ? " with matcher" : "");
1291+
1292+
EXPECT_NO_THROW(fsx.extract(fs, matcher,
1293+
{
1294+
.enable_progress = true,
1295+
.skip_devices = true,
1296+
.skip_specials = true,
1297+
}))
1298+
<< context;
1299+
1300+
auto const pi = fsx.get_progress();
1301+
ASSERT_TRUE(pi.total_bytes.has_value()) << context;
1302+
EXPECT_EQ(pi.total_bytes.value(), pi.extracted_bytes) << context;
1303+
1304+
EXPECT_NO_THROW(fsx.close()) << context;
1305+
};
1306+
1307+
if (format.has_value()) {
1308+
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
1309+
std::ostringstream oss;
1310+
EXPECT_NO_THROW(fsx.open_stream(oss, {.name = std::string{*format}}));
1311+
do_extract();
1312+
#endif
1313+
} else {
1314+
temporary_directory tempdir("dwarfs");
1315+
auto const td = tempdir.path();
1316+
EXPECT_NO_THROW(fsx.open_disk(td));
1317+
do_extract();
1318+
}
1319+
}
1320+
1321+
void check_extract_matcher(logger& lgr, test::os_access_mock& os,
1322+
reader::filesystem_v2 const& fs,
1323+
glob_matcher const* matcher = nullptr) {
1324+
// check extract-to-disk
1325+
check_extract_format(lgr, os, fs, std::nullopt, matcher);
1326+
1327+
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
1328+
// check extract to various formats
1329+
for (auto const& fmt : test::supported_libarchive_formats()) {
1330+
if (utility::filesystem_extractor::supports_format(
1331+
{.name = std::string{fmt.name}})) {
1332+
check_extract_format(lgr, os, fs, fmt.name, matcher);
1333+
}
1334+
}
1335+
#endif
1336+
}
1337+
1338+
void check_extract(logger& lgr, test::os_access_mock& os,
1339+
reader::filesystem_v2 const& fs) {
1340+
// no matcher - extract all
1341+
check_extract_matcher(lgr, os, fs);
1342+
1343+
// extract only .sh files
1344+
glob_matcher matcher{"**/*.sh"};
1345+
check_extract_matcher(lgr, os, fs, &matcher);
1346+
}
1347+
12731348
auto get_image_path(std::string const& version) {
12741349
return test_dir / "compat" / fmt::format("compat-v{}.dwarfs", version);
12751350
}
@@ -1409,6 +1484,7 @@ TEST_P(compat_filesystem, backwards_compat) {
14091484
reader::filesystem_v2 fs(lgr, os, test::make_real_file_view(filename),
14101485
opts);
14111486
check_compat(lgr, fs, version);
1487+
check_extract(lgr, os, fs);
14121488
}
14131489

14141490
opts.image_offset = reader::filesystem_options::IMAGE_OFFSET_AUTO;
@@ -1481,6 +1557,7 @@ TEST_P(rewrite, filesystem_rewrite) {
14811557
reader::filesystem_v2 fs(lgr, os, mm);
14821558
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
14831559
check_checksums(fs);
1560+
check_extract(lgr, os, fs);
14841561
EXPECT_TRUE(fs.has_valid_section_index());
14851562
}
14861563

@@ -1566,6 +1643,7 @@ TEST_P(rewrite, filesystem_rewrite) {
15661643
reader::filesystem_v2 fs(lgr, os, mm);
15671644
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
15681645
check_checksums(fs);
1646+
check_extract(lgr, os, fs);
15691647
}
15701648

15711649
std::ostringstream rewritten5;
@@ -1587,6 +1665,7 @@ TEST_P(rewrite, filesystem_rewrite) {
15871665
reader::filesystem_v2 fs(lgr, os, mm);
15881666
check_dynamic(version, fs, origmm, rebuild_metadata.has_value());
15891667
check_checksums(fs);
1668+
check_extract(lgr, os, fs);
15901669
}
15911670
}
15921671

test/tool_dwarfsextract_main_basic_test.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232

3333
#include <dwarfs/binary_literals.h>
3434
#include <dwarfs/file_util.h>
35+
#include <dwarfs/reader/filesystem_v2.h>
3536
#include <dwarfs/utility/filesystem_extractor.h>
3637
#include <dwarfs/utility/filesystem_extractor_archive_format.h>
3738
#include <dwarfs/vfs_stat.h>
@@ -259,16 +260,22 @@ std::vector<libarchive_format_def> const libarchive_formats{
259260
} // namespace
260261

261262
class dwarfsextract_format_test
262-
: public testing::TestWithParam<libarchive_format_def> {};
263+
: public testing::TestWithParam<std::tuple<libarchive_format_def, bool>> {};
263264

264265
TEST_P(dwarfsextract_format_test, basic) {
265-
auto fmt = GetParam();
266+
auto const [fmt, use_matcher] = GetParam();
266267
bool const is_ar = fmt.name.starts_with("ar");
267268
bool const is_shar = fmt.name.starts_with("shar");
268269
auto t = dwarfsextract_tester::create_with_image();
269270
int const expected_exit = fmt.expected_error ? 1 : 0;
270-
int exit_code =
271-
t.run({"-i", "image.dwarfs", "-f", fmt.name, "--log-level=debug"});
271+
std::vector<std::string> args{"-i", "image.dwarfs", "-f", fmt.name,
272+
"--log-level=debug"};
273+
if (use_matcher) {
274+
args.push_back("**/b*.pl");
275+
args.push_back("**/ipsum.*");
276+
args.push_back("**/*link");
277+
}
278+
int exit_code = t.run(args);
272279
if (!fmt.expected_error && exit_code != 0) {
273280
if (t.err().find("not supported on this platform") != std::string::npos) {
274281
GTEST_SKIP();
@@ -280,14 +287,21 @@ TEST_P(dwarfsextract_format_test, basic) {
280287
EXPECT_THAT(t.err(), ::testing::HasSubstr("extraction aborted"));
281288
} else if (!is_shar and !is_ar) {
282289
auto out = t.out();
283-
EXPECT_GE(out.size(), fmt.min_size);
290+
291+
if (!use_matcher) {
292+
EXPECT_GE(out.size(), fmt.min_size);
293+
}
284294

285295
std::set<std::string> paths;
286296
std::set<std::string> expected_paths{
287297
"bar.pl", "baz.pl", "empty", "foo.pl",
288298
"ipsum.txt", "somedir", "somedir/bad", "somedir/empty",
289299
"somedir/ipsum.py", "somelink", "test.pl",
290300
};
301+
std::set<std::string> expected_paths_with_matcher{
302+
"bar.pl", "baz.pl", "ipsum.txt",
303+
"somedir", "somedir/ipsum.py", "somelink",
304+
};
291305

292306
auto ar = ::archive_read_new();
293307
ASSERT_EQ(ARCHIVE_OK, ::archive_read_support_format_all(ar))
@@ -319,12 +333,18 @@ TEST_P(dwarfsextract_format_test, basic) {
319333
EXPECT_EQ(ARCHIVE_OK, ::archive_read_free(ar))
320334
<< ::archive_error_string(ar);
321335

322-
EXPECT_EQ(expected_paths, paths);
336+
if (use_matcher) {
337+
EXPECT_EQ(expected_paths_with_matcher, paths);
338+
} else {
339+
EXPECT_EQ(expected_paths, paths);
340+
}
323341
}
324342
}
325343

326-
INSTANTIATE_TEST_SUITE_P(dwarfs, dwarfsextract_format_test,
327-
::testing::ValuesIn(libarchive_formats));
344+
INSTANTIATE_TEST_SUITE_P(
345+
dwarfs, dwarfsextract_format_test,
346+
::testing::Combine(::testing::ValuesIn(libarchive_formats),
347+
::testing::Bool()));
328348

329349
#endif
330350

0 commit comments

Comments
 (0)