Skip to content

Commit 1066f22

Browse files
committed
fix(filesystem_extractor): process all deferred hard link entries
This is *only* a problem when extracting a subset of hard links. In particular, for the `newc` format *only*, there may be multiple deferred entries queued up at the end.
1 parent 020229b commit 1066f22

File tree

2 files changed

+206
-14
lines changed

2 files changed

+206
-14
lines changed

src/utility/filesystem_extractor.cpp

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,10 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
627627
<< " total bytes to extract";
628628
}
629629

630+
auto shared_entry_ptr = [](::archive_entry* e) {
631+
return std::shared_ptr<::archive_entry>(e, ::archive_entry_free);
632+
};
633+
630634
auto do_archive_entry = [&](auto const& entry) {
631635
if (entry.is_root()) {
632636
// skip root entry
@@ -721,10 +725,6 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
721725

722726
::archive_entry_linkify(lr, &ae, &spare);
723727

724-
auto shared_entry_ptr = [](::archive_entry* e) {
725-
return std::shared_ptr<::archive_entry>(e, ::archive_entry_free);
726-
};
727-
728728
if (ae) {
729729
do_archive(inode.is_regular_file() && stat.nlink_unchecked() == 1
730730
? reg_archiver
@@ -734,9 +734,7 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
734734

735735
if (spare) {
736736
auto ev = fs.find(::archive_entry_ino(spare));
737-
if (!ev) {
738-
LOG_ERROR << "find() failed";
739-
}
737+
DWARFS_CHECK(ev, "find() failed for spare hard link entry");
740738
LOG_DEBUG << "archiving spare entry " << ::archive_entry_pathname(spare);
741739
do_archive(archiver, shared_entry_ptr(spare), *ev);
742740
}
@@ -777,13 +775,28 @@ bool filesystem_extractor_<LoggerPolicy>::extract(
777775
DWARFS_THROW(runtime_error, "extraction aborted");
778776
}
779777

780-
// As we're visiting *all* hardlinks, we should never see any deferred
781-
// entries.
782-
::archive_entry* ae = nullptr;
783-
::archive_entry_linkify(lr, &ae, &spare);
784-
if (ae) {
785-
::archive_entry_free(ae);
786-
DWARFS_THROW(runtime_error, "unexpected deferred entry");
778+
// process any deferred hard link entries
779+
{
780+
::archive_entry* ae = nullptr;
781+
::archive_entry_linkify(lr, &ae, &spare);
782+
783+
if (ae) {
784+
do {
785+
auto ev = fs.find(::archive_entry_ino(ae));
786+
787+
DWARFS_CHECK(ev, "find() failed for deferred hard link entry");
788+
789+
LOG_DEBUG << "archiving deferred entry "
790+
<< ::archive_entry_pathname(ae);
791+
792+
do_archive(archiver, shared_entry_ptr(ae), *ev);
793+
794+
ae = nullptr;
795+
::archive_entry_linkify(lr, &ae, &spare);
796+
} while (ae);
797+
798+
archiver->wait();
799+
}
787800
}
788801

789802
if (soft_error > 0) {

test/tool_dwarfsextract_main_basic_test.cpp

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,27 @@
2121
* SPDX-License-Identifier: GPL-3.0-or-later
2222
*/
2323

24+
#include <filesystem>
25+
2426
#include <gmock/gmock.h>
2527

2628
#include <archive.h>
2729
#include <archive_entry.h>
2830

2931
#include <fmt/format.h>
3032

33+
#include <dwarfs/binary_literals.h>
34+
#include <dwarfs/file_util.h>
3135
#include <dwarfs/utility/filesystem_extractor.h>
3236
#include <dwarfs/utility/filesystem_extractor_archive_format.h>
37+
#include <dwarfs/vfs_stat.h>
3338

3439
#include "test_tool_main_tester.h"
3540

3641
using namespace dwarfs::test;
42+
using namespace dwarfs::binary_literals;
3743
using namespace dwarfs;
44+
namespace fs = std::filesystem;
3845

3946
#ifndef DWARFS_FILESYSTEM_EXTRACTOR_NO_OPEN_FORMAT
4047
TEST(dwarfsextract_test, mtree) {
@@ -320,3 +327,175 @@ INSTANTIATE_TEST_SUITE_P(dwarfs, dwarfsextract_format_test,
320327
::testing::ValuesIn(libarchive_formats));
321328

322329
#endif
330+
331+
class dwarfsextract_sparse_test
332+
: public testing::TestWithParam<std::tuple<archive_format_info, bool>> {};
333+
334+
TEST_P(dwarfsextract_sparse_test, extract_sparse_files) {
335+
auto const [fmt, use_matcher] = GetParam();
336+
337+
if (!fmt.is_disk && !utility::filesystem_extractor::supports_format(
338+
{.name = std::string{fmt.name}})) {
339+
GTEST_SKIP() << "format " << fmt.name << " not supported on this platform";
340+
}
341+
342+
std::string const image_file = "test.dwarfs";
343+
std::string image;
344+
std::mt19937_64 rng{42};
345+
346+
{
347+
auto t = mkdwarfs_tester::create_empty();
348+
t.add_root_dir();
349+
auto const stat1 = t.os->add_file("/sparse1",
350+
{
351+
{extent_kind::data, 10_KiB, &rng},
352+
{extent_kind::hole, 500_KiB},
353+
{extent_kind::data, 3_KiB, &rng},
354+
},
355+
{.nlink = 3});
356+
auto const stat2 = t.os->add_file("/sparse2",
357+
{
358+
{extent_kind::hole, 300_KiB},
359+
},
360+
{.nlink = 3});
361+
auto const stat3 = t.os->add_file("/sparse3",
362+
{
363+
{extent_kind::hole, 400_KiB},
364+
{extent_kind::data, 7_KiB, nullptr},
365+
},
366+
{.nlink = 3});
367+
auto const stat4 = t.os->add_file("/sparse4",
368+
{
369+
{extent_kind::data, 9_KiB, nullptr},
370+
{extent_kind::hole, 200_KiB},
371+
},
372+
{.nlink = 3});
373+
t.os->add("/hardlink1a", stat1);
374+
t.os->add("/hardlink1b", stat1);
375+
t.os->add("/hardlink2a", stat2);
376+
t.os->add("/hardlink2b", stat2);
377+
t.os->add("/hardlink3a", stat3);
378+
t.os->add("/hardlink3b", stat3);
379+
t.os->add("/hardlink4a", stat4);
380+
t.os->add("/hardlink4b", stat4);
381+
382+
ASSERT_EQ(0, t.run({"-i", "/", "-o", image_file, "-l3"})) << t.err();
383+
384+
auto img = t.fa->get_file(image_file);
385+
ASSERT_TRUE(img);
386+
image = std::move(img.value());
387+
388+
auto fs =
389+
t.fs_from_file(image_file, {.metadata = {.enable_sparse_files = true}});
390+
391+
vfs_stat vfs;
392+
fs.statvfs(&vfs);
393+
394+
EXPECT_EQ(5, vfs.files); // root dir + 4 files (no hardlinks)
395+
EXPECT_EQ(1, vfs.frsize);
396+
EXPECT_EQ(29_KiB, vfs.blocks);
397+
398+
EXPECT_EQ(1400_KiB + 29_KiB, vfs.total_fs_size);
399+
EXPECT_EQ((1400_KiB + 29_KiB) * 2, vfs.total_hardlink_size);
400+
EXPECT_EQ(29_KiB, vfs.total_allocated_fs_size);
401+
}
402+
403+
auto t = dwarfsextract_tester::create_with_image(image);
404+
405+
std::vector<std::string> args{"-i", "image.dwarfs", "--log-level=debug"};
406+
std::optional<temporary_directory> temp_dir;
407+
408+
if (fmt.is_disk) {
409+
temp_dir.emplace("dwarfs");
410+
args.push_back("-o");
411+
args.push_back(path_to_utf8_string_sanitized(temp_dir->path()));
412+
} else {
413+
args.push_back("-f");
414+
args.push_back(std::string{fmt.name});
415+
}
416+
417+
if (use_matcher) {
418+
args.push_back("**/sparse*");
419+
args.push_back("**/*b");
420+
}
421+
422+
int exit_code = t.run(args);
423+
424+
if (exit_code != 0) {
425+
if (t.err().find("not supported on this platform") != std::string::npos) {
426+
GTEST_SKIP() << "format " << fmt.name
427+
<< " not supported on this platform";
428+
}
429+
}
430+
431+
ASSERT_EQ(0, exit_code) << t.err();
432+
433+
bool const is_ar = fmt.name.starts_with("ar");
434+
bool const is_shar = fmt.name.starts_with("shar");
435+
436+
if (!is_shar and !is_ar) {
437+
auto out = t.out();
438+
439+
std::set<std::string> paths;
440+
std::set<std::string> expected_paths{
441+
"sparse1", "sparse2", "sparse3", "sparse4",
442+
"hardlink1a", "hardlink1b", "hardlink2a", "hardlink2b",
443+
"hardlink3a", "hardlink3b", "hardlink4a", "hardlink4b",
444+
};
445+
std::set<std::string> expected_paths_with_matcher{
446+
"sparse1", "sparse2", "sparse3", "sparse4",
447+
"hardlink1b", "hardlink2b", "hardlink3b", "hardlink4b",
448+
};
449+
450+
if (fmt.is_disk) {
451+
for (auto const& entry :
452+
fs::recursive_directory_iterator(temp_dir->path())) {
453+
if (entry.path() != temp_dir->path()) {
454+
paths.insert(path_to_utf8_string_sanitized(
455+
fs::relative(entry.path(), temp_dir->path())));
456+
}
457+
}
458+
} else {
459+
auto ar = ::archive_read_new();
460+
ASSERT_EQ(ARCHIVE_OK, ::archive_read_support_format_all(ar))
461+
<< ::archive_error_string(ar);
462+
ASSERT_EQ(ARCHIVE_OK,
463+
::archive_read_open_memory(ar, out.data(), out.size()))
464+
<< ::archive_error_string(ar);
465+
466+
for (;;) {
467+
struct archive_entry* entry;
468+
int ret = ::archive_read_next_header(ar, &entry);
469+
if (ret == ARCHIVE_EOF) {
470+
break;
471+
}
472+
ASSERT_EQ(ARCHIVE_OK, ret) << ::archive_error_string(ar);
473+
std::string path{::archive_entry_pathname(entry)};
474+
if (path != ".") {
475+
if (path.back() == '/') {
476+
path.pop_back();
477+
}
478+
if (path.starts_with("./")) {
479+
path.erase(0, 2);
480+
}
481+
std::replace(path.begin(), path.end(), '\\', '/');
482+
EXPECT_TRUE(paths.insert(path).second) << path;
483+
}
484+
}
485+
486+
EXPECT_EQ(ARCHIVE_OK, ::archive_read_free(ar))
487+
<< ::archive_error_string(ar);
488+
}
489+
490+
if (use_matcher) {
491+
EXPECT_EQ(expected_paths_with_matcher, paths);
492+
} else {
493+
EXPECT_EQ(expected_paths, paths);
494+
}
495+
}
496+
}
497+
498+
INSTANTIATE_TEST_SUITE_P(
499+
dwarfs, dwarfsextract_sparse_test,
500+
::testing::Combine(::testing::ValuesIn(supported_libarchive_formats(true)),
501+
::testing::Bool()));

0 commit comments

Comments
 (0)