diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..77565771 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,117 @@ +--- +name: Build & tests +on: + push: + branches: + - main + pull_request: + branches: + - main + workflow_dispatch: {} + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) + BUILD_TYPE: Release + MFU_BIN: ${{github.workspace}}/build/bin + MFU_MPIRUN_ARGS: --mca mpi_abort_print_stack 1 --bind-to none --oversubscribe -N 8 + +jobs: + build: + runs-on: ubuntu-latest + steps: + + - name: Install tests dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential libopenmpi-dev libattr1-dev libarchive-dev python3-pytest python3-xattr e2fsprogs + + # + # lwgrp + # + - name: lwgrp checkout + uses: actions/checkout@v4 + with: + repository: 'LLNL/lwgrp' + ref: 'v1.0.6' + path: 'lwgrp' + - name: lwgrp autogen + run: ./autogen.sh + working-directory: lwgrp + - name: lwgrp configure + run: ./configure --prefix=${{github.workspace}}/build --disable-static + working-directory: lwgrp + - name: lwgrp make + run: make + working-directory: lwgrp + - name: lwgrp make install + run: make install + working-directory: lwgrp + + + # + # libcircle + # + - name: libcircle checkout + uses: actions/checkout@v4 + with: + repository: 'hpc/libcircle' + ref: 'v0.3' + path: 'libcircle' + - name: libcircle autogen + run: ./autogen.sh + working-directory: libcircle + - name: libcircle configure + run: ./configure --prefix=${{github.workspace}}/build --disable-static + working-directory: libcircle + - name: libcircle make + run: make + working-directory: libcircle + - name: libcircle make install + run: make install + working-directory: libcircle + + # + # dtcmp + # + - name: dtcmp checkout + uses: actions/checkout@v4 + with: + repository: 'LLNL/dtcmp' + ref: 'v1.1.5' + path: 'dtcmp' + - name: dtcmp autogen + run: ./autogen.sh + working-directory: dtcmp + - name: dtcmp configure + run: ./configure --prefix=${{github.workspace}}/build --with-lwgrp=${{github.workspace}}/build --disable-static + working-directory: dtcmp + - name: dtcmp make + run: make + working-directory: dtcmp + - name: dtcmp make install + run: make install + working-directory: dtcmp + + # + # mpifileutils + # + - uses: actions/checkout@v4 + with: + path: 'mpifileutils' + - name: Configure CMake + run: > + cmake + -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build + -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + -DDTCMP_INCLUDE_DIRS=${{github.workspace}}/build/include + -DDTCMP_LIBRARIES=${{github.workspace}}/build/lib/libdtcmp.so + -DLibCircle_INCLUDE_DIRS=${{github.workspace}}/build/include + -DLibCircle_LIBRARIES=${{github.workspace}}/build/lib/libcircle.so + working-directory: mpifileutils + - name: Build + run: cmake --build mpifileutils --config ${{env.BUILD_TYPE}} + - name: Install + run: cmake --install mpifileutils + - name: Run tests + run: pytest + working-directory: mpifileutils diff --git a/src/common/mfu_flist.c b/src/common/mfu_flist.c index 9b13a3a5..9281a70c 100644 --- a/src/common/mfu_flist.c +++ b/src/common/mfu_flist.c @@ -125,7 +125,7 @@ static size_t list_elem_pack2_size(int detail, uint64_t chars, const elem_t* ele { size_t size; if (detail) { - size = 2 * 4 + chars + 0 * 4 + 10 * 8; + size = 2 * 4 + chars + 1 * 4 + 10 * 8 + 8 + chars; } else { size = 2 * 4 + chars + 1 * 4; @@ -154,11 +154,7 @@ static size_t list_elem_pack2(void* buf, int detail, uint64_t chars, const elem_ mfu_pack_uint32(&ptr, (uint32_t) 
chars); /* copy in file name */ - char* file = elem->file; - if (file != NULL) { - strcpy(ptr, file); - } - ptr += chars; + mfu_pack_sized_str(&ptr, elem->file, chars); #ifdef DAOS_SUPPORT /* copy in values for obj ids */ @@ -166,6 +162,7 @@ static size_t list_elem_pack2(void* buf, int detail, uint64_t chars, const elem_ mfu_pack_uint64(&ptr, elem->obj_id_hi); #endif + mfu_pack_uint32(&ptr, elem->type); if (detail) { /* copy in fields */ mfu_pack_uint64(&ptr, elem->mode); @@ -178,10 +175,8 @@ static size_t list_elem_pack2(void* buf, int detail, uint64_t chars, const elem_ mfu_pack_uint64(&ptr, elem->ctime); mfu_pack_uint64(&ptr, elem->ctime_nsec); mfu_pack_uint64(&ptr, elem->size); - } - else { - /* just have the file type */ - mfu_pack_uint32(&ptr, elem->type); + mfu_pack_uint64(&ptr, elem->nlink); + mfu_pack_sized_str(&ptr, elem->ref, chars); } size_t bytes = (size_t)(ptr - start); @@ -203,15 +198,11 @@ static size_t list_elem_unpack2(const void* buf, elem_t* elem) uint32_t chars; mfu_unpack_uint32(&ptr, &chars); - /* get name and advance pointer */ - const char* file = ptr; - ptr += chars; - - /* copy path */ - elem->file = MFU_STRDUP(file); + /* get name */ + mfu_unpack_sized_str(&ptr, &elem->file, chars); /* set depth */ - elem->depth = mfu_flist_compute_depth(file); + elem->depth = mfu_flist_compute_depth(elem->file); elem->detail = (int) detail; @@ -221,6 +212,10 @@ static size_t list_elem_unpack2(const void* buf, elem_t* elem) mfu_unpack_uint64(&ptr, &elem->obj_id_hi); #endif + uint32_t type; + mfu_unpack_uint32(&ptr, &type); + elem->type = (mfu_filetype) type; + if (detail) { /* extract fields */ mfu_unpack_uint64(&ptr, &elem->mode); @@ -233,14 +228,8 @@ static size_t list_elem_unpack2(const void* buf, elem_t* elem) mfu_unpack_uint64(&ptr, &elem->ctime); mfu_unpack_uint64(&ptr, &elem->ctime_nsec); mfu_unpack_uint64(&ptr, &elem->size); - /* use mode to set file type */ - elem->type = mfu_flist_mode_to_filetype((mode_t)elem->mode); - } - else { - /* only have type */ - uint32_t type; - mfu_unpack_uint32(&ptr, &type); - elem->type = (mfu_filetype) type; + mfu_unpack_uint64(&ptr, &elem->nlink); + mfu_unpack_sized_str(&ptr, &elem->ref, chars); } size_t bytes = (size_t)(ptr - start); @@ -346,6 +335,8 @@ static void list_insert_copy(flist_t* flist, elem_t* src) elem->ctime = src->ctime; elem->ctime_nsec = src->ctime_nsec; elem->size = src->size; + elem->nlink = src->nlink; + elem->ref = MFU_STRDUP(src->ref); /* append element to tail of linked list */ mfu_flist_insert_elem(flist, elem); @@ -368,6 +359,9 @@ void mfu_flist_insert_stat(flist_t* flist, const char* fpath, mode_t mode, const /* set file type */ elem->type = mfu_flist_mode_to_filetype(mode); + /* hardlinks references are discovered afterwhile */ + elem->ref = NULL; + /* copy stat info */ if (sb != NULL) { elem->detail = 1; @@ -389,6 +383,7 @@ void mfu_flist_insert_stat(flist_t* flist, const char* fpath, mode_t mode, const elem->ctime_nsec = nsecs; elem->size = (uint64_t) sb->st_size; + elem->nlink = (uint64_t) sb->st_nlink; /* TODO: link to user and group names? 
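/* A rough sketch of the behavior assumed of mfu_pack_sized_str()/mfu_unpack_sized_str()
 * used above (not the mpifileutils implementation): the string occupies a fixed field
 * of 'chars' bytes, NUL padded, and a NULL source packs as an empty field.  That is
 * what lets the optional 'ref' string ride along in every detailed record while the
 * record size stays a simple function of 'chars'.  Note that the type field is now
 * packed unconditionally rather than only in the no-detail case. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static void pack_sized_str_sketch(char** pptr, const char* str, uint64_t chars)
{
    memset(*pptr, 0, (size_t)chars);
    if (str != NULL) {
        strncpy(*pptr, str, (size_t)chars - 1);   /* keep room for a trailing NUL */
    }
    *pptr += chars;
}

static void unpack_sized_str_sketch(const char** pptr, char** str, uint64_t chars)
{
    char* s = (char*) malloc((size_t)chars + 1);  /* the real code uses MFU_MALLOC/MFU_STRDUP */
    memcpy(s, *pptr, (size_t)chars);
    s[chars] = '\0';
    *str = s;
    *pptr += chars;
}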
*/ } @@ -409,6 +404,8 @@ static void list_delete(flist_t* flist) while (current != NULL) { elem_t* next = current->next; mfu_free(¤t->file); + if (current->detail) + mfu_free(¤t->ref); mfu_free(¤t); current = next; } @@ -987,6 +984,28 @@ uint64_t mfu_flist_file_get_size(mfu_flist bflist, uint64_t idx) return ret; } +uint64_t mfu_flist_file_get_nlink(mfu_flist bflist, uint64_t idx) +{ + uint64_t ret = (uint64_t) - 1; + flist_t* flist = (flist_t*) bflist; + elem_t* elem = list_get_elem(flist, idx); + if (elem != NULL && flist->detail) { + ret = elem->nlink; + } + return ret; +} + +const char* mfu_flist_file_get_ref(mfu_flist bflist, uint64_t idx) +{ + const char* ref = NULL; + flist_t* flist = (flist_t*) bflist; + elem_t* elem = list_get_elem(flist, idx); + if (elem != NULL) { + ref = elem->ref; + } + return ref; +} + const char* mfu_flist_file_get_username(mfu_flist bflist, uint64_t idx) { const char* ret = NULL; @@ -1171,6 +1190,19 @@ void mfu_flist_file_set_size(mfu_flist bflist, uint64_t idx, uint64_t size) return; } +void mfu_flist_file_set_ref(mfu_flist bflist, uint64_t idx, const char* ref) +{ + flist_t* flist = (flist_t*) bflist; + elem_t* elem = list_get_elem(flist, idx); + if (elem != NULL) { + /* free existing name if there is one */ + mfu_free(&elem->ref); + /* set new ref*/ + elem->ref = MFU_STRDUP(ref); + } + return; +} + mfu_flist mfu_flist_subset(mfu_flist src) { /* allocate a new file list */ @@ -1353,6 +1385,8 @@ uint64_t mfu_flist_file_create(mfu_flist bflist) elem->ctime = 0; elem->ctime_nsec = 0; elem->size = 0; + elem->nlink = 0; + elem->ref = NULL; /* for DAOS */ #ifdef DAOS_SUPPORT @@ -1817,11 +1851,12 @@ void mfu_flist_print_summary(mfu_flist flist) MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* initlialize counters */ - uint64_t total_dirs = 0; - uint64_t total_files = 0; - uint64_t total_links = 0; - uint64_t total_unknown = 0; - uint64_t total_bytes = 0; + uint64_t total_dirs = 0; + uint64_t total_files = 0; + uint64_t total_links = 0; + uint64_t total_hardlinks = 0; + uint64_t total_unknown = 0; + uint64_t total_bytes = 0; /* step through and print data */ uint64_t idx = 0; @@ -1839,8 +1874,12 @@ void mfu_flist_print_summary(mfu_flist flist) total_dirs++; } else if (S_ISREG(mode)) { - total_files++; - total_bytes += size; + if (mfu_flist_file_get_ref(flist, idx) != NULL) { + total_hardlinks++; + } else { + total_files++; + total_bytes += size; + } } else if (S_ISLNK(mode)) { total_links++; @@ -1863,6 +1902,9 @@ void mfu_flist_print_summary(mfu_flist flist) else if (type == MFU_TYPE_LINK) { total_links++; } + else if (type == MFU_TYPE_HARDLINK) { + total_hardlinks++; + } else { /* unknown file type */ total_unknown++; @@ -1874,13 +1916,14 @@ void mfu_flist_print_summary(mfu_flist flist) } /* get total directories, files, links, and bytes */ - uint64_t all_dirs, all_files, all_links, all_unknown, all_bytes; + uint64_t all_dirs, all_files, all_links, all_hardlinks, all_unknown, all_bytes; + MPI_Allreduce(&total_dirs, &all_dirs, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_files, &all_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_links, &all_links, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_hardlinks, &all_hardlinks, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_unknown, &all_unknown, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_bytes, &all_bytes, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); uint64_t all_count = mfu_flist_global_size(flist); - 
MPI_Allreduce(&total_dirs, &all_dirs, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_files, &all_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_links, &all_links, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_unknown, &all_unknown, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_bytes, &all_bytes, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); /* convert total size to units */ if (rank == 0) { @@ -1888,6 +1931,7 @@ void mfu_flist_print_summary(mfu_flist flist) MFU_LOG(MFU_LOG_INFO, " Directories: %llu", (unsigned long long) all_dirs); MFU_LOG(MFU_LOG_INFO, " Files: %llu", (unsigned long long) all_files); MFU_LOG(MFU_LOG_INFO, " Links: %llu", (unsigned long long) all_links); + MFU_LOG(MFU_LOG_INFO, " Hardlinks: %llu", (unsigned long long) all_hardlinks); /* MFU_LOG(MFU_LOG_INFO, " Unknown: %lu", (unsigned long long) all_unknown); */ if (mfu_flist_have_detail(flist)) { diff --git a/src/common/mfu_flist.h b/src/common/mfu_flist.h index 58dc0c43..9a8f2dfe 100644 --- a/src/common/mfu_flist.h +++ b/src/common/mfu_flist.h @@ -94,11 +94,12 @@ extern "C" { * so changing their values can break backwards compatibility * in reading any such files */ typedef enum mfu_filetypes_e { - MFU_TYPE_NULL = 0, /* type not set */ - MFU_TYPE_UNKNOWN = 1, /* type not known */ - MFU_TYPE_FILE = 2, /* regular file */ - MFU_TYPE_DIR = 3, /* directory */ - MFU_TYPE_LINK = 4, /* symlink */ + MFU_TYPE_NULL = 0, /* type not set */ + MFU_TYPE_UNKNOWN = 1, /* type not known */ + MFU_TYPE_FILE = 2, /* regular file */ + MFU_TYPE_DIR = 3, /* directory */ + MFU_TYPE_LINK = 4, /* symlink */ + MFU_TYPE_HARDLINK = 5, /* hardlink */ } mfu_filetype; /* define handle type to a file list */ @@ -395,6 +396,8 @@ uint64_t mfu_flist_file_get_mtime_nsec(mfu_flist flist, uint64_t index); uint64_t mfu_flist_file_get_ctime(mfu_flist flist, uint64_t index); uint64_t mfu_flist_file_get_ctime_nsec(mfu_flist flist, uint64_t index); uint64_t mfu_flist_file_get_size(mfu_flist flist, uint64_t index); +uint64_t mfu_flist_file_get_nlink(mfu_flist flist, uint64_t idx); +const char* mfu_flist_file_get_ref(mfu_flist flist, uint64_t idx); uint64_t mfu_flist_file_get_perm(mfu_flist flist, uint64_t index); #if DCOPY_USE_XATTRS void *mfu_flist_file_get_acl(mfu_flist bflist, uint64_t idx, ssize_t *acl_size, char *type); @@ -420,6 +423,7 @@ void mfu_flist_file_set_mtime_nsec(mfu_flist flist, uint64_t index, uint64_t mti void mfu_flist_file_set_ctime(mfu_flist flist, uint64_t index, uint64_t ctime); void mfu_flist_file_set_ctime_nsec(mfu_flist flist, uint64_t index, uint64_t ctime_nsec); void mfu_flist_file_set_size(mfu_flist flist, uint64_t index, uint64_t size); +void mfu_flist_file_set_ref(mfu_flist flist, uint64_t index, const char* ref); #if DCOPY_USE_XATTRS //void *mfu_flist_file_set_acl(mfu_flist bflist, uint64_t idx, ssize_t *acl_size, char *type); #endif diff --git a/src/common/mfu_flist_archive.c b/src/common/mfu_flist_archive.c index aaeffcd6..3157fe06 100644 --- a/src/common/mfu_flist_archive.c +++ b/src/common/mfu_flist_archive.c @@ -70,6 +70,12 @@ typedef struct { size_t io_bufsize; /* size of memory i/o buffer in bytes */ } DTAR_writer_t; +/* linked list of archive entries */ +typedef struct entry_list { + struct archive_entry* entry; + struct entry_list* next; +} entry_list_t; + DTAR_writer_t DTAR_writer; /* state of open archive file and I/O buffer */ mfu_flist DTAR_flist; /* source flist of set of items being copied into archive */ @@ -93,6 +99,22 @@ static void 
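/* Sketch of how a tool consumes the new flist fields (all functions below appear in
 * this patch; the "mfu.h" include is assumed to be the public header exposing them):
 * hardlinks are counted separately from regular files, and bytes are charged to the
 * reference copy only, mirroring mfu_flist_print_summary(). */
#include <stdint.h>
#include <stdio.h>
#include "mfu.h"

static void count_items(mfu_flist flist)
{
    uint64_t files = 0, hardlinks = 0, bytes = 0;
    uint64_t size = mfu_flist_size(flist);
    for (uint64_t idx = 0; idx < size; idx++) {
        mfu_filetype type = mfu_flist_file_get_type(flist, idx);
        if (type == MFU_TYPE_HARDLINK) {
            hardlinks++;        /* mfu_flist_file_get_ref(flist, idx) names its target */
        } else if (type == MFU_TYPE_FILE) {
            files++;
            bytes += mfu_flist_file_get_size(flist, idx);
        }
    }
    printf("files=%llu hardlinks=%llu bytes=%llu\n",
           (unsigned long long)files, (unsigned long long)hardlinks,
           (unsigned long long)bytes);
}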
DTAR_exit(int code) exit(code); } +/* append a copy of entry in entries list and move current */ +static void entries_list_add(entry_list_t** entries, entry_list_t** current, struct archive_entry* entry) { + entry_list_t* new_entry = (entry_list_t*) malloc(sizeof(entry_list_t)); + new_entry->entry = archive_entry_clone(entry); + new_entry->next = NULL; + if(!*entries) { + *entries = new_entry; + } + if(!*current) { + *current = new_entry; + } else { + (*current)->next = new_entry; + *current = (*current)->next; + } +} + /**************************************** * Cache opened files to avoid repeated open/close of * the same file when using libcicle @@ -446,6 +468,15 @@ static int encode_header( rc = MFU_FAILURE; } + /* hardlink not managed by archive_read_disk_entry_from_file(), call + * archive_entry_set_hardlink() on the entry afterwhile. */ + + if (type == MFU_TYPE_HARDLINK) { + const char* target = mfu_flist_file_get_ref(flist, idx); + const char* reltarget = mfu_param_path_relative(target, cwdpath); + archive_entry_set_hardlink(entry, reltarget); + } + /* we can free the archive now that we have the entry */ archive_read_free(source); @@ -497,6 +528,10 @@ static int encode_header( ); rc = MFU_FAILURE; } + } else if (type == MFU_TYPE_HARDLINK) { + const char* target = mfu_flist_file_get_ref(flist, idx); + const char* reltarget = mfu_param_path_relative(target, cwdpath); + archive_entry_copy_hardlink(entry, reltarget); } } @@ -2156,7 +2191,7 @@ static int compute_entry_sizes( /* identify item type to compute its size in the archive */ mfu_filetype type = mfu_flist_file_get_type(flist, idx); - if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) { + if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK || type == MFU_TYPE_HARDLINK) { /* directories and symlinks only need the header */ uint64_t header_size; encode_header(flist, idx, cwdpath, @@ -2661,9 +2696,11 @@ int mfu_flist_archive_create( /* write headers for our files */ for (idx = 0; idx < listsize; idx++) { - /* we currently only support regular files, directories, and symlinks */ + /* we currently only support regular files, directories, symlinks and + * hardlinks. */ mfu_filetype type = mfu_flist_file_get_type(flist, idx); - if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) { + if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || + type == MFU_TYPE_LINK || type == MFU_TYPE_HARDLINK) { /* write header for this item to the archive, * this sets DTAR_err on any error */ write_header(flist, idx, cwdpath, @@ -3847,9 +3884,20 @@ static void insert_entry_into_flist( mfu_flist_file_set_name(flist, idx, fullpath); mfu_free(&fullpath); - /* get mode of entry, and deduce mfu type */ + /* get mode of entry */ mode_t mode = archive_entry_mode(entry); - mfu_filetype type = mfu_flist_mode_to_filetype(mode); + mfu_filetype type = MFU_TYPE_UNKNOWN; + + /* If hardlink target is defined, set type accordingly and reference. Else + * deduce type from mode. 
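/* Standalone illustration (plain libarchive, not mpifileutils code) of what
 * encode_header() arranges for an MFU_TYPE_HARDLINK item: the archive member gets a
 * header whose hardlink field names an earlier member (relative to the archive root,
 * as mfu_param_path_relative() produces) and carries no data blocks, which is why
 * compute_entry_sizes() charges hardlinks a header only.  Paths are illustrative. */
#include <archive.h>
#include <archive_entry.h>

static int write_hardlink_member(struct archive* a)
{
    struct archive_entry* e = archive_entry_new();
    archive_entry_set_pathname(e, "dir/link.txt");
    archive_entry_set_filetype(e, AE_IFREG);
    archive_entry_set_hardlink(e, "dir/file.txt");  /* member written earlier in the archive */
    archive_entry_set_size(e, 0);                   /* hardlink members have no payload */
    int rc = archive_write_header(a, e);
    archive_entry_free(e);
    return rc == ARCHIVE_OK ? 0 : -1;
}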
*/ + const char* target = archive_entry_hardlink(entry); + if (target != NULL) { + type = MFU_TYPE_HARDLINK; + mfu_flist_file_set_ref(flist, idx, target); + } else { + type = mfu_flist_mode_to_filetype(mode); + } + mfu_flist_file_set_type(flist, idx, type); mfu_flist_file_set_mode(flist, idx, mode); @@ -4223,6 +4271,57 @@ static void extract1_progress_fn(const uint64_t* vals, int count, int complete, } } +/* extract list of entries from archive, update progress and free the list */ +static int extract_archive_list_entries(struct archive* ext, entry_list_t** entries) { + + /* assume we'll succeed */ + int rc = MFU_SUCCESS; + entry_list_t *current_entry = NULL, *previous_entry = NULL; + + current_entry = *entries; + while(current_entry && rc == MFU_SUCCESS) { + /* create entry on disk */ + int r = archive_write_header(ext, current_entry->entry); + if (r != ARCHIVE_OK) { + MFU_LOG(MFU_LOG_ERR, "writing entry %s", + archive_error_string(ext) + ); + rc = MFU_FAILURE; + break; + } + + /* set any properties on the item that need to be set at end, + * e.g., turn off write bit on a file we just wrote or set timestamps */ + r = archive_write_finish_entry(ext); + if (r != ARCHIVE_OK) { + MFU_LOG(MFU_LOG_ERR, "finish writing entry %s", + archive_error_string(ext) + ); + rc = MFU_FAILURE; + break; + } + + /* increment our count of items extracted */ + reduce_buf[REDUCE_ITEMS]++; + + /* update number of items we have completed for progress messages */ + mfu_progress_update(reduce_buf, extract_prog); + + /* jump to next entry and free current entry */ + previous_entry = current_entry; + current_entry = current_entry->next; + archive_entry_free(previous_entry->entry); + mfu_free(&previous_entry); + previous_entry = NULL; + } + + /* Free entries list */ + *entries = NULL; + + return rc; +} + + /* compute total bytes in regular files in flist */ static uint64_t flist_sum_bytes(mfu_flist flist) { @@ -4312,6 +4411,7 @@ static int extract_files_offsets_libarchive( } /* iterate over and extract each item we're responsible for */ + entry_list_t* hardlink_entries = NULL, *current_hardlink_entry = NULL; uint64_t count = 0; while (count < entry_count && rc == MFU_SUCCESS) { /* seek to start of the entry in the archive file */ @@ -4372,6 +4472,15 @@ static int extract_files_offsets_libarchive( break; } + /* if hardlink entry, add a copy of this entry in hardlink_entries list + * for later processing */ + if (archive_entry_hardlink(entry) != NULL) { + entries_list_add(&hardlink_entries, ¤t_hardlink_entry, entry); + /* advance to our next entry */ + count++; + continue; + } + /* got an entry, create corresponding item on disk and * then copy data */ r = archive_write_header(ext, entry); @@ -4436,6 +4545,16 @@ static int extract_files_offsets_libarchive( count++; } + /* wait for all tasks to write regular files */ + MPI_Barrier(MPI_COMM_WORLD); + + /* extract pending hardlinks entries */ + r = extract_archive_list_entries(ext, &hardlink_entries); + if (r != MFU_SUCCESS) { + MFU_LOG(MFU_LOG_ERR, "Failed to extract hardlink entries for archive"); + rc = MFU_FAILURE; + } + /* close out our write archive, this may update timestamps and permissions on items */ r = archive_write_close(ext); if (r != ARCHIVE_OK) { @@ -4840,8 +4959,7 @@ static int extract_files( int ranks; MPI_Comm_size(MPI_COMM_WORLD, &ranks); - /* iterate over all entry from the start of the file, - * looking to find the range of items it is responsible for */ + entry_list_t* hardlink_entries = NULL, *current_hardlink_entry = NULL; uint64_t count = 0; while 
(rc == MFU_SUCCESS) { /* read the next entry from the archive */ @@ -4860,6 +4978,16 @@ static int extract_files( /* write item out to disk if this is one of our assigned items */ if (count % ranks == mfu_rank) { + + /* if hardlink entry, add a copy of this entry in hardlink_entries list + * for later processing */ + if (archive_entry_hardlink(entry) != NULL) { + entries_list_add(&hardlink_entries, ¤t_hardlink_entry, entry); + /* advance to next entry in the archive */ + count++; + continue; + } + /* create item on disk */ r = archive_write_header(ext, entry); if (r != ARCHIVE_OK) { @@ -4899,6 +5027,16 @@ static int extract_files( count++; } + /* wait for all tasks to write regular files */ + MPI_Barrier(MPI_COMM_WORLD); + + /* extract pending hardlinks entries */ + r = extract_archive_list_entries(ext, &hardlink_entries); + if (r != MFU_SUCCESS) { + MFU_LOG(MFU_LOG_ERR, "Failed to extract hardlink entries for archive"); + rc = MFU_FAILURE; + } + /* free off our write archive, this may update timestamps and permissions on items */ r = archive_write_close(ext); if (r != ARCHIVE_OK) { @@ -4957,8 +5095,8 @@ static int extract_files( } /* iterate through our portion of the given file list, - * identify symlinks and extract them from archive */ -static int extract_symlinks( + * identify symlinks and hardlinks and extract them from archive */ +static int extract_links( const char* filename, /* name of archive file */ mfu_flist flist, /* file list of items */ uint64_t* offsets, /* offset of each item in the archive */ @@ -4966,31 +5104,37 @@ static int extract_symlinks( { int rc = MFU_SUCCESS; - /* iterate over all items in our list and count symlinks */ - uint64_t count = 0; + /* iterate over all items in our list, count symlinks and hardlinks */ + uint64_t count_symlinks = 0, count_hardlinks = 0; uint64_t idx; uint64_t size = mfu_flist_size(flist); for (idx = 0; idx < size; idx++) { mfu_filetype type = mfu_flist_file_get_type(flist, idx); if (type == MFU_TYPE_LINK) { /* found a symlink */ - count++; + count_symlinks++; + } + if (type == MFU_TYPE_HARDLINK) { + /* found a hardlink */ + count_hardlinks++; } } /* count total number of links */ - uint64_t all_count; - MPI_Allreduce(&count, &all_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + uint64_t all_count_symlinks, all_count_hardlinks; + MPI_Allreduce(&count_symlinks, &all_count_symlinks, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&count_hardlinks, &all_count_hardlinks, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); /* bail out early if there is nothing to do */ - if (all_count == 0) { + if (all_count_symlinks + all_count_hardlinks == 0) { return MFU_SUCCESS; } /* let user know what we're doing */ MPI_Barrier(MPI_COMM_WORLD); if (mfu_rank == 0) { - MFU_LOG(MFU_LOG_INFO, "Creating %llu symlinks", (unsigned long long)all_count); + MFU_LOG(MFU_LOG_INFO, "Creating %llu symlinks", (unsigned long long)all_count_symlinks); + MFU_LOG(MFU_LOG_INFO, "Creating %llu hardlinks", (unsigned long long)all_count_hardlinks); } /* open the archive file for reading */ @@ -5018,10 +5162,14 @@ static int extract_symlinks( for (idx = 0; idx < size; idx++) { /* skip entries that are not symlinks */ mfu_filetype type = mfu_flist_file_get_type(flist, idx); - if (type != MFU_TYPE_LINK) { - /* not a symlink, go to next item */ + char* type_s; + if (type == MFU_TYPE_LINK) { + type_s = "symlink"; + } else if (type == MFU_TYPE_HARDLINK) { + type_s = "hardlink"; + } else + /* not a symlink or hardlink, go to next item */ continue; - } /* got a symlink, get its 
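/* Why hardlink entries are queued rather than written in place during extraction:
 * creating the link (archive_write_header() on the archive_write_disk handle is
 * effectively a link(2)) requires the target file to exist already, and that file may
 * be extracted by a different MPI rank.  A sketch of the replay phase, reusing the
 * patch's entry_list_t; error reporting trimmed: */
#include <archive.h>
#include <archive_entry.h>
#include <mpi.h>

static int replay_queued_hardlinks(struct archive* ext, entry_list_t* queued)
{
    /* every rank has finished writing its regular files before any link is attempted */
    MPI_Barrier(MPI_COMM_WORLD);

    for (entry_list_t* cur = queued; cur != NULL; cur = cur->next) {
        if (archive_write_header(ext, cur->entry) != ARCHIVE_OK ||
            archive_write_finish_entry(ext) != ARCHIVE_OK) {
            return -1;   /* e.g. link(2) failed because the target is still missing */
        }
    }
    return 0;
}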
path */ const char* name = mfu_flist_file_get_name(flist, idx); @@ -5050,8 +5198,8 @@ static int extract_symlinks( /* use a small read block size, since we just need the header */ int r = archive_read_open_fd(a, fd, 10240); if (r != ARCHIVE_OK) { - MFU_LOG(MFU_LOG_ERR, "opening archive to extract symlink `%s' at offset %llu %s", - name, offset, archive_error_string(a) + MFU_LOG(MFU_LOG_ERR, "opening archive to extract %s `%s' at offset %llu %s", + type_s, name, offset, archive_error_string(a) ); archive_read_free(a); rc = MFU_FAILURE; @@ -5062,8 +5210,8 @@ static int extract_symlinks( struct archive_entry* entry; r = archive_read_next_header(a, &entry); if (r == ARCHIVE_EOF) { - MFU_LOG(MFU_LOG_ERR, "Unexpected end of archive while extracting symlink `%s' at offset %llu", - name, offset + MFU_LOG(MFU_LOG_ERR, "Unexpected end of archive while extracting %s `%s' at offset %llu", + type_s, name, offset ); archive_read_close(a); archive_read_free(a); @@ -5071,8 +5219,8 @@ static int extract_symlinks( continue; } if (r != ARCHIVE_OK) { - MFU_LOG(MFU_LOG_ERR, "Extracting symlink '%s' at offset %llu %s", - name, offset, archive_error_string(a) + MFU_LOG(MFU_LOG_ERR, "Extracting %s '%s' at offset %llu %s", + type_s, name, offset, archive_error_string(a) ); archive_read_close(a); archive_read_free(a); @@ -5080,33 +5228,66 @@ static int extract_symlinks( continue; } - /* get target of the link */ - const char* target = archive_entry_symlink(entry); - if (target == NULL) { - MFU_LOG(MFU_LOG_ERR, "Item is not a symlink as expected `%s'", - name); - archive_read_close(a); - archive_read_free(a); - rc = MFU_FAILURE; - continue; - } + if (type == MFU_TYPE_LINK) { + /* get target of the symlink */ + const char* target = archive_entry_symlink(entry); + if (target == NULL) { + MFU_LOG(MFU_LOG_ERR, "Item is not a symlink as expected `%s'", + name); + archive_read_close(a); + archive_read_free(a); + rc = MFU_FAILURE; + continue; + } + /* create the symlink on the file system */ + int symlink_rc = mfu_symlink(target, name); + if (symlink_rc != 0) { + /* TODO: check whether user wants overwrite */ + if (errno == EEXIST) { + /* failed because something exists, + * attempt to delete item and try again */ + mfu_unlink(name); + symlink_rc = mfu_symlink(target, name); + } - /* create the link on the file system */ - int symlink_rc = mfu_symlink(target, name); - if (symlink_rc != 0) { - /* TODO: check whether user wants overwrite */ - if (errno == EEXIST) { - /* failed because something exists, - * attempt to delete item and try again */ - mfu_unlink(name); - symlink_rc = mfu_symlink(target, name); + /* if we still failed, give up */ + if (symlink_rc != 0) { + MFU_LOG(MFU_LOG_ERR, "Failed to set symlink `%s' (errno=%d %s)", + name, errno, strerror(errno)); + rc = MFU_FAILURE; + } } + } - /* if we still failed, give up */ - if (symlink_rc != 0) { - MFU_LOG(MFU_LOG_ERR, "Failed to set symlink `%s' (errno=%d %s)", - name, errno, strerror(errno)); + if (type == MFU_TYPE_HARDLINK) { + /* get target of the hardlink */ + const char* target = archive_entry_hardlink(entry); + if (target == NULL) { + MFU_LOG(MFU_LOG_ERR, "Item is not a hardlink as expected `%s'", + name); + archive_read_close(a); + archive_read_free(a); rc = MFU_FAILURE; + continue; + } + + /* create the hardlink on the file system */ + int hardlink_rc = mfu_hardlink(target, name); + if (hardlink_rc != 0) { + /* TODO: check whether user wants overwrite */ + if (errno == EEXIST) { + /* failed because something exists, + * attempt to delete item and try again */ 
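/* The create-or-replace pattern used here for both symlinks and hardlinks, written
 * against plain POSIX calls (mfu_hardlink()/mfu_unlink() are assumed to be thin
 * wrappers over link(2)/unlink(2)): */
#include <errno.h>
#include <unistd.h>

static int hardlink_or_replace(const char* target, const char* name)
{
    if (link(target, name) == 0) {
        return 0;
    }
    if (errno == EEXIST) {      /* something is already in the way: remove it and retry */
        unlink(name);
        return link(target, name);
    }
    return -1;
}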
+ mfu_unlink(name); + hardlink_rc = mfu_hardlink(target, name); + } + + /* if we still failed, give up */ + if (hardlink_rc != 0) { + MFU_LOG(MFU_LOG_ERR, "Failed to set hardlink `%s' (errno=%d %s)", + name, errno, strerror(errno)); + rc = MFU_FAILURE; + } } } @@ -5604,8 +5785,8 @@ int mfu_flist_archive_extract( * create the files in advance */ mfu_flist_mknod(flist, create_opts); - /* create symlinks */ - int tmp_rc = extract_symlinks(filename, flist, offsets, opts); + /* create symlinks and hardlinks */ + int tmp_rc = extract_links(filename, flist, offsets, opts); if (tmp_rc != MFU_SUCCESS) { /* tried but failed to get some symlink, so mark as failure */ ret = tmp_rc; diff --git a/src/common/mfu_flist_copy.c b/src/common/mfu_flist_copy.c index c7b3638c..cae328d1 100644 --- a/src/common/mfu_flist_copy.c +++ b/src/common/mfu_flist_copy.c @@ -80,6 +80,7 @@ typedef struct { int64_t total_dirs; /* sum of all directories */ int64_t total_files; /* sum of all files */ int64_t total_links; /* sum of all symlinks */ + int64_t total_hardlinks; /* sum of all hardlinks */ int64_t total_size; /* sum of all file sizes */ int64_t total_bytes_copied; /* total bytes written */ time_t time_started; /* time when dcp command started */ @@ -734,6 +735,13 @@ static int mfu_copy_set_metadata( for (idx = 0; idx < size; idx++) { /* TODO: skip file if it's not readable */ + mfu_filetype type = mfu_flist_file_get_type(list, idx); + /* skip hardlinks as metadata changes on reference paths also affect + * hardlinks */ + if (type == MFU_TYPE_HARDLINK) { + continue; + } + /* get source name of item */ const char* name = mfu_flist_file_get_name(list, idx); @@ -1380,7 +1388,7 @@ static int mfu_create_file( /* creates hardlink in destpath for specified file, identifies source path * returns 0 on success and -1 on error */ -static int mfu_create_hardlink( +static int mfu_create_hardlink_dest( mfu_flist list, uint64_t idx, const mfu_param_path* srcpath, @@ -1560,7 +1568,7 @@ static int mfu_create_files( /* creates hardlinks, * returns 0 on success and -1 on error */ -static int mfu_create_hardlinks( +static int mfu_create_hardlinks_dest( int levels, int minlevel, mfu_flist* lists, @@ -1622,16 +1630,16 @@ static int mfu_create_hardlinks( for (idx = 0; idx < size; idx++) { /* get type of item */ mfu_filetype type = mfu_flist_file_get_type(list, idx); - if (type != MFU_TYPE_FILE) { + if (type != MFU_TYPE_FILE && type != MFU_TYPE_HARDLINK) { MFU_LOG(MFU_LOG_ERR, "Can't create link for unregular files."); rc = -1; total_count++; continue; } - int tmp_rc = mfu_create_hardlink(list, idx, srcpath, - destpath, copy_opts, - mfu_src_file, mfu_dst_file); + int tmp_rc = mfu_create_hardlink_dest(list, idx, srcpath, + destpath, copy_opts, + mfu_src_file, mfu_dst_file); if (tmp_rc != 0) { rc = -1; } @@ -1652,6 +1660,171 @@ static int mfu_create_hardlinks( return rc; } +/* tracks number of hardlinks created by this process */ +static uint64_t hardlinks_total_count; + +/* progress message to print while creating hardlinks */ +static void create_hardlinks_progress_fn(const uint64_t* vals, int count, int complete, int ranks, double secs) +{ + /* get number of items created so far */ + uint64_t items = vals[0]; + + /* compute item rate */ + double item_rate = 0.0; + if (secs > 0) { + item_rate = (double)items / secs; + } + + /* compute percentage of items created */ + double percent = 0.0; + if (hardlinks_total_count > 0) { + percent = (double)items * 100.0 / (double)hardlinks_total_count; + } + + /* estimate seconds remaining */ + double 
secs_remaining = -1.0; + if (item_rate > 0.0) { + secs_remaining = (double)(hardlinks_total_count - items) / item_rate; + } + + if (complete < ranks) { + MFU_LOG(MFU_LOG_INFO, "Created %llu items (%.0f%%) in %.3lf secs (%.3lf items/sec) %.0f secs left ...", + items, percent, secs, item_rate, secs_remaining); + } else { + MFU_LOG(MFU_LOG_INFO, "Created %llu items (%.0f%%) in %.3lf secs (%.3lf items/sec) done", + items, percent, secs, item_rate); + } +} + +/* creates hardlink in destpath for specified file, identifies source path + * returns 0 on success and -1 on error */ +static int mfu_create_hardlink( + mfu_flist list, + uint64_t idx, + int numpaths, + const mfu_param_path* paths, + const mfu_param_path* destpath, + mfu_copy_opts_t* copy_opts, + mfu_file_t* mfu_src_file, + mfu_file_t* mfu_dst_file) +{ + /* assume we'll succeed */ + int rc = 0; + + const char* name = mfu_flist_file_get_name(list, idx); + const char* ref = mfu_flist_file_get_ref(list, idx); + + /* get reference name */ + const char* src_path = mfu_param_path_copy_dest(ref, numpaths, + paths, destpath, copy_opts, mfu_src_file, mfu_dst_file); + + /* get destination name */ + const char* dest_path = mfu_param_path_copy_dest(name, numpaths, + paths, destpath, copy_opts, mfu_src_file, mfu_dst_file); + + /* No need to copy it */ + if (dest_path == NULL) { + return 0; + } + + rc = mfu_hardlink(src_path, dest_path); + if (rc != 0) { + MFU_LOG(MFU_LOG_ERR, "Failed to create hardlink %s --> %s", + dest_path, src_path); + mfu_free(&src_path); + mfu_free(&dest_path); + return rc; + } + + /* free source path */ + mfu_free(&src_path); + + /* free destination path */ + mfu_free(&dest_path); + + /* increment our hardlinks count by one */ + mfu_copy_stats.total_hardlinks++; + + return rc; +} + +/* creates hardlinks, + * returns 0 on success and -1 on error */ +static int mfu_create_hardlinks( + mfu_flist list, + int numpaths, + const mfu_param_path* paths, + const mfu_param_path* destpath, + mfu_copy_opts_t* copy_opts, + mfu_file_t* mfu_src_file, + mfu_file_t* mfu_dst_file) +{ + int rc = 0; + flist_t* flist = (flist_t*)list; + + /* get current rank */ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + /* iterate over items and create hardlink for each */ + uint64_t idx; + uint64_t size = mfu_flist_size(list); + uint64_t hardlinks_local_count = 0; + + /* get type of item */ + for (idx = 0; idx < size; idx++) { + mfu_filetype type = mfu_flist_file_get_type(list, idx); + if (type == MFU_TYPE_HARDLINK) { + hardlinks_local_count++; + } + } + + /* get total for print percent progress while creating */ + hardlinks_total_count = 0; + MPI_Allreduce(&hardlinks_local_count, &hardlinks_total_count, 1, + MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + + /* bail early if there is no work to do */ + if (hardlinks_total_count == 0) { + return rc; + } + + /* indicate to user what phase we're in */ + if (rank == 0) { + MFU_LOG(MFU_LOG_INFO, "Linking %llu files.", hardlinks_total_count); + } + + /* start progress messages for creating files */ + mfu_progress* create_prog = mfu_progress_start(mfu_progress_timeout, 1, + MPI_COMM_WORLD, create_hardlinks_progress_fn); + + uint64_t total_count = 0; + + for (idx = 0; idx < size; idx++) { + mfu_filetype type = mfu_flist_file_get_type(list, idx); + if (type == MFU_TYPE_HARDLINK) { + int tmp_rc = mfu_create_hardlink(list, idx, numpaths, paths, + destpath, copy_opts, + mfu_src_file, mfu_dst_file); + if (tmp_rc != 0) { + rc = -1; + } + + /* update number of files we have created for progress messages */ + total_count++; 
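/* Worked example of the progress arithmetic in create_hardlinks_progress_fn(): with
 * 250 of 1000 hardlinks done after 5.0 seconds, the rate is 50 items/s, completion is
 * 25%, and the estimate is 15 seconds remaining. */
#include <stdint.h>
#include <stdio.h>

static void progress_estimate(uint64_t done, uint64_t total, double secs)
{
    double rate    = (secs  > 0.0) ? (double)done / secs : 0.0;
    double percent = (total > 0)   ? 100.0 * (double)done / (double)total : 0.0;
    double remain  = (rate  > 0.0) ? (double)(total - done) / rate : -1.0;
    printf("%llu items (%.0f%%), %.3f items/s, %.0f secs left\n",
           (unsigned long long)done, percent, rate, remain);
}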
+ mfu_progress_update(&total_count, create_prog); + } + } + + /* wait for all procs to finish */ + MPI_Barrier(MPI_COMM_WORLD); + + /* finalize progress messages */ + mfu_progress_complete(&total_count, &create_prog); + + return rc; +} + /* hold state for copy progress messages */ static mfu_progress* copy_prog; @@ -2699,6 +2872,7 @@ int mfu_flist_copy( mfu_copy_stats.total_dirs = 0; mfu_copy_stats.total_files = 0; mfu_copy_stats.total_links = 0; + mfu_copy_stats.total_hardlinks = 0; mfu_copy_stats.total_size = 0; mfu_copy_stats.total_bytes_copied = 0; @@ -2777,7 +2951,7 @@ int mfu_flist_copy( mfu_flist* lists2; mfu_flist_array_by_depth(spreadlist, &levels2, &minlevel2, &lists2); - /* create files and links */ + /* create files and symlinks */ tmp_rc = mfu_create_files(levels2, minlevel2, lists2, numpaths, paths, destpath, copy_opts, mfu_src_file, mfu_dst_file); if (tmp_rc < 0) { @@ -2820,23 +2994,25 @@ int mfu_flist_copy( double rel_time = mfu_copy_stats.wtime_ended - mfu_copy_stats.wtime_started; /* prep our values into buffer */ - int64_t values[5]; + int64_t values[6]; values[0] = mfu_copy_stats.total_dirs; values[1] = mfu_copy_stats.total_files; values[2] = mfu_copy_stats.total_links; - values[3] = mfu_copy_stats.total_size; - values[4] = mfu_copy_stats.total_bytes_copied; + values[3] = mfu_copy_stats.total_hardlinks; + values[4] = mfu_copy_stats.total_size; + values[5] = mfu_copy_stats.total_bytes_copied; /* sum values across processes */ - int64_t sums[5]; - MPI_Allreduce(values, sums, 5, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); + int64_t sums[6]; + MPI_Allreduce(values, sums, 6, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); /* extract results from allreduce */ - int64_t agg_dirs = sums[0]; - int64_t agg_files = sums[1]; - int64_t agg_links = sums[2]; - int64_t agg_size = sums[3]; - int64_t agg_copied = sums[4]; + int64_t agg_dirs = sums[0]; + int64_t agg_files = sums[1]; + int64_t agg_links = sums[2]; + int64_t agg_hardlinks = sums[3]; + int64_t agg_size = sums[4]; + int64_t agg_copied = sums[5]; /* compute rate of copy */ double agg_rate = (double)agg_copied / rel_time; @@ -2917,6 +3093,13 @@ int mfu_flist_copy( mfu_sync_all("Syncing directory updates to disk."); } + /* create hardlinks */ + tmp_rc = mfu_create_hardlinks(src_cp_list, numpaths, paths, destpath, + copy_opts, mfu_src_file, mfu_dst_file); + if (tmp_rc < 0) { + rc = -1; + } + /* free our lists of levels */ mfu_flist_array_free(levels, &lists); @@ -2933,23 +3116,25 @@ int mfu_flist_copy( mfu_copy_stats.wtime_started; /* prep our values into buffer */ - int64_t values[5]; + int64_t values[6]; values[0] = mfu_copy_stats.total_dirs; values[1] = mfu_copy_stats.total_files; values[2] = mfu_copy_stats.total_links; - values[3] = mfu_copy_stats.total_size; - values[4] = mfu_copy_stats.total_bytes_copied; + values[3] = mfu_copy_stats.total_hardlinks; + values[4] = mfu_copy_stats.total_size; + values[5] = mfu_copy_stats.total_bytes_copied; /* sum values across processes */ - int64_t sums[5]; - MPI_Allreduce(values, sums, 5, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); + int64_t sums[6]; + MPI_Allreduce(values, sums, 6, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); /* extract results from allreduce */ - int64_t agg_dirs = sums[0]; - int64_t agg_files = sums[1]; - int64_t agg_links = sums[2]; - int64_t agg_size = sums[3]; - int64_t agg_copied = sums[4]; + int64_t agg_dirs = sums[0]; + int64_t agg_files = sums[1]; + int64_t agg_links = sums[2]; + int64_t agg_hardlinks = sums[3]; + int64_t agg_size = sums[4]; + int64_t agg_copied = sums[5]; /* compute 
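/* The aggregation pattern behind the copy statistics: each rank packs its counters
 * into a fixed-size array and a single MPI_Allreduce(SUM) yields the global totals.
 * Supporting hardlinks only grows the array from 5 to 6 slots, in the order
 * dirs, files, links, hardlinks, size, bytes copied. */
#include <mpi.h>
#include <stdint.h>

static void sum_copy_stats(int64_t local[6], int64_t global[6])
{
    MPI_Allreduce(local, global, 6, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD);
}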
rate of copy */ double agg_rate = (double)agg_copied / rel_time; @@ -2969,7 +3154,7 @@ int mfu_flist_copy( strftime(endtime_str, 256, "%b-%d-%Y,%H:%M:%S", localend); /* total number of items */ - int64_t agg_items = agg_dirs + agg_files + agg_links; + int64_t agg_items = agg_dirs + agg_files + agg_links + agg_hardlinks; /* convert size to units */ double agg_size_tmp; @@ -2988,6 +3173,7 @@ int mfu_flist_copy( MFU_LOG(MFU_LOG_INFO, " Directories: %" PRId64, agg_dirs); MFU_LOG(MFU_LOG_INFO, " Files: %" PRId64, agg_files); MFU_LOG(MFU_LOG_INFO, " Links: %" PRId64, agg_links); + MFU_LOG(MFU_LOG_INFO, " Hardlinks: %" PRId64, agg_hardlinks); MFU_LOG(MFU_LOG_INFO, "Data: %.3lf %s (%" PRId64 " bytes)", agg_size_tmp, agg_size_units, agg_size); @@ -3381,7 +3567,7 @@ int mfu_flist_hardlink( * under any directories that were created). We can imrove this if someone * has better idea for it. */ /* create hard links */ - tmp_rc = mfu_create_hardlinks(levels, minlevel, lists, + tmp_rc = mfu_create_hardlinks_dest(levels, minlevel, lists, srcpath, destpath, copy_opts, mfu_src_file, mfu_dst_file); if (tmp_rc < 0) { rc = -1; diff --git a/src/common/mfu_flist_internal.h b/src/common/mfu_flist_internal.h index 01606c0b..67f744b3 100644 --- a/src/common/mfu_flist_internal.h +++ b/src/common/mfu_flist_internal.h @@ -40,12 +40,21 @@ typedef struct list_elem { uint64_t ctime; /* create time */ uint64_t ctime_nsec; /* create time nanoseconds */ uint64_t size; /* file size in bytes */ + uint64_t nlink; /* number of links to inode */ + char* ref; /* reference path for hardlinks */ struct list_elem* next; /* pointer to next item */ /* vars for a non-posix DAOS copy */ uint64_t obj_id_lo; uint64_t obj_id_hi; } elem_t; +/* linked list inode/path pairs used during walk */ +typedef struct inodes_hardlinks_map { + uint64_t *inodes; /* array of inodes numbers for each item in hardlinks temporary list */ + uint64_t count; /* number of inodes */ + uint64_t cap; /* current capacity */ +} inodes_hardlink_map_t; + /* holds an array of objects: users, groups, or file data */ typedef struct { void* buf; /* pointer to memory buffer holding data */ diff --git a/src/common/mfu_flist_io.c b/src/common/mfu_flist_io.c index 0b0a2e59..244daf5b 100644 --- a/src/common/mfu_flist_io.c +++ b/src/common/mfu_flist_io.c @@ -221,12 +221,14 @@ static void list_elem_decode(char* buf, elem_t* elem) elem->type = MFU_TYPE_UNKNOWN; } + elem->ref = NULL; + return; } /* create a datatype to hold file name and stat info */ /* return number of bytes needed to pack element */ -static size_t list_elem_pack_size(int detail, uint64_t chars, const elem_t* elem) +static size_t list_elem_pack_size_le4(int detail, uint64_t chars, const elem_t* elem) { size_t size; if (detail) { @@ -238,6 +240,18 @@ static size_t list_elem_pack_size(int detail, uint64_t chars, const elem_t* elem return size; } +static size_t list_elem_pack_size(int detail, uint64_t chars, const elem_t* elem) +{ + size_t size; + if (detail) { + size = chars + 1 * 4 + 11 * 8 + chars; + } + else { + size = chars + 1 * 4; + } + return size; +} + /* pack element into buffer and return number of bytes written */ static size_t list_elem_pack(void* buf, int detail, uint64_t chars, const elem_t* elem) { @@ -246,9 +260,9 @@ static size_t list_elem_pack(void* buf, int detail, uint64_t chars, const elem_t char* ptr = start; /* copy in file name */ - char* file = elem->file; - strncpy(ptr, file, chars); - ptr += chars; + mfu_pack_sized_str(&ptr, elem->file, chars); + + mfu_pack_io_uint32(&ptr, elem->type); if 
(detail) { mfu_pack_io_uint64(&ptr, elem->mode); @@ -261,18 +275,16 @@ static size_t list_elem_pack(void* buf, int detail, uint64_t chars, const elem_t mfu_pack_io_uint64(&ptr, elem->ctime); mfu_pack_io_uint64(&ptr, elem->ctime_nsec); mfu_pack_io_uint64(&ptr, elem->size); - } - else { - /* just have the file type */ - mfu_pack_io_uint32(&ptr, elem->type); + mfu_pack_io_uint64(&ptr, elem->nlink); + mfu_pack_sized_str(&ptr, elem->ref, chars); } size_t bytes = (size_t)(ptr - start); return bytes; } -/* unpack element from buffer and return number of bytes read */ -static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem_t* elem) +/* unpack element (encoded in format v4 or below) from buffer and return number of bytes read */ +static size_t list_elem_unpack_le4(const void* buf, int detail, uint64_t chars, elem_t* elem) { const char* start = (const char*) buf; const char* ptr = start; @@ -289,6 +301,9 @@ static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem elem->detail = detail; + /* ref is not not supported in format v4 or below */ + elem->ref = NULL; + if (detail) { /* extract fields */ mfu_unpack_io_uint64(&ptr, &elem->mode); @@ -313,6 +328,43 @@ static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem return bytes; } +/* unpack element from buffer and return number of bytes read */ +static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem_t* elem) +{ + const char* start = (const char*) buf; + const char* ptr = start; + + /* get name */ + mfu_unpack_sized_str(&ptr, &elem->file, chars); + + /* set depth */ + elem->depth = mfu_flist_compute_depth(elem->file); + + mfu_unpack_io_uint32(&ptr, &elem->type); + + elem->detail = detail; + + if (detail) { + /* extract fields */ + mfu_unpack_io_uint64(&ptr, &elem->mode); + mfu_unpack_io_uint64(&ptr, &elem->uid); + mfu_unpack_io_uint64(&ptr, &elem->gid); + mfu_unpack_io_uint64(&ptr, &elem->atime); + mfu_unpack_io_uint64(&ptr, &elem->atime_nsec); + mfu_unpack_io_uint64(&ptr, &elem->mtime); + mfu_unpack_io_uint64(&ptr, &elem->mtime_nsec); + mfu_unpack_io_uint64(&ptr, &elem->ctime); + mfu_unpack_io_uint64(&ptr, &elem->ctime_nsec); + mfu_unpack_io_uint64(&ptr, &elem->size); + mfu_unpack_io_uint64(&ptr, &elem->nlink); + mfu_unpack_sized_str(&ptr, &elem->ref, chars); + } else + elem->ref = NULL; + + size_t bytes = (size_t)(ptr - start); + return bytes; +} + /* insert a file given a pointer to packed data */ static void list_insert_decode(flist_t* flist, char* buf) { @@ -328,6 +380,21 @@ static void list_insert_decode(flist_t* flist, char* buf) return; } +/* insert a file given a pointer to packed data */ +static size_t list_insert_ptr_le4(flist_t* flist, char* ptr, int detail, uint64_t chars) +{ + /* create new element to record file path, file type, and stat info */ + elem_t* elem = (elem_t*) MFU_MALLOC(sizeof(elem_t)); + + /* get name and advance pointer */ + size_t bytes = list_elem_unpack_le4(ptr, detail, chars, elem); + + /* append element to tail of linked list */ + mfu_flist_insert_elem(flist, elem); + + return bytes; +} + /* insert a file given a pointer to packed data */ static size_t list_insert_ptr(flist_t* flist, char* ptr, int detail, uint64_t chars) { @@ -803,7 +870,7 @@ static void read_cache_v3( uint64_t packcount = 0; while (packcount < (uint64_t) read_count) { /* unpack item from buffer and advance pointer */ - list_insert_ptr(flist, ptr, 1, chars); + list_insert_ptr_le4(flist, ptr, 1, chars); ptr += extent_file; packcount++; } @@ -988,6 
+1055,247 @@ static void read_cache_v4( disp += (MPI_Offset) group_buf_size; } + /* read files, if any */ + if (all_count > 0 && chars > 0) { + /* get size of file element */ + size_t elem_size = list_elem_pack_size_le4(flist->detail, (int)chars, NULL); + + /* in order to avoid blowing out memory, we'll pack into a smaller + * buffer and iteratively make many collective reads */ + + /* allocate a buffer, ensure it's large enough to hold at least one + * complete record */ + size_t bufsize = 1024 * 1024; + if (bufsize < elem_size) { + bufsize = elem_size; + } + void* buf = MFU_MALLOC(bufsize); + + /* compute number of items we can fit in each read iteration */ + uint64_t bufcount = (uint64_t)bufsize / (uint64_t)elem_size; + + /* determine number of iterations we need to read all items */ + uint64_t iters = count / bufcount; + if (iters * bufcount < count) { + iters++; + } + + /* compute max iterations across all procs */ + uint64_t all_iters; + MPI_Allreduce(&iters, &all_iters, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD); + + /* set file view to be sequence of datatypes past header */ + mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* compute byte offset to read our element */ + MPI_Offset read_offset = (MPI_Offset)offset * elem_size; + + /* iterate with multiple reads until all records are read */ + uint64_t totalcount = 0; + while (all_iters > 0) { + /* determine number to read */ + int read_count = (int) bufcount; + uint64_t remaining = count - totalcount; + if (remaining < bufcount) { + read_count = (int) remaining; + } + + /* TODO: read_at_all w/ external32 is broken in ROMIO as of MPICH-3.2rc1 */ + + /* compute number of bytes to read */ + int read_size = read_count * (int)elem_size; + + /* issue a collective read */ + //MPI_File_read_at_all(fh, read_offset, buf, read_size, MPI_BYTE, &status); + mpirc = MPI_File_read_at(fh, read_offset, buf, read_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* update our offset with the number of items we just read */ + read_offset += (MPI_Offset)read_size; + totalcount += (uint64_t) read_count; + + /* unpack data from buffer into list */ + char* ptr = (char*) buf; + uint64_t packcount = 0; + while (packcount < (uint64_t) read_count) { + /* unpack item from buffer and advance pointer */ + list_insert_ptr_le4(flist, ptr, 1, chars); + ptr += elem_size; + packcount++; + } + + /* one less iteration */ + all_iters--; + } + + /* free buffer */ + mfu_free(&buf); + } + + /* create maps of users and groups */ + mfu_flist_usrgrp_create_map(&flist->users, flist->user_id2name); + mfu_flist_usrgrp_create_map(&flist->groups, flist->group_id2name); + + *outdisp = disp; + return; +} + +/* file format: same as v4 except nlink and ref added in list elements to + * support hardlinks */ + static void read_cache_v5( + const char* name, + MPI_Offset* outdisp, + MPI_File fh, + const char* datarep, + flist_t* flist) +{ + MPI_Status status; + + MPI_Offset disp = *outdisp; + + /* indicate that we have stat data */ + flist->detail = 1; + + /* pointer to users, groups, and file buffer data structure */ + buf_t* users = &flist->users; + buf_t* groups = &flist->groups; + + /* get our rank */ + int rank, ranks; + 
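/* Per-record layout of the new v5 cache format, as implied by list_elem_pack() and
 * list_elem_pack_size(): the name and the hardlink reference are both fixed-width
 * fields of 'chars' bytes, the type is always present, and the detailed form adds ten
 * stat fields plus nlink.  Older v3/v4 files keep working through the *_le4 readers. */
#include <stddef.h>
#include <stdint.h>

static size_t v5_record_size(int detail, uint64_t chars)
{
    /* detailed: name + type + (10 stat fields + nlink) * 8 bytes + ref */
    return detail ? (size_t)chars + 4 + 11 * 8 + (size_t)chars
                  : (size_t)chars + 4;            /* name + type only */
}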
MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &ranks); + + /* rank 0 reads and broadcasts header */ + uint64_t header[6]; + int header_size = 6 * 8; /* 6 consecutive uint64_t */ + int mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + if (rank == 0) { + uint64_t header_packed[6]; + mpirc = MPI_File_read_at(fh, 0, header_packed, header_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + const char* ptr = (const char*) header_packed; + mfu_unpack_io_uint64(&ptr, &header[0]); + mfu_unpack_io_uint64(&ptr, &header[1]); + mfu_unpack_io_uint64(&ptr, &header[2]); + mfu_unpack_io_uint64(&ptr, &header[3]); + mfu_unpack_io_uint64(&ptr, &header[4]); + mfu_unpack_io_uint64(&ptr, &header[5]); + } + MPI_Bcast(header, 6, MPI_UINT64_T, 0, MPI_COMM_WORLD); + disp += header_size; + + uint64_t all_count; + users->count = header[0]; + users->chars = header[1]; + groups->count = header[2]; + groups->chars = header[3]; + all_count = header[4]; + uint64_t chars = header[5]; + + /* compute count for each process */ + uint64_t count = all_count / (uint64_t)ranks; + uint64_t remainder = all_count - count * (uint64_t)ranks; + if ((uint64_t)rank < remainder) { + count++; + } + + /* get our offset */ + uint64_t offset; + MPI_Exscan(&count, &offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + if (rank == 0) { + offset = 0; + } + + /* read users, if any */ + if (users->count > 0 && users->chars > 0) { + /* create type */ + mfu_flist_usrgrp_create_stridtype((int)users->chars, &(users->dt)); + + /* get extent */ + MPI_Aint lb_user, extent_user; + MPI_Type_get_extent(users->dt, &lb_user, &extent_user); + + /* allocate memory to hold data */ + size_t bufsize_user = users->count * (size_t)extent_user; + users->buf = (void*) MFU_MALLOC(bufsize_user); + users->bufsize = bufsize_user; + + /* set view to read data */ + mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* read data */ + int user_buf_size = (int) buft_pack_size(users); + if (rank == 0) { + char* user_buf = (char*) MFU_MALLOC(user_buf_size); + mpirc = MPI_File_read_at(fh, 0, user_buf, user_buf_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + buft_unpack(user_buf, users); + mfu_free(&user_buf); + } + MPI_Bcast(users->buf, (int)users->count, users->dt, 0, MPI_COMM_WORLD); + disp += (MPI_Offset) user_buf_size; + } + + /* read groups, if any */ + if (groups->count > 0 && groups->chars > 0) { + /* create type */ + mfu_flist_usrgrp_create_stridtype((int)groups->chars, &(groups->dt)); + + /* get extent */ + MPI_Aint lb_group, extent_group; + MPI_Type_get_extent(groups->dt, &lb_group, &extent_group); + + /* allocate memory to hold data */ + size_t bufsize_group = groups->count * (size_t)extent_group; + groups->buf = (void*) MFU_MALLOC(bufsize_group); + groups->bufsize = bufsize_group; + + /* set view to read data */ + mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, 
datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* read data */ + int group_buf_size = (int) buft_pack_size(groups); + if (rank == 0) { + char* group_buf = (char*) MFU_MALLOC(group_buf_size); + mpirc = MPI_File_read_at(fh, 0, group_buf, group_buf_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + buft_unpack(group_buf, groups); + mfu_free(&group_buf); + } + MPI_Bcast(groups->buf, (int)groups->count, groups->dt, 0, MPI_COMM_WORLD); + disp += (MPI_Offset) group_buf_size; + } + /* read files, if any */ if (all_count > 0 && chars > 0) { /* get size of file element */ @@ -1139,7 +1447,9 @@ void mfu_flist_read_cache( disp += 1 * 8; /* 9 consecutive uint64_t types in external32 */ /* read data from file */ - if (version == 4) { + if (version == 5) { + read_cache_v5(name, &disp, fh, datarep, flist); + } else if (version == 4) { read_cache_v4(name, &disp, fh, datarep, flist); } else if (version == 3) { /* need a couple of dummy params to record walk start and end times */ @@ -1326,7 +1636,7 @@ static void write_cache_readdir_variable( return; } -static void write_cache_stat_v4( +static void write_cache_stat_v5( const char* name, flist_t* flist) { @@ -1389,7 +1699,7 @@ static void write_cache_stat_v4( int header_bytes = 7 * 8; uint64_t header[7]; char* ptr = (char*) header; - mfu_pack_io_uint64(&ptr, 4); /* file version */ + mfu_pack_io_uint64(&ptr, 5); /* file version */ mfu_pack_io_uint64(&ptr, users->count); /* number of user records */ mfu_pack_io_uint64(&ptr, users->chars); /* number of chars in user name */ mfu_pack_io_uint64(&ptr, groups->count); /* number of group records */ @@ -1563,7 +1873,7 @@ void mfu_flist_write_cache( if (all_count > 0) { if (flist->detail) { - write_cache_stat_v4(name, flist); + write_cache_stat_v5(name, flist); } else { write_cache_readdir_variable(name, flist); diff --git a/src/common/mfu_flist_walk.c b/src/common/mfu_flist_walk.c index 7a5ce6ec..a22097d6 100644 --- a/src/common/mfu_flist_walk.c +++ b/src/common/mfu_flist_walk.c @@ -48,6 +48,8 @@ static uint64_t CURRENT_NUM_DIRS; static const char** CURRENT_DIRS; static flist_t* CURRENT_LIST; +static flist_t* HARDLINKS_TMP_LIST; +static inodes_hardlink_map_t* HARDLINKS_INODES_MAP; static int SET_DIR_PERMS; static int REMOVE_FILES; static int DEREFERENCE; @@ -494,6 +496,67 @@ static void walk_stat_create(CIRCLE_handle* handle) } } +/* allocate and initialize a new inodes map */ +inodes_hardlink_map_t* inodes_map_new() +{ + /* allocate memory for map, cast it to handle, initialize and return */ + inodes_hardlink_map_t* map = (inodes_hardlink_map_t*) MFU_MALLOC(sizeof(inodes_hardlink_map_t)); + + map->inodes = NULL; + map->count = 0; + map->cap = 0; + + return map; +} + +/* free memory of inodes map */ +inodes_hardlink_map_t* inodes_map_free(inodes_hardlink_map_t** map) +{ + mfu_free(&(*map)->inodes); + mfu_free(map); +} + +/* add new element to running list index, allocates additional + * capactiy for index if needed */ +static void inodes_map_insert(inodes_hardlink_map_t* map, uint64_t inode) +{ + /* if we have no capacity for the index, + * initialize with a small array */ + uint64_t cap = map->cap; + if (cap == 0) { + /* have no index at all, initialize it */ + uint64_t new_capacity = 32; + size_t index_size = new_capacity * 
sizeof(uint64_t); + map->inodes = (uint64_t*) MFU_MALLOC(index_size); + map->cap = new_capacity; + } + + map->count++; + + /* check that our index has space before we add it */ + uint64_t count = map->count; + if (count == cap) { + /* we have exhausted the current capacity of the index array, + * allocate a new memory region that is double the size */ + uint64_t new_capacity = cap * 2; + size_t index_size = new_capacity * sizeof(uint64_t); + uint64_t* new_inodes = (uint64_t*) MFU_MALLOC(index_size); + + /* copy over existing list */ + memcpy(new_inodes, map->inodes, count * sizeof(uint64_t)); + + /* free the old index memory and assign the new one */ + mfu_free(&map->inodes); + map->inodes = new_inodes; + map->cap = new_capacity; + } + + /* append the item to the index */ + map->inodes[count - 1] = inode; + + return; +} + /** Callback given to process the dataset. */ static void walk_stat_process(CIRCLE_handle* handle) { @@ -527,8 +590,13 @@ static void walk_stat_process(CIRCLE_handle* handle) if (REMOVE_FILES && !S_ISDIR(st.st_mode)) { mfu_file_unlink(path, mfu_file); } else { - /* record info for item in list */ - mfu_flist_insert_stat(CURRENT_LIST, path, st.st_mode, &st); + if (S_ISREG(st.st_mode) && st.st_nlink > 1) { + /* record info for item in temporary hardlinks list and inodes map */ + mfu_flist_insert_stat(HARDLINKS_TMP_LIST, path, st.st_mode, &st); + inodes_map_insert(HARDLINKS_INODES_MAP, (uint64_t)st.st_ino); + } else + /* record info for item in list */ + mfu_flist_insert_stat(CURRENT_LIST, path, st.st_mode, &st); } /* recurse into directory */ @@ -552,6 +620,281 @@ static void walk_stat_process(CIRCLE_handle* handle) return; } +/* sort elements in flist and inodes by name and place them in sorted_list and + * sorted_inodes respectively. 
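/* The walk-time test that routes an item into the temporary hardlink list: only
 * regular files with more than one link need the post-walk resolution pass;
 * directories also have st_nlink > 1 but are excluded by S_ISREG(). */
#include <sys/stat.h>

static int is_hardlink_candidate(const struct stat* st)
{
    return S_ISREG(st->st_mode) && st->st_nlink > 1;
}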
*/ +static void walk_hardlinks_sort_names(flist_t* flist, inodes_hardlink_map_t* inodes, flist_t** sorted_flist, inodes_hardlink_map_t** sorted_inodes) { + + uint64_t incount = mfu_flist_size(flist); + uint64_t chars = mfu_flist_file_max_name(flist); + + /* create datatype for packed file list element */ + MPI_Datatype dt_elem; + size_t bytes = mfu_flist_file_pack_size(flist); + MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_elem); + + MPI_Datatype dt_key; + DTCMP_Op op_str; + DTCMP_Str_create_ascend(chars, &dt_key, &op_str); + + /* build keysat type */ + MPI_Datatype dt_keysat, keysat_types[3] = { dt_key, MPI_UINT64_T, dt_elem }; + if (DTCMP_Type_create_series(3, keysat_types, &dt_keysat) != DTCMP_SUCCESS) { + MFU_ABORT(1, "Failed to create keysat type"); + } + + /* get extent of key type */ + MPI_Aint key_lb, key_extent; + MPI_Type_get_extent(dt_key, &key_lb, &key_extent); + + /* get extent of keysat type */ + MPI_Aint inode_lb, inode_extent; + MPI_Type_get_extent(MPI_UINT64_T, &inode_lb, &inode_extent); + + /* get extent of keysat type */ + MPI_Aint keysat_lb, keysat_extent; + MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent); + + /* compute size of sort element and allocate buffer */ + size_t sortbufsize = (size_t)keysat_extent * incount; + void* sortbuf = MFU_MALLOC(sortbufsize); + + /* copy data into sort elements */ + char* sortptr = (char*) sortbuf; + for (uint64_t idx=0; idxinodes[idx]; + + sortptr += inode_extent; + /* pack file element */ + sortptr += mfu_flist_file_pack(sortptr, flist, idx); + } + + /* sort data */ + void* outsortbuf; + int outsortcount; + DTCMP_Handle handle; + int sort_rc = DTCMP_Sortz( + sortbuf, (int)incount, &outsortbuf, &outsortcount, + dt_key, dt_keysat, op_str, DTCMP_FLAG_NONE, + MPI_COMM_WORLD, &handle + ); + if (sort_rc != DTCMP_SUCCESS) { + MFU_ABORT(1, "Failed to sort data"); + } + + /* free input buffer holding sort elements */ + mfu_free(&sortbuf); + + /* create a new list as subset of original list */ + *sorted_flist = mfu_flist_subset(flist); + *sorted_inodes = inodes_map_new(); + + /* step through sorted data filenames */ + sortptr = (char*) outsortbuf; + for (uint64_t idx=0; idx<(uint64_t)outsortcount; idx++) { + sortptr += key_extent; + inodes_map_insert(*sorted_inodes, *(uint64_t*)sortptr); + sortptr += inode_extent; + sortptr += mfu_flist_file_unpack(sortptr, *sorted_flist); + } + + /* compute summary of new list */ + mfu_flist_summarize(*sorted_flist); + + /* free memory */ + DTCMP_Free(&handle); + + DTCMP_Op_free(&op_str); + MPI_Type_free(&dt_keysat); + MPI_Type_free(&dt_key); + MPI_Type_free(&dt_elem); + +} + +/* rank elements in flist by inodes in order to determine reference and secondary + * links (aka. hardlinks). 
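+ *
+ * DTCMP_Rankv groups entries sharing an inode across all ranks and assigns
+ * each entry a rank within its group; the entry with group rank 0 keeps its
+ * regular-file type while every other entry of the group is retyped to
+ * MFU_TYPE_HARDLINK.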
*/ +static void walk_hardlinks_rank(flist_t* flist, inodes_hardlink_map_t* inodes) { + + uint64_t incount = mfu_flist_size(flist); + uint64_t chars = mfu_flist_file_max_name(flist); + + uint64_t* rankbuf = NULL; + if(incount) + rankbuf = (uint64_t*) MFU_MALLOC(sizeof(uint64_t)*incount); + + for(int idx=0; idxinodes[idx]; + + uint64_t groups = 0; + uint64_t output_bytes = incount * sizeof(uint64_t); + uint64_t* group_id = (uint64_t*) MFU_MALLOC(output_bytes); + uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes); + uint64_t* group_rank = (uint64_t*) MFU_MALLOC(output_bytes); + int rank_rc = DTCMP_Rankv( + (int)incount, rankbuf, &groups, group_id, group_ranks, + group_rank, MPI_UINT64_T, MPI_UINT64_T, DTCMP_OP_UINT64T_ASCEND, DTCMP_FLAG_NONE, + MPI_COMM_WORLD); + + if (rank_rc != DTCMP_SUCCESS) { + MFU_ABORT(1, "Failed to rank hardlinks inodes"); + } + + /* The rank 0 is considered the reference link to the inode (ie. the regular + * file). Set file type MFU_TYPE_HARDLINK on all other elements. */ + for(int idx=0; idxinodes[idx]; + sendptr += inode_extent; + strncpy(sendptr, name, chars); + sendptr += (struct_extent - inode_extent); + } + } + } + + MPI_Allgatherv(sendbuf, nb_local_refs, dt_struct, recvbuf, recvcounts, recvdispls, dt_struct, MPI_COMM_WORLD); + + /* set reference on all local hardlinks */ + char* recvptr = (char*) recvbuf; + for (int i = 0; i < (int) ranks; i++) { + for (int j = 0; j < recvcounts[i]; j++) { + uint64_t inode = *(uint64_t *)recvptr; + const char* ref = recvptr + inode_extent; + /* look for indexes with the name inode and set the refs accordingly */ + for (int idx = 0; idx < incount; idx++) { + mfu_filetype type = mfu_flist_file_get_type(flist, idx); + if(inodes->inodes[idx] == inode && type == MFU_TYPE_HARDLINK) { + mfu_flist_file_set_ref(flist, idx, ref); + } + } + recvptr += struct_extent; + } + } + + mfu_free(&recvcounts); + mfu_free(&recvdispls); + mfu_free(&recvbuf); + mfu_free(&sendbuf); + MPI_Type_free(&dt_struct); + +} + +/* extend flist with add all items from sorted_hardlinks_flist */ +static void walk_hardlinks_merge(flist_t* flist, flist_t* sorted_hardlinks_flist) { + + uint64_t incount = mfu_flist_size(sorted_hardlinks_flist); + for(uint64_t idx=0; idxdetail = 0; if (walk_opts->use_stat) { flist->detail = 1; + HARDLINKS_TMP_LIST->detail = 1; if (flist->have_users == 0) { mfu_flist_usrgrp_get_users(flist); } @@ -667,6 +1013,12 @@ int mfu_flist_walk_paths(uint64_t num_paths, const char** paths, CIRCLE_begin(); CIRCLE_finalize(); + /* compute hardlinks temporary list global summary */ + mfu_flist_summarize(HARDLINKS_TMP_LIST); + + /* resolve hardlinks and merge them in flist */ + walk_resolve_hardlinks(flist, HARDLINKS_TMP_LIST, HARDLINKS_INODES_MAP); + /* compute global summary */ mfu_flist_summarize(bflist); @@ -743,9 +1095,13 @@ void mfu_flist_stat( mfu_file_t* mfu_file) { flist_t* file_list = (flist_t*)flist; + /* lists to track and resolve hardlinks */ + flist_t* hardlinks_tmp_list = mfu_flist_new(); + inodes_hardlink_map_t* hardlinks_inodes_map = inodes_map_new(); /* we will stat all items in output list, so set detail to 1 */ file_list->detail = 1; + hardlinks_tmp_list->detail = 1; /* get user data if needed */ if (file_list->have_users == 0) { @@ -809,10 +1165,21 @@ void mfu_flist_stat( } } - /* insert item into output list */ - mfu_flist_insert_stat(flist, name, st.st_mode, &st); + if (S_ISREG(st.st_mode) && st.st_nlink > 1) { + /* record info for item in temporary hardlinks list and inodes map */ + 
mfu_flist_insert_stat(hardlinks_tmp_list, name, st.st_mode, &st); + inodes_map_insert(hardlinks_inodes_map, (uint64_t)st.st_ino); + } else + /* record info for item in list */ + mfu_flist_insert_stat(flist, name, st.st_mode, &st); + } + /* compute hardlinks temporary list global summary */ + mfu_flist_summarize(hardlinks_tmp_list); + /* resolve hardlinks and merge them in flist */ + walk_resolve_hardlinks(flist, hardlinks_tmp_list, hardlinks_inodes_map); + /* compute global summary */ mfu_flist_summarize(flist); } diff --git a/src/common/mfu_util.c b/src/common/mfu_util.c index 5b374cfc..7d58a358 100644 --- a/src/common/mfu_util.c +++ b/src/common/mfu_util.c @@ -577,6 +577,26 @@ void mfu_unpack_uint64(const char** pptr, uint64_t* value) *pptr += 8; } +void mfu_pack_sized_str(char** pptr, char* value, uint64_t chars) +{ + char* ptr = *pptr; + if (value == NULL) + *ptr = (char) 0; + else + strncpy(ptr, value, (size_t)chars); + *pptr += chars; +} + +void mfu_unpack_sized_str(const char** pptr, char** value, uint64_t chars) +{ + const char* ptr = *pptr; + if(*ptr == 0) + *value = NULL; + else + *value = MFU_STRDUP(ptr); + *pptr += chars; +} + /* Bob Jenkins one-at-a-time hash: http://en.wikipedia.org/wiki/Jenkins_hash_function */ uint32_t mfu_hash_jenkins(const char* key, size_t len) { diff --git a/src/common/mfu_util.h b/src/common/mfu_util.h index f8e25720..1b6a8da4 100644 --- a/src/common/mfu_util.h +++ b/src/common/mfu_util.h @@ -236,6 +236,14 @@ void mfu_pack_uint64(char** pptr, uint64_t value); * host order and advance pointer */ void mfu_unpack_uint64(const char** pptr, uint64_t* value); +/* given address of pointer to buffer, pack value into buffer in + * network order and advance pointer */ + void mfu_pack_sized_str(char** pptr, char* value, uint64_t chars); + + /* given address of pointer to buffer, unpack value into buffer in + * host order and advance pointer */ + void mfu_unpack_sized_str(const char** pptr, char** value, uint64_t chars); + /* Bob Jenkins one-at-a-time hash: http://en.wikipedia.org/wiki/Jenkins_hash_function */ uint32_t mfu_hash_jenkins(const char* key, size_t len); diff --git a/src/dcmp/dcmp.c b/src/dcmp/dcmp.c index 082e849c..3e397af0 100644 --- a/src/dcmp/dcmp.c +++ b/src/dcmp/dcmp.c @@ -1040,11 +1040,9 @@ static int dcmp_strmap_compare( dcmp_strmap_item_update(src_map, key, DCMPF_EXIST, DCMPS_COMMON); dcmp_strmap_item_update(dst_map, key, DCMPF_EXIST, DCMPS_COMMON); - /* get modes of files */ - mode_t src_mode = (mode_t) mfu_flist_file_get_mode(src_list, - src_index); - mode_t dst_mode = (mode_t) mfu_flist_file_get_mode(dst_list, - dst_index); + /* get types of files */ + mfu_filetype src_type = mfu_flist_file_get_type(src_list, src_index); + mfu_filetype dst_type = mfu_flist_file_get_type(dst_list, dst_index); tmp_rc = dcmp_compare_metadata(src_list, src_map, src_index, dst_list, dst_map, dst_index, @@ -1061,7 +1059,7 @@ static int dcmp_strmap_compare( } /* check whether files are of the same type */ - if ((src_mode & S_IFMT) != (dst_mode & S_IFMT)) { + if (src_type != dst_type) { /* file type is different, no need to go any futher */ dcmp_strmap_item_update(src_map, key, DCMPF_TYPE, DCMPS_DIFFER); dcmp_strmap_item_update(dst_map, key, DCMPF_TYPE, DCMPS_DIFFER); @@ -1084,16 +1082,17 @@ static int dcmp_strmap_compare( continue; } - /* for now, we can only compare content of regular files and symlinks */ - if (! S_ISREG(dst_mode) && ! 
S_ISLNK(dst_mode)) { - /* not regular file or symlink, take them as common content */ + /* for now, we can only compare content of regular files, symlinks and + * hardlinks targets */ + if (dst_type != MFU_TYPE_FILE && dst_type != MFU_TYPE_LINK && dst_type != MFU_TYPE_HARDLINK) { + /* not regular file, take them as common content */ dcmp_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_COMMON); dcmp_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_COMMON); continue; } /* For symlinks, compare targets */ - if (S_ISLNK(dst_mode)) { + if (dst_type == MFU_TYPE_LINK) { const char* src_name = mfu_flist_file_get_name(src_list, src_index); const char* dst_name = mfu_flist_file_get_name(dst_list, dst_index); int compare_rc = mfu_compare_symlinks(src_name, dst_name, mfu_src_file, mfu_dst_file); @@ -1116,6 +1115,22 @@ static int dcmp_strmap_compare( continue; } + /* compare hardlink references */ + if (dst_type == MFU_TYPE_HARDLINK) { + const char* src_ref = mfu_flist_file_get_ref(src_list, src_index) + strlen_prefix; + const char* dst_ref = mfu_flist_file_get_ref(dst_list, dst_index) + strlen(dest_path->path); + if(!strcmp(src_ref, dst_ref)) { + /* update to say contents of the hardlinks were found to be the same */ + dcmp_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_COMMON); + dcmp_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_COMMON); + } else { + /* update to say contents of the hardlinks were found to be different */ + dcmp_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + dcmp_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + } + continue; + } + dcmp_state state; tmp_rc = dcmp_strmap_item_state(src_map, key, DCMPF_SIZE, &state); assert(tmp_rc == 0); diff --git a/src/dsync/dsync.c b/src/dsync/dsync.c index 2b4276f4..665a889f 100644 --- a/src/dsync/dsync.c +++ b/src/dsync/dsync.c @@ -1581,6 +1581,172 @@ static int dsync_strmap_compare_link_dest( return rc; } +/* For all local references files (ie. regular files with nlink > 1), flag all + * hardlinks using these files as references as having different content and + * place them in destination removal list and source copy list. Return -1 on + * error on any task. */ +static int dsync_remove_hardlinks_with_removed_refs( + mfu_flist src_list, + mfu_flist src_cp_list, + strmap* src_map, + mfu_flist dst_list, + mfu_flist dst_remove_list, + strmap* dst_map, + mfu_file_t* mfu_src_file, + mfu_file_t* mfu_dst_file +) { + /* assume we'll succeed */ + int rc = 0; + int tmp_rc; + + uint64_t chars = mfu_flist_file_max_name(dst_remove_list); + + /* bail out if there is nothing removed in destination */ + if (!chars) { + return 0; + } + + int ranks; + MPI_Comm_size(MPI_COMM_WORLD, &ranks); + + /* Count all local references selected for removal. 
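+ * A reference here is a regular file with nlink > 1 that sits in
+ * dst_remove_list; each rank counts its own entries so that the per-rank
+ * counts can be exchanged with MPI_Allgather and the corresponding names
+ * with MPI_Allgatherv below.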
*/ + int local_removed_refs = 0; + uint64_t remove_count = mfu_flist_size(dst_remove_list); + for(uint64_t idx=0; idx 1) { + local_removed_refs++; + } + } + + /* get number of references removed by all tasks */ + int* recvcounts = (int*) MFU_MALLOC(ranks * sizeof(int)); + MPI_Allgather(&local_removed_refs, 1, MPI_INT, + recvcounts, 1, MPI_INT, MPI_COMM_WORLD); + + MPI_Aint char_lb, char_extent; + MPI_Type_get_extent(MPI_CHAR, &char_lb, &char_extent); + + /* compute displacements and total number of bytes that we'll receive */ + size_t allbytes = 0; + int disp = 0; + int* recvdispls = (int*) MFU_MALLOC(ranks * sizeof(int)); + + for (int i = 0; i < (int) ranks; i++) { + /* adjust values in recvcounts for MPI_Allgatherv() */ + recvcounts[i] *= chars; + recvdispls[i] = disp; + disp += (int) recvcounts[i]; + allbytes += (size_t) recvcounts[i]; + } + + /* allocate memory for recv buffers */ + char* recvbuf = MFU_MALLOC(allbytes); + void* sendbuf = NULL; + + /* fill sendbuf with names of local references that will be removed */ + if (local_removed_refs) { + sendbuf = MFU_MALLOC((size_t)char_extent * chars * local_removed_refs); + char* sendptr = (char*) sendbuf; + for(int idx=0; idx 1) { + strncpy(sendptr, name, chars); + sendptr += char_extent * chars; + } + } + } + + MPI_Allgatherv(sendbuf, local_removed_refs * chars, MPI_CHAR, + recvbuf, recvcounts, recvdispls, MPI_CHAR, MPI_COMM_WORLD); + + /* iterate of all reference names received */ + uint64_t count = mfu_flist_size(dst_list); + char* recvptr = (char*) recvbuf; + for (int i = 0; i < (int) ranks; i++) { + size_t ref_size = char_extent * chars; + char *rank_next = recvptr + recvcounts[i]; + while(recvptr < rank_next) { + const char* removed_ref = recvptr; + /* Search for local hardlinks with this reference and mark them to + * be removed. */ + const strmap_node* node; + strmap_foreach(dst_map, node) { + /* get file name */ + const char* key = strmap_node_key(node); + + /* get index of destination file */ + uint64_t dst_index; + int tmp_rc; + tmp_rc = dsync_strmap_item_index(dst_map, key, &dst_index); + assert(tmp_rc == 0); + + /* get index of source file */ + uint64_t src_index; + tmp_rc = dsync_strmap_item_index(src_map, key, &src_index); + /* skip item if it only exists in the destination */ + if (tmp_rc) + continue; + + /* skip item if not hardlink */ + mfu_filetype type = mfu_flist_file_get_type(dst_list, dst_index); + if(type != MFU_TYPE_HARDLINK) + continue; + + /* skip item if reference does not match */ + const char *ref = mfu_flist_file_get_ref(dst_list, dst_index); + if(strcmp(ref, removed_ref)) + continue; + + /* skip item if type or content already differ */ + dsync_state state; + + tmp_rc = dsync_strmap_item_state(dst_map, key, DCMPF_TYPE, &state); + assert(tmp_rc == 0); + if (state == DCMPS_DIFFER) + continue; + + tmp_rc = dsync_strmap_item_state(dst_map, key, DCMPF_CONTENT, &state); + assert(tmp_rc == 0); + if (state == DCMPS_DIFFER) + continue; + + /* Update to say contents of the hardlinks were found to be + * different */ + dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + + /* Unless dry run mode, mark the file to be removed in + * destination and copied from source. 
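+ * Presumably the copy pass then recreates the link against the freshly
+ * copied reference; that step happens outside this function.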
*/ + if (!options.dry_run) { + mfu_flist_file_copy(src_list, src_index, src_cp_list); + mfu_flist_file_copy(dst_list, dst_index, dst_remove_list); + } + } + /* jump to next received path for this rank */ + recvptr += ref_size; + } + /* jump to next received path */ + recvptr = rank_next; + } + + mfu_free(&recvcounts); + mfu_free(&recvdispls); + mfu_free(&sendbuf); + mfu_free(&recvbuf); + + /* determine whether any process hit an error, + * input is either 0 or -1, so MIN will return -1 if any */ + int all_rc; + MPI_Allreduce(&rc, &all_rc, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + rc = all_rc; + + return rc; +} + /* compare entries from src into dst */ static int dsync_strmap_compare( mfu_flist src_list, @@ -1712,14 +1878,11 @@ static int dsync_strmap_compare( continue; } - /* get modes of files */ - mode_t src_mode = (mode_t) mfu_flist_file_get_mode(src_list, - src_index); - mode_t dst_mode = (mode_t) mfu_flist_file_get_mode(dst_list, - dst_index); + mfu_filetype src_type = mfu_flist_file_get_type(src_list, src_index); + mfu_filetype dst_type = mfu_flist_file_get_type(dst_list, dst_index); /* check whether files are of the same type */ - if ((src_mode & S_IFMT) != (dst_mode & S_IFMT)) { + if (src_type != dst_type) { /* file type is different, no need to go any futher */ dsync_strmap_item_update(src_map, key, DCMPF_TYPE, DCMPS_DIFFER); dsync_strmap_item_update(dst_map, key, DCMPF_TYPE, DCMPS_DIFFER); @@ -1751,8 +1914,8 @@ static int dsync_strmap_compare( continue; } - /* for now, we can only compare content of regular files and symlinks */ - if (! S_ISREG(dst_mode) && ! S_ISLNK(dst_mode)) { + /* for now, we can only compare content of regular files, symlinks and hardlinks */ + if (dst_type != MFU_TYPE_FILE && dst_type != MFU_TYPE_LINK && dst_type != MFU_TYPE_HARDLINK) { /* not regular file or symlink, take them as common content */ dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_COMMON); dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_COMMON); @@ -1761,7 +1924,7 @@ static int dsync_strmap_compare( /* if symlink, check if targets of source and destination files match. If not, * mark the files as being different. */ - if (S_ISLNK(dst_mode)) { + if (dst_type == MFU_TYPE_LINK) { const char* src_name = mfu_flist_file_get_name(src_list, src_index); const char* dst_name = mfu_flist_file_get_name(dst_list, dst_index); int compare_rc = mfu_compare_symlinks(src_name, dst_name, mfu_src_file, mfu_dst_file); @@ -1791,6 +1954,24 @@ static int dsync_strmap_compare( continue; } + /* compare hardlink references */ + if (dst_type == MFU_TYPE_HARDLINK) { + const char* src_ref = mfu_flist_file_get_ref(src_list, src_index) + strlen_prefix; + const char* dst_ref = mfu_flist_file_get_ref(dst_list, dst_index) + strlen(dest_path->path); + + if(strcmp(src_ref, dst_ref)) { + /* take them as differ content */ + dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + + if (!options.dry_run) { + mfu_flist_file_copy(src_list, src_index, src_cp_list); + mfu_flist_file_copy(dst_list, dst_index, dst_remove_list); + } + } + continue; + } + /* first check whether file sizes match */ dsync_state state; tmp_rc = dsync_strmap_item_state(src_map, key, DCMPF_SIZE, &state); @@ -1874,6 +2055,16 @@ static int dsync_strmap_compare( } } + mfu_flist_summarize(dst_remove_list); + + /* For all references (ie. regular files with nlink > 1) in dst_remove_list, + * select all hardlinks pointing to this reference for removal as well. 
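+ * This relies on the mfu_flist_summarize(dst_remove_list) call just above,
+ * since the helper reads list-wide properties (it returns early when the
+ * global max name length of the remove list is zero).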
*/ + tmp_rc = dsync_remove_hardlinks_with_removed_refs(src_list, src_cp_list, + src_map, dst_list, dst_remove_list, dst_map, mfu_src_file, mfu_dst_file); + if (tmp_rc < 0) { + rc = -1; + } + /* wait for all procs to finish before stopping timer */ MPI_Barrier(MPI_COMM_WORLD); diff --git a/test/tests/__init__.py b/test/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/tests/lib.py b/test/tests/lib.py new file mode 100644 index 00000000..cde64528 --- /dev/null +++ b/test/tests/lib.py @@ -0,0 +1,425 @@ +#!/usr/bin/python3 + +import unittest +import dataclasses +from pathlib import Path +import tempfile +import os +import stat +import subprocess +import typing as t +import shlex + +import yaml + + +def mpirun_cmd(): + mpirun = os.environ.get("MFU_MPIRUN_CMD", "mpirun") + args = os.environ.get("MFU_MPIRUN_ARGS") + if args is None: + args = [] + else: + args = args.split(" ") + return [mpirun] + args + + +def mfu_cmd(cmd): + return mpirun_cmd() + [os.path.join(os.environ["MFU_BIN"], cmd)] + + +def dfilemaker_cmd(): + return mfu_cmd("dfilemaker") + + +def dwalk_cmd(): + return mfu_cmd("dwalk") + + +def dfind_cmd(): + return mfu_cmd("dfind") + + +def dsync_cmd(): + return mfu_cmd("dsync") + + +def dcp_cmd(): + return mfu_cmd("dcp") + + +def dcmp_cmd(): + return mfu_cmd("dcmp") + + +def dtar_cmd(): + return mfu_cmd("dtar") + + +def create_file(path: Path): + fh = open(path, "w+") + fh.close() + + +# Global variables holding the real dataclasses instanciated in tests for +# comparisons. +RegularFile = None +Symlink = None +Directory = None + + +@dataclasses.dataclass +class FilesystemObject: + parent: Path = dataclasses.field(compare=False) + name: str + mode: int + uid: int + gid: int + mtime: int + + @property + def path(self) -> Path: + return self.parent / self.name + + +@dataclasses.dataclass +class _RegularFile(FilesystemObject): + inode: int = dataclasses.field(compare=False) + nlink: int + size: int + + def __str__(self): + return ( + f"File[{self.path} inode:{self.inode} nlink:{self.nlink}, " + f"size:{self.size}, mode: {stat.filemode(self.mode)}, " + f"uid:{self.uid}, gid:{self.gid}, mtime:{self.mtime}]" + ) + + +@dataclasses.dataclass +class _Symlink(FilesystemObject): + target: str + + def __str__(self): + return ( + f"Symlink[{self.path}→{self.target} " + f"mode: {stat.filemode(self.mode)}, " + f"uid:{self.uid}, gid:{self.gid}, mtime:{self.mtime}]" + ) + + +@dataclasses.dataclass +class _Directory(FilesystemObject): + content: dict = dataclasses.field(default_factory=dict) + + def get(self, path: str): + value = self.content + components = path.split(os.path.sep) + for component in components[:-1]: + value = value.get(component) + return value.get(components[-1]) + + def __str__(self): + return ( + f"Directory[{self.path} " + f"mode: {stat.filemode(self.mode)}, " + f"uid:{self.uid}, gid:{self.gid}, mtime:{self.mtime}]" + ) + + def dump(self, indent=0): + if not self.content: + print(f"{' '*indent}āˆ…") + for name, item in self.content.items(): + print(f"{' '*indent}{name:20}: {item}") + if isinstance(item, Directory): + item.dump(indent + 2) + + @classmethod + def from_path(cls, path: Path): + fs_o_stat = path.stat() + dir_o = cls( + path.parent, + path.name, + fs_o_stat.st_mode, + fs_o_stat.st_uid, + fs_o_stat.st_gid, + fs_o_stat.st_mtime, + ) + for item in Path(path).iterdir(): + fs_o_stat = item.lstat() + if item.is_symlink(): + fs_o = Symlink( + dir_o.path, + item.name, + fs_o_stat.st_mode, + fs_o_stat.st_uid, + fs_o_stat.st_gid, + fs_o_stat.st_mtime, 
+ str(item.readlink()), + ) + elif item.is_dir(): + fs_o = Directory.from_path(item) + else: + fs_o = RegularFile( + dir_o.path, + item.name, + fs_o_stat.st_mode, + fs_o_stat.st_uid, + fs_o_stat.st_gid, + fs_o_stat.st_mtime, + fs_o_stat.st_ino, + fs_o_stat.st_nlink, + fs_o_stat.st_size, + ) + dir_o.content[item.name] = fs_o + return dir_o + + +class FileTree(_Directory): + + def dump(self): + print(f"\nFile tree {self.path}:") + super().dump() + + +def _create_dir_content(path: Path, layout: dict): + for s_item, attrs in layout.items(): + fs_o_type = attrs.get("type", "file") + if fs_o_type == "hardlink": + continue + item = path / s_item + match fs_o_type: + case "file": + create_file(item) + case "symlink": + item.symlink_to(attrs["target"]) + case "dir": + item.mkdir() + _create_dir_content(item, attrs["layout"]) + + +def _create_hardlinks(path: Path, layout: dict): + for s_item, attrs in layout.items(): + fs_o_type = attrs.get("type", "file") + if fs_o_type not in ["hardlink", "dir"]: + continue + item = path / s_item + match fs_o_type: + case "hardlink": + target = path / attrs["target"] + item.hardlink_to(target) + case "dir": + _create_hardlinks(item, attrs["layout"]) + + +def create_filetree_from_yaml(path: str, layout: str): + layout = yaml.safe_load(layout) + # 1st pass without hardlinks + _create_dir_content(path, layout) + # create hardlinks recursively on 2nd pass + _create_hardlinks(path, layout) + + +BASIC_FILES_LAYOUT = """ +file1: {} +file2: {} +file3: {} +file4: {} +symlink2: + type: symlink + target: file2 +hardlink3: + type: hardlink + target: file3 +hardlink4.0: + type: hardlink + target: file4 +hardlink4.1: + type: hardlink + target: file4 +dir1: + type: dir + layout: + file4: {} + symlink1: + type: symlink + target: ../file1 +""" + + +def create_basic_layout(dst): + create_filetree_from_yaml( + path=dst, + layout=BASIC_FILES_LAYOUT, + ) + + +class TestFileTreeCmp(unittest.TestCase): + + def setUp(self): + self._tmp_src = tempfile.TemporaryDirectory() + self.src = Path(self._tmp_src.name) + self._tmp_dst = tempfile.TemporaryDirectory() + self.dst = Path(self._tmp_dst.name) + self.archive = self.dst / "archive.tar" # used for dtar tests + + def tearDown(self): + self._tmp_src.cleanup() + self._tmp_dst.cleanup() + + def assertSameFileTree( + self, + dir1: Directory, + dir2: Directory, + root_dir1: Path, + root_dir2: Path, + ignore_paths: None | list[str] = None, + ): + if ignore_paths is None: + ignore_paths = [] + try: + # discard ignored items in this directory + if ignore_paths: + dir1.content = { + key: value + for key, value in dir1.content.items() + if str(value.path.relative_to(root_dir1)) + not in ignore_paths + } + dir2.content = { + key: value + for key, value in dir2.content.items() + if str(value.path.relative_to(root_dir2)) + not in ignore_paths + } + self.assertCountEqual( + dir1.content.keys(), + dir2.content.keys(), + f"Directories {dir1} and {dir2} do not have the same content", + ) + for key in dir1.content.keys(): + fso_1 = dir1.content[key] + fso_2 = dir2.content[key] + self.assertEqual( + type(fso_1), + type(fso_2), + f"Paths {fso_1} and {fso_2} do not have the same type", + ) + if isinstance(fso_1, Directory): + self.assertSameFileTree( + fso_1, fso_2, root_dir1, root_dir2, ignore_paths + ) + else: + self.assertEqual( + fso_1, + fso_2, + f"Paths {fso_1} and {fso_2} are not equal", + ) + except AssertionError as err: + dir1.dump() + dir2.dump() + raise AssertionError(err) + + def assertSrcDstEqual( + self, + ignore_paths=None, + ignore_nlink=False, 
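+        # When set, the corresponding field is excluded from the dataclass
+        # equality used below (e.g. dcp without --preserve does not restore
+        # mtime, and --link-dest changes nlink counts).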
+ ignore_mtime=False, + dest: t.Optional[Path] = None, + ): + if dest is None: + dest = self.dst + global RegularFile + global Symlink + global Directory + if ignore_nlink or ignore_mtime: + # Create new dataclasses in which nlink or mtime are ignored in + # __eq__ operator. + RegularFile = dataclasses.make_dataclass( + "RegularFile", + [ + ("nlink", int, dataclasses.field(compare=not ignore_nlink)), + ("mtime", int, dataclasses.field(compare=not ignore_mtime)), + ], + bases=(_RegularFile,), + ) + Symlink = dataclasses.make_dataclass( + "Symlink", + [ + ("mtime", int, dataclasses.field(compare=not ignore_mtime)), + ], + bases=(_Symlink,), + ) + Directory = dataclasses.make_dataclass( + "Directory", + [ + ("mtime", int, dataclasses.field(compare=not ignore_mtime)), + ], + bases=(_Directory,), + ) + + else: + RegularFile = _RegularFile + Symlink = _Symlink + Directory = _Directory + + ft_src = FileTree.from_path(self.src) + ft_dst = FileTree.from_path(dest) + self.assertSameFileTree(ft_src, ft_dst, self.src, dest, ignore_paths) + + def assertInProcStdout(self, proc, msg): + # Remove timestamp prefix from dsync output + def untimestamp_line(line): + if line.startswith("["): + return line.split(" ", maxsplit=1)[1] + return line + + unprefixed_output = "\n".join( + [ + untimestamp_line(line) + for line in proc.stdout.decode().strip().split("\n") + ] + ) + msg = msg.strip() + if msg not in unprefixed_output: + raise AssertionError( + "Unable to find message in output.\n" + f" - Message:\n{msg}\n" + f" - Output:\n{unprefixed_output}" + ) + + def run_cmd( + self, + cmd: t.List[str | Path | int], + cwd: t.Optional[Path] = None, + env: t.Optional[t.Dict[str, str]] = None, + ) -> subprocess.CompletedProcess: + _env = os.environ.copy() + if env: + _env.update(env) + + def cmd_result(proc) -> str: + return ( + f" - exit code: {proc.returncode}\n" + f" - stdout:\n{proc.stdout.decode()}\n" + f" - stderr:\n{proc.stderr.decode()}\n" + ) + + cmd_s = shlex.join([str(arg) for arg in cmd]) + print(f"\n→ Running command: {cmd_s}") + try: + proc = subprocess.run( + cmd, check=True, capture_output=True, cwd=cwd, env=_env + ) + print(cmd_result(proc)) + except subprocess.CalledProcessError as err: + raise AssertionError( + "Command error:\n" f" - command: {cmd_s}\n" + cmd_result(err) + ) from err + else: + return proc + + def run_dfilemaker(self, dest: t.Optional[Path] = None): + if not dest: + dest = self.src + cmd = dfilemaker_cmd() + [dest] + return self.run_cmd(cmd) diff --git a/test/tests/test_dcmp.py b/test/tests/test_dcmp.py new file mode 100644 index 00000000..9946958d --- /dev/null +++ b/test/tests/test_dcmp.py @@ -0,0 +1,208 @@ +#!/usr/bin/python3 + +import textwrap +from pathlib import Path +import typing as t + +from .lib import ( + TestFileTreeCmp, + dcmp_cmd, + dsync_cmd, + create_basic_layout, + create_file, +) + + +class TestDcp(TestFileTreeCmp): + + def run_dcmp( + self, + dest: t.Optional[Path] = None, + ): + if not dest: + dest = self.dst + cmd = dcmp_cmd() + [self.src, dest] + return self.run_cmd(cmd) + + def run_dsync(self): + cmd = dsync_cmd() + [self.src, self.dst] + return self.run_cmd(cmd) + + +class TestDcpBasic(TestDcp): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def test_dcmp(self): + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items : 12 + """ + ), + ) + + def test_dcmp_after_dsync(self): + self.run_dsync() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that 
exist in both directories: 12 (Src: 12 Dest: 12) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different contents: 0 (Src: 0 Dest: 0) + """ + ), + ) + + def test_dcmp_additional_file(self): + self.run_dsync() + create_file(self.dst / "newfile") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 12 (Src: 12 Dest: 12) + Number of items that exist only in one directory: N/A (Src: 0 Dest: 1) + Number of items that exist in both directories and have the same type: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different contents: 0 (Src: 0 Dest: 0) + """ + ), + ) + + def test_dcmp_missing_file(self): + self.run_dsync() + (self.dst / "file1").unlink() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 11 (Src: 11 Dest: 11) + Number of items that exist only in one directory: N/A (Src: 1 Dest: 0) + Number of items that exist in both directories and have the same type: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories and have different contents: 0 (Src: 0 Dest: 0) + """ + ), + ) + + def test_dcmp_different_type(self): + self.run_dsync() + # change file1 in destination from regular file to directory + (self.dst / "file1").unlink() + (self.dst / "file1").mkdir() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 12 (Src: 12 Dest: 12) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories and have different types: 1 (Src: 1 Dest: 1) + Number of items that exist in both directories and have the same content: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) + + def test_dcmp_different_symlink_target(self): + self.run_dsync() + # change target of symlink2 from file2 to file1 in destination + (self.dst / "symlink2").unlink() + (self.dst / "symlink2").symlink_to("file1") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 12 (Src: 12 Dest: 12) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories 
and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) + + def test_dcmp_different_hardlink_target(self): + self.run_dsync() + # change target of hardlink3 from file3 to file1 in destination + (self.dst / "hardlink3").unlink() + (self.dst / "hardlink3").hardlink_to(self.dst / "file1") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 12 (Src: 12 Dest: 12) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) + + def test_dcmp_missing_hardlink_ref(self): + self.run_dsync() + # Remove file4 (reference of hardlink4.1 and hardlink4.2) in source. + # + # In practice, a single file is missing in destination. However, due to + # limitations in comparison algorithm, dcmp considers: + # - file4 is missing in dest (ok) + # - hardlink4.1 to be a reference file in source and a "secondary" + # hardlink in destination → type difference + # - hardlink4.2 is a hardlink to file4 in source and a hardlink to + # hardlink4.1 in destination → content difference + (self.src / "file4").unlink() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 11 (Src: 11 Dest: 11) + Number of items that exist only in one directory: N/A (Src: 0 Dest: 1) + Number of items that exist in both directories and have the same type: 10 (Src: 10 Dest: 10) + Number of items that exist in both directories and have different types: 1 (Src: 1 Dest: 1) + Number of items that exist in both directories and have the same content: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different contents: 2 (Src: 2 Dest: 2) + """ + ), + ) + + + def test_dcmp_different_content(self): + self.run_dsync() + # change content of file1 in destination + with open(self.dst / "file1", "w") as fh: + fh.write("whatever") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 12 (Src: 12 Dest: 12) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 12 (Src: 12 Dest: 12) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 11 (Src: 11 Dest: 11) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) diff --git a/test/tests/test_dcp.py b/test/tests/test_dcp.py new file mode 100644 index 00000000..b38c332d --- /dev/null +++ b/test/tests/test_dcp.py @@ -0,0 +1,208 @@ +#!/usr/bin/python3 + +import os +import tempfile +import typing as t +from pathlib import Path +import textwrap + +from .lib import ( + TestFileTreeCmp, + dcp_cmd, + dwalk_cmd, + dfind_cmd, + create_basic_layout, +) + + +class TestDcp(TestFileTreeCmp): + + def run_dcp( + self, + dereference: bool = False, + preserve: bool = False, + chunk: t.Optional[str] = None, + buffer: t.Optional[str] = None, + input: t.Optional[str] = None, + dest: t.Optional[Path] = 
None, + ): + if not dest: + dest = self.dst + # Remove destination directory as it is created by dcp. + dest.rmdir() + cmd = dcp_cmd() + [self.src, dest] + if dereference: + cmd.insert(len(cmd) - 2, "--dereference") + if preserve: + cmd.insert(len(cmd) - 2, "--preserve") + if chunk: + cmd[-2:0] = ["--chunksize", chunk] + if buffer: + cmd[-2:0] = ["--bufsize", buffer] + if input: + cmd[-2:0] = ["--input", input] + return self.run_cmd(cmd) + + def run_dwalk( + self, + output: t.Optional[Path] = None, + lite: bool = False, + ): + cmd = dwalk_cmd() + [self.src] + if output: + cmd[-1:0] = ["--output", output] + if lite: + cmd.insert(len(cmd) - 1, "--lite") + return self.run_cmd(cmd) + + def run_dfind( + self, + output: t.Optional[Path] = None, + ): + cmd = dfind_cmd() + [self.src] + if output: + cmd[-1:0] = ["--output", output] + return self.run_cmd(cmd) + + +class TestDcpBasic(TestDcp): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def test_dcp(self): + proc = self.run_dcp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + # With basic dcp, files should have the same type and metadata except + # mtime that is not copied. + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dereference(self): + # Add content of file2, the target of symlink2 + with open(self.src / "file2", "w") as fh: + fh.write("original") + self.run_dcp(dereference=True) + # Check source and destination have the same content, except for + # symlinks and mtime. + self.assertSrcDstEqual( + ignore_paths=["symlink2", "dir1/symlink1"], ignore_mtime=True + ) + # Check symlink2 is not a symlink in destination + self.assertTrue((self.src / "symlink2").is_symlink()) + self.assertFalse((self.dst / "symlink2").is_symlink()) + # Check symlink2 has its own inode, distinct from file2 + self.assertNotEqual( + (self.dst / "symlink2").stat().st_ino, + (self.dst / "file2").stat().st_ino, + ) + # Check both files have the same original content + for filename in ["file2", "symlink2"]: + with open(self.dst / filename) as fh: + self.assertEqual(fh.read(), "original") + + def test_dcp_preserve(self): + self.run_dcp(preserve=True) + # With dcp --preserve, files must have the same metadata in source and + # destination. + self.assertSrcDstEqual() + + def test_dcp_preserve_chmod(self): + # Change some file modes in source + (self.src / "file1").chmod(0o400) + (self.src / "hardlink3").chmod(0o400) + (self.src / "dir1").chmod(0o700) + self.run_dcp(preserve=True) + # With dcp --preserve, files must have the same metadata in source and + # destination. 
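+        # assertSrcDstEqual with no ignore_* flags compares name, mode, uid,
+        # gid, mtime, nlink and size of every entry, so a mode that was not
+        # preserved would be reported here.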
+ self.assertSrcDstEqual() + + def test_dcp_chunksize(self): + # Add 16MB of data in a file to have multiple chunks + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(16 * 10**6)) + self.run_dcp(chunk="1MB") + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_bufsize(self): + # Add 16MB of data in a file to fill multiple buffer + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(16 * 10**6)) + self.run_dcp(buffer="1MB") + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dwalk_input(self): + with tempfile.NamedTemporaryFile() as fh: + self.run_dwalk(output=fh.name) + proc = self.run_dcp(input=fh.name) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dwalk_input_lite(self): + with tempfile.NamedTemporaryFile() as fh: + self.run_dwalk(output=fh.name, lite=True) + proc = self.run_dcp(input=fh.name) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dfind_input(self): + with tempfile.NamedTemporaryFile() as fh: + self.run_dfind(output=fh.name) + proc = self.run_dcp(input=fh.name) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertSrcDstEqual(ignore_mtime=True) + + +class TestDcpDfilemaker(TestDcp): + + def setUp(self): + super().setUp() + self.run_dfilemaker() + + def test_dcp(self): + self.run_dcp() + self.assertSrcDstEqual(ignore_mtime=True) diff --git a/test/tests/test_dsync.py b/test/tests/test_dsync.py new file mode 100644 index 00000000..7858c424 --- /dev/null +++ b/test/tests/test_dsync.py @@ -0,0 +1,640 @@ +#!/usr/bin/python3 + +import tempfile +import textwrap +from pathlib import Path +import os +import typing as t + +from .lib import TestFileTreeCmp, dsync_cmd, create_basic_layout, create_file + + +class TestDsync(TestFileTreeCmp): + + def run_dsync( + self, + delete: bool = False, + contents: bool = False, + dereference: bool = False, + dry_run: bool = False, + link_dest: t.Optional[Path] = None, + chunk: t.Optional[str] = None, + buffer: t.Optional[str] = None, + batch: t.Optional[int] = None, + dest: t.Optional[Path] = None, + ): + if not dest: + dest = self.dst + cmd = dsync_cmd() + [self.src, dest] + if delete: + cmd.insert(len(cmd) - 2, "--delete") + if contents: + cmd.insert(len(cmd) - 2, "--contents") + if dereference: + cmd.insert(len(cmd) - 2, "--dereference") + if dry_run: + cmd.insert(len(cmd) - 2, "--dryrun") + if link_dest: + cmd[-2:0] = ["--link-dest", link_dest] + if chunk: + cmd[-2:0] = ["--chunksize", chunk] + if buffer: + cmd[-2:0] = ["--bufsize", buffer] + if batch: + cmd[-2:0] = ["--batch-files", str(batch)] + return self.run_cmd(cmd) + + +class TestDsyncBasic(TestDsync): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def test_dsync_empty_dest(self): + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 11 + Directories: 1 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_overwrite_dest(self): + # Modify a file dest with different content and check it is overwritten. 
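+        # Both versions of file2 have the same 8-byte size, so it is the
+        # differing mtime that makes the default size+mtime comparison copy
+        # the file again.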
+ with open(self.src / "file2", "w+") as fh: + fh.write("original") + self.run_dsync() + with open(self.dst / "file2", "w+") as fh: + fh.write("modified") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + with open(self.dst / "file2", "r") as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_remove_source(self): + # Synchronize, remove one file in source, re-synchronize and check file + # is still present in dest without dsync delete option. + self.run_dsync() + (self.src / "file2").unlink() + proc = self.run_dsync() + self.assertInProcStdout( + proc, + "Comparing file sizes and modification times of 4 items", + ) + self.assertSrcDstEqual(ignore_paths=["file2"]) + self.assertFalse((self.src / "file2").exists()) + self.assertTrue((self.dst / "file2").exists()) + + def test_dsync_remove_source_delete(self): + # Synchronize, remove one file in source, re-synchronize and check file + # is also removed in dest with dsync delete option. + self.run_dsync() + (self.src / "file1").unlink() + proc = self.run_dsync(delete=True) + self.assertInProcStdout(proc, "Removing 1 items") + self.assertSrcDstEqual() + + def test_dsync_file_in_dst(self): + # Create non-conflicting file in dst, synchronize and check the file + # still exists after sync. + additional_file = self.dst / "other-file" + create_file(additional_file) + proc = self.run_dsync() + # Check dsync reported 2 items in destination + self.assertInProcStdout(proc, "Walked 2 items in ") + # Check dsync reported to copy everything from source + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 11 + Directories: 1 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertSrcDstEqual(ignore_paths=[additional_file.name]) + self.assertTrue(additional_file.exists()) + + def test_dsync_file_in_dst_delete(self): + # Create non-conflicting file in dst, synchronize with delete option and + # check the file is removed after sync. + additional_file = self.dst / "other-file" + create_file(additional_file) + proc = self.run_dsync(delete=True) + # Check dsync reported to remove an item + self.assertInProcStdout( + proc, + "Removed 1 items in ", + ) + self.assertSrcDstEqual() + self.assertFalse(additional_file.exists()) + + def test_dsync_symlink_dereference(self): + # Add content of file2, the target of symlink2 + with open(self.src / "file2", "w") as fh: + fh.write("original") + self.run_dsync(dereference=True) + # Check source and destination have the same content, except for + # symlinks. + self.assertSrcDstEqual(ignore_paths=["symlink2", "dir1/symlink1"]) + # Check symlink2 is not a symlink in destination + self.assertTrue((self.src / "symlink2").is_symlink()) + self.assertFalse((self.dst / "symlink2").is_symlink()) + # Check symlink2 has its own inode, distinct from file2 + self.assertNotEqual( + (self.dst / "symlink2").stat().st_ino, + (self.dst / "file2").stat().st_ino, + ) + # Check both files have the same original content + for filename in ["file2", "symlink2"]: + with open(self.dst / filename) as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_symlink_target_change(self): + # Synchronize, change symlink2 target in source, re-synchronize and + # check. 
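+        # dsync compares symlink targets rather than file content for links,
+        # so only the retargeted symlink2 should be transferred (the single
+        # "Links: 1" item expected below).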
+ self.run_dsync() + (self.src / "symlink2").unlink() + (self.src / "symlink2").symlink_to("file1") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 1 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_symlink_dereference_target_nlinks(self): + # change symlink2 target in source for file3 which has nlink > 1 and + # synchronize with dereference. + (self.src / "symlink2").unlink() + (self.src / "symlink2").symlink_to("file3") + proc = self.run_dsync(dereference=True) + + # FIXME: when symlinks target file with multiple links (nlink > 1), + # dsync creates in destination an additional link to this inode instead + # of a regular copy for this symlink. + + # Check source and destination have the same content, except for + # symlinks2, hardlink3 and file3 which have 3 nlinks in destination. + self.assertSrcDstEqual( + ignore_paths=["file3", "hardlink3", "symlink2", "dir1/symlink1"] + ) + self.assertEqual((self.dst / "file3").stat().st_nlink, 3) + self.assertEqual((self.dst / "hardlink3").stat().st_nlink, 3) + self.assertEqual((self.dst / "symlink2").stat().st_nlink, 3) + + # Check inode of symlink2 and file3 are the same. + self.assertEqual( + (self.dst / "symlink2").stat().st_ino, + (self.dst / "file3").stat().st_ino, + ) + + # Check dsync reported creation of 4 hardlinks and 0 symlink. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 11 + Directories: 1 + Files: 6 + Links: 0 + Hardlinks: 4 + """ + ), + ) + + def test_dsync_transform_hardlink(self): + # Synchronize, transform hardlink in standalone inode, re-synchronize + # and check. + self.run_dsync() + (self.src / "hardlink3").unlink() + create_file(self.src / "hardlink3") + proc = self.run_dsync() + # Check dsync reported one modified file. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_sync_same_size_mtime(self): + # Fill source file2 with 64 random bytes and sync in empty dst. + random1 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random1) + previous_atime = (self.src / "file2").stat().st_atime_ns + previous_mtime = (self.src / "file2").stat().st_mtime_ns + self.run_dsync() + + # Update source file2 with other 64 random bytes, restore mtime and + # resync. + random2 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random2) + os.utime(self.src / "file2", ns=(previous_atime, previous_mtime)) + proc = self.run_dsync() + self.assertInProcStdout( + proc, "Comparing file sizes and modification times of 5 items" + ) + # Check src/dst metadata are equal but file2 still contains first 64 + # random bytes. + self.assertSrcDstEqual() + with open(self.dst / "file2", "rb") as fh: + self.assertEqual(fh.read(), random1) + + def test_dsync_sync_same_diff_mtime(self): + # Fill source file2 with 64 random bytes and sync in empty dst. + random1 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random1) + self.run_dsync() + + # Update source file2 with other 64 random bytes and resync. + random2 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random2) + proc = self.run_dsync() + # Check dsync reported one modified file. 
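+        # Unlike the previous test, mtime was not restored here, so the
+        # size+mtime comparison flags file2 and its new random content is
+        # copied.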
+ self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 0 + """ + ), + ) + + # Check src/dst metadata are equal but file2 is updated with new content + # because of mtime difference. + self.assertSrcDstEqual() + with open(self.dst / "file2", "rb") as fh: + self.assertEqual(fh.read(), random2) + + def test_dsync_sync_same_size_mtime_contents(self): + # Fill source file2 with 64 random bytes and sync in empty dst. + random1 = os.urandom(64) + random2 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random1) + previous_atime = (self.src / "file2").stat().st_atime_ns + previous_mtime = (self.src / "file2").stat().st_mtime_ns + self.run_dsync() + + # Update source file2 with other 64 random bytes, restore mtime and + # resync with --contents. + with open(self.src / "file2", "wb") as fh: + fh.write(random2) + os.utime(self.src / "file2", ns=(previous_atime, previous_mtime)) + proc = self.run_dsync(contents=True) + self.assertInProcStdout( + proc, + "Comparing file contents of 5 items", + ) + # FIXME: mtime on file2 do not match on src/dst even though it has been + # updated with second dsync. + self.assertSrcDstEqual(ignore_paths=["file2"]) + with open(self.dst / "file2", "rb") as fh: + self.assertEqual(fh.read(), random2) + + def test_dsync_hardlink_dest_ref_changed(self): + # Create a conflicting file in dest with different content and check it + # is overwritten. + with open(self.src / "file3", "w+") as fh: + fh.write("original") + self.run_dsync() + with open(self.dst / "file3", "w+") as fh: + fh.write("modified") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 2 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + for filename in ["file3", "hardlink3"]: + with open(self.dst / filename, "r") as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_hardlink_dest_ref_changed_w_new_dest(self): + # Create a conflicting file in dest with different content and check it + # is overwritten. Also add new hardlink in dest and check it is not + # touched. + with open(self.src / "file3", "w+") as fh: + fh.write("original") + self.run_dsync() + with open(self.dst / "file3", "w+") as fh: + fh.write("modified") + (self.dst / "hardlink3.1").hardlink_to(self.dst / "file3") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 2 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual(ignore_paths=["hardlink3.1"]) + for filename in ["file3", "hardlink3"]: + with open(self.dst / filename, "r") as fh: + self.assertEqual(fh.read(), "original") + with open(self.dst / "hardlink3.1", "r") as fh: + self.assertEqual(fh.read(), "modified") + + def test_dsync_hardlink_dest_ref_changed_w_new_dest_delete(self): + # Create a conflicting file in dest with different content and check it + # is overwritten. Also add new hardlink in dest and check it is removed. 
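+        # With --delete, hardlink3.1 exists only in the destination and is
+        # removed, so source and destination should match exactly afterwards
+        # (plain assertSrcDstEqual below).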
+ with open(self.src / "file3", "w+") as fh: + fh.write("original") + self.run_dsync() + with open(self.dst / "file3", "w+") as fh: + fh.write("modified") + (self.dst / "hardlink3.1").hardlink_to(self.dst / "file3") + proc = self.run_dsync(delete=True) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 2 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + for filename in ["file3", "hardlink3"]: + with open(self.dst / filename, "r") as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_hardlink_dest_multiple_ref_changed(self): + # Create a conflicting file in dest with different content and check it + # is overwritten. + with open(self.src / "file3", "w+") as fh: + fh.write("original3") + with open(self.src / "file4", "w+") as fh: + fh.write("original4") + self.run_dsync() + with open(self.dst / "file3", "w+") as fh: + fh.write("modified3") + with open(self.dst / "file4", "w+") as fh: + fh.write("modified4") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 5 + Directories: 0 + Files: 2 + Links: 0 + Hardlinks: 3 + """ + ), + ) + self.assertSrcDstEqual() + for filename in ["file3", "hardlink3"]: + with open(self.dst / filename, "r") as fh: + self.assertEqual(fh.read(), "original3") + for filename in ["file4", "hardlink4.0", "hardlink4.1"]: + with open(self.dst / filename, "r") as fh: + self.assertEqual(fh.read(), "original4") + + def test_dsync_hardlink_outside_tree(self): + # Create temporary file outside src and dst tree, create hardlink in src + # to this temporary file, sync and check. The destination should contain + # a copy of the file (ie. with st_nlink 1). + with tempfile.NamedTemporaryFile() as outside_file: + (self.src / "hardlink3").unlink() + (self.src / "hardlink3").hardlink_to(outside_file.name) + proc = self.run_dsync() + # Check dsync reported the hardlink1 as a regular file. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 11 + Directories: 1 + Files: 6 + Links: 2 + Hardlinks: 2 + """ + ), + ) + self.assertSrcDstEqual(ignore_paths=["hardlink3"]) + self.assertEqual((self.src / "hardlink3").stat().st_nlink, 2) + self.assertEqual((self.dst / "hardlink3").stat().st_nlink, 1) + # When the temporary file outside src and dst tree is removed, src and + # dst must be equal. + self.assertSrcDstEqual() + + def test_dsync_change_hardlink_dest(self): + # Synchronize, change hardlink destination, re-synchronize + # and check. + self.run_dsync() + (self.src / "hardlink3").unlink() + (self.src / "hardlink3").hardlink_to(self.src / "file2") + proc = self.run_dsync() + # Check dsync reported hardlink to be updated. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_hardlink_src_ref_replaced(self): + # Synchronize, replace reference file in source, re-synchronize and + # check. + self.run_dsync() + (self.src / "file3").unlink() + create_file(self.src / "file3") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 2 + Directories: 0 + Files: 2 + Links: 0 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_add_hardlink_same_inode(self): + # Synchronize, add hardlink on inode which has already multiple links, + # re-synchronize and check. 
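+        # file3 already has nlink > 1 (hardlink3), so the walk classifies the
+        # new hardlink2 as a hardlink of the same inode group and dsync only
+        # needs to create one link (the single "Hardlinks: 1" item below).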
+ self.run_dsync() + (self.src / "hardlink2").hardlink_to(self.src / "file3") + proc = self.run_dsync() + # Check dsync reported hardlink to be updated. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_add_hardlink_another_inode(self): + # Synchronize, add hardlink on inode with one link, re-synchronize and + # check. + self.run_dsync() + (self.src / "hardlink2").hardlink_to(self.src / "file2") + proc = self.run_dsync() + # Check dsync reported hardlink to be updated. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_dry_run_empty(self): + # Check destination stays empty after dsync --dry-run. + self.run_dsync(dry_run=True) + self.assertCountEqual(list(self.dst.iterdir()), []) + + def test_dsync_dry_run_no_update(self): + # Check destination is not updated after second dsync with --dry-run + self.run_dsync() + (self.src / "file2").unlink() + create_file(self.src / "newfile") + self.run_dsync(dry_run=True) + self.assertTrue((self.dst / "file2").exists()) + self.assertFalse((self.dst / "newfile").exists()) + self.assertSrcDstEqual(ignore_paths=["file2", "newfile"]) + + def test_dsync_link_dest(self): + with tempfile.TemporaryDirectory() as _link_dest: + link_dest = Path(_link_dest) + # First sync in link destination + self.run_dsync(dest=link_dest) + # Add file in source + create_file(self.src / "newfile") + # Run dsync in destination with link destination + self.run_dsync(link_dest=link_dest) + # Check source and destination are the same, except for nlinks + self.assertSrcDstEqual(ignore_nlink=True) + # Check new file is present in destination but not in link + # destination. + self.assertTrue((self.dst / "newfile").exists()) + self.assertFalse((link_dest / "newfile").exists()) + # Check file which did not change between two synchronizations share + # the same inode in destination and link destination. 
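+        # --link-dest appears to hardlink unchanged files from the link
+        # destination tree instead of copying them, which is why nlink
+        # differences were ignored above and the inodes must match here.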
+            self.assertEqual(
+                (self.dst / "file1").stat().st_ino,
+                (link_dest / "file1").stat().st_ino,
+            )
+
+    def test_dsync_chunksize(self):
+        # Add 16MB of data to a file so it spans multiple chunks
+        with open(self.src / "file1", "wb") as fh:
+            fh.write(os.urandom(16 * 10**6))
+        self.run_dsync(chunk="1MB")
+        self.assertSrcDstEqual()
+
+    def test_dsync_bufsize(self):
+        # Add 16MB of data to a file to fill multiple buffers
+        with open(self.src / "file1", "wb") as fh:
+            fh.write(os.urandom(16 * 10**6))
+        self.run_dsync(buffer="1MB")
+        self.assertSrcDstEqual()
+
+    def test_dsync_batch(self):
+        self.run_dsync(batch=2)
+        self.assertSrcDstEqual()
+
+
+class TestDsyncDfilemaker(TestDsync):
+
+    def setUp(self):
+        super().setUp()
+        self.run_dfilemaker()
+
+    def test_dsync_twice(self):
+        self.run_dsync()
+        self.assertSrcDstEqual()
+        proc = self.run_dsync()
+        self.assertInProcStdout(
+            proc, "Comparing file sizes and modification times of 1000 items"
+        )
+        self.assertSrcDstEqual()
diff --git a/test/tests/test_dtar.py b/test/tests/test_dtar.py
new file mode 100644
index 00000000..18d77d7e
--- /dev/null
+++ b/test/tests/test_dtar.py
@@ -0,0 +1,320 @@
+#!/usr/bin/python3
+
+import tarfile
+from pathlib import Path
+import os
+import textwrap
+import typing as t
+
+import yaml
+import xattr
+
+from .lib import (
+    TestFileTreeCmp,
+    dtar_cmd,
+    create_basic_layout,
+    BASIC_FILES_LAYOUT,
+)
+
+
+class TestDtar(TestFileTreeCmp):
+
+    def run_dtar(
+        self,
+        extract: bool = False,
+        preserve_xattrs: bool = False,
+        preserve_acls: bool = False,
+        preserve_flags: bool = False,
+        env: t.Optional[t.Dict[str, str]] = None,
+    ):
+        if extract:
+            cmd = dtar_cmd() + ["-xf", self.archive]
+            if preserve_xattrs:
+                cmd.insert(len(cmd) - 2, "--preserve-xattrs")
+            if preserve_acls:
+                cmd.insert(len(cmd) - 2, "--preserve-acls")
+            if preserve_flags:
+                cmd.insert(len(cmd) - 2, "--preserve-flags")
+            cwd = self.dst
+        else:
+            cmd = dtar_cmd() + [
+                "-cf",
+                self.archive,
+                self.src.name,
+            ]
+            if preserve_xattrs:
+                cmd.insert(len(cmd) - 3, "--preserve-xattrs")
+            if preserve_acls:
+                cmd.insert(len(cmd) - 3, "--preserve-acls")
+            if preserve_flags:
+                cmd.insert(len(cmd) - 3, "--preserve-flags")
+            cwd = self.src.parent
+
+        return self.run_cmd(cmd, cwd=cwd, env=env)
+
+    def assertArchiveBasicTree(self):
+        """Compare tarfile with YAML layout"""
+        tar = tarfile.open(self.archive)
+        tree = yaml.safe_load(BASIC_FILES_LAYOUT)
+        self.assertSameDirArchive(tree, tar, Path(self.src.name))
+        tar.close()
+
+    def assertSameDirArchive(self, tree, tar, subdir: Path):
+        for key, value in tree.items():
+            try:
+                member = tar.getmember(f"{subdir}/{key}")
+            except KeyError:
+                raise AssertionError(
+                    f"Unable to find {subdir}/{key} in archive"
+                )
+            else:
+                print(f"Comparing {subdir}/{key} in archive with source layout")
+                match value.get("type", "file"):
+                    case "file":
+                        self.assertTrue(member.isreg())
+                    case "symlink":
+                        self.assertTrue(member.issym())
+                    case "hardlink":
+                        self.assertTrue(member.islnk())
+                    case "dir":
+                        self.assertTrue(member.isdir())
+                        self.assertSameDirArchive(
+                            tree[key]["layout"], tar, subdir / key
+                        )
+
+
+class TestDtarBasic(TestDtar):
+
+    def setUp(self):
+        super().setUp()
+        create_basic_layout(self.src)
+
+    def add_data_in_files(self):
+        size = 16 * 10**6
+        with open(self.src / "file1", "wb") as fh:
+            fh.write(os.urandom(size))
+        with open(self.src / "file2", "wb") as fh:
+            fh.write(os.urandom(size))
+        with open(self.src / "file3", "wb") as fh:
+            fh.write(os.urandom(size))
+        with open(self.src / "dir1" / "file4", "wb") as fh:
+
fh.write(os.urandom(size)) + + def test_dtar_create(self): + self.add_data_in_files() + proc = self.run_dtar() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertArchiveBasicTree() + + def test_dtar_create_algo_chunk(self): + self.add_data_in_files() + proc = self.run_dtar(env={"MFU_FLIST_ARCHIVE_CREATE": "CHUNK"}) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertArchiveBasicTree() + + def test_dtar_create_algo_libcircle(self): + self.add_data_in_files() + proc = self.run_dtar(env={"MFU_FLIST_ARCHIVE_CREATE": "LIBCIRCLE"}) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + self.assertArchiveBasicTree() + + def test_dtar_extract(self): + self.run_dtar() + proc = self.run_dtar(extract=True) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Creating 2 directories + Creating 5 files + Creating 2 symlinks + Creating 3 hardlinks + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_libarchive(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "LIBARCHIVE"} + ) + # With this algorithm, index file *.tar.dtaridx is counted. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 13 + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_libarchive_idx(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "LIBARCHIVE_IDX"} + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_chunk(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "CHUNK"} + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + """ + ), + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Creating 2 directories + Creating 5 files + Creating 2 symlinks + Creating 3 hardlinks + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_libcircle(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "LIBCIRCLE"} + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + """ + ), + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Creating 2 directories + Creating 5 files + Creating 2 symlinks + Creating 3 hardlinks + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_preserve_xattrs(self): + self.add_data_in_files() + # add xattr + xattr.setxattr(self.src / "file1", "user.xdg.comment", "test".encode()) + original_xattrs = xattr.listxattr(self.src / "file1") + original_value = xattr.getxattr(self.src / "file1", "user.xdg.comment") + self.run_dtar(preserve_xattrs=True) + # check archive can be read by python standard library + self.assertArchiveBasicTree() + self.run_dtar(extract=True, preserve_xattrs=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / 
self.src.name) + new_xattrs = xattr.listxattr(self.dst / self.src.name / "file1") + new_value = xattr.getxattr( + self.dst / self.src.name / "file1", "user.xdg.comment" + ) + self.assertCountEqual(original_xattrs, new_xattrs) + self.assertEqual(original_value, new_value) + + def test_dtar_preserve_acls(self): + self.add_data_in_files() + # add posix ACL + self.run_cmd(["setfacl", "-m", "user:root:r", self.src / "file1"]) + original_xattrs = xattr.listxattr(self.src / "file1") + original_acl = xattr.getxattr( + self.src / "file1", "system.posix_acl_access" + ) + self.run_dtar(preserve_acls=True) + # check archive can be read by python standard library + self.assertArchiveBasicTree() + self.run_dtar(extract=True, preserve_acls=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + new_xattrs = xattr.listxattr(self.dst / self.src.name / "file1") + new_acl = xattr.getxattr( + self.dst / self.src.name / "file1", "system.posix_acl_access" + ) + self.assertCountEqual(original_xattrs, new_xattrs) + self.assertEqual(original_acl, new_acl) + + def test_dtar_preserve_flags(self): + self.add_data_in_files() + # add noatime flag + self.run_cmd(["chattr", "+A", self.src / "file1"]) + self.run_dtar(preserve_flags=True) + # check archive can be read by python standard library + self.assertArchiveBasicTree() + self.run_dtar(extract=True, preserve_flags=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + output = self.run_cmd(["lsattr", self.dst / self.src.name / "file1"]) + self.assertIn("A", output.stdout.decode().split(" ")[0]) + + +class TestDtarDfilemaker(TestDtar): + + def setUp(self): + super().setUp() + self.run_dfilemaker() + + def test_dtar(self): + # Create and extract an archive with tree generated by dfilemaker and + # compare. + self.run_dtar() + self.run_dtar(extract=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) diff --git a/test/tests/test_dwalk.py b/test/tests/test_dwalk.py new file mode 100644 index 00000000..529d013b --- /dev/null +++ b/test/tests/test_dwalk.py @@ -0,0 +1,182 @@ +#!/usr/bin/python3 + +import tempfile +import os +import typing as t +from pathlib import Path +import textwrap + +from .lib import ( + TestFileTreeCmp, + dwalk_cmd, + create_basic_layout, +) + + +class TestDwalk(TestFileTreeCmp): + + def run_dwalk( + self, + output: t.Optional[Path] = None, + input: t.Optional[Path] = None, + lite: bool = False, + text: bool = False, + ): + cmd = dwalk_cmd() + if output: + cmd += ["--output", output] + if input: + cmd += ["--input", input] + if lite: + cmd.append("--lite") + if text: + cmd.append("--text") + if not input: + cmd.append(self.src) + return self.run_cmd(cmd) + + +class TestDwalkBasic(TestDwalk): + + def setUp(self): + super().setUp() + oldmask = os.umask(0o022) + create_basic_layout(self.src) + os.umask(oldmask) + + def test_walk(self): + proc = self.run_dwalk() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 12 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 3 + """ + ), + ) + + def test_walk_output(self): + # Create and delete a temporary file but keep its name. 
+        with tempfile.NamedTemporaryFile() as fh:
+            output = Path(fh.name)
+        proc = self.run_dwalk(output=output)
+        # Check dwalk has created the file
+        self.assertTrue(output.exists())
+        output.unlink()
+        self.assertInProcStdout(
+            proc,
+            textwrap.dedent(
+                """
+                Items: 12
+                Directories: 2
+                Files: 5
+                Links: 2
+                Hardlinks: 3
+                """
+            ),
+        )
+
+    def test_walk_output_lite(self):
+        # Create and delete a temporary file but keep its name.
+        with tempfile.NamedTemporaryFile() as fh:
+            output = Path(fh.name)
+        proc = self.run_dwalk(output=output, lite=True)
+        # dwalk with --lite does not call stat(), so it cannot detect hardlinks.
+        self.assertInProcStdout(
+            proc,
+            textwrap.dedent(
+                """
+                Items: 12
+                Directories: 2
+                Files: 8
+                Links: 2
+                Hardlinks: 0
+                """
+            ),
+        )
+        # Check dwalk has created the file
+        self.assertTrue(output.exists())
+        with open(output) as fh:
+            content = fh.read()
+        output.unlink()
+        for entry in [
+            f"{self.src}|D",
+            f"{self.src}/file4|F",
+            f"{self.src}/hardlink4.0|F",
+            f"{self.src}/hardlink4.1|F",
+            f"{self.src}/file3|F",
+            f"{self.src}/hardlink3|F",
+            f"{self.src}/file2|F",
+            f"{self.src}/file1|F",
+            f"{self.src}/symlink2|L",
+            f"{self.src}/dir1|D",
+            f"{self.src}/dir1/file4|F",
+            f"{self.src}/dir1/symlink1|L",
+        ]:
+            self.assertIn(entry, content)
+
+    def test_walk_output_text(self):
+        with tempfile.NamedTemporaryFile(mode="w+") as fh:
+            output = Path(fh.name)
+        self.run_dwalk(output=output, text=True)
+        # Check dwalk has created the file
+        self.assertTrue(output.exists())
+        with open(output) as fh:
+            content = fh.read()
+
+        for entry in [
+            rf"drwx------ .* {self.src}",
+            rf"drwxr-xr-x .* {self.src}/dir1",
+            rf"lrwxrwxrwx .* {self.src}/dir1/symlink1",
+            rf"-rw-r--r-- .* {self.src}/dir1/file4",
+            rf"lrwxrwxrwx .* {self.src}/symlink2",
+            rf"-rw-r--r-- .* {self.src}/file1",
+            rf"-rw-r--r-- .* {self.src}/file2",
+            rf"-rw-r--r-- .* {self.src}/file3",
+            rf"-rw-r--r-- .* {self.src}/file4",
+            rf"-rw-r--r-- .* {self.src}/hardlink3",
+            rf"-rw-r--r-- .* {self.src}/hardlink4.0",
+            rf"-rw-r--r-- .* {self.src}/hardlink4.1",
+        ]:
+            self.assertRegex(content, entry)
+
+    def test_walk_input(self):
+        with tempfile.NamedTemporaryFile(mode="w+") as fh:
+            cache = Path(fh.name)
+            self.run_dwalk(output=cache)
+            proc = self.run_dwalk(input=cache)
+        self.assertInProcStdout(
+            proc,
+            textwrap.dedent(
+                """
+                Items: 12
+                Directories: 2
+                Files: 5
+                Links: 2
+                Hardlinks: 3
+                """
+            ),
+        )
+
+    def test_walk_input_lite(self):
+        with tempfile.NamedTemporaryFile(mode="w+") as fh:
+            cache = Path(fh.name)
+            self.run_dwalk(output=cache, lite=True)
+            proc = self.run_dwalk(input=cache)
+        # The lite cache does not contain hardlink info, so dwalk misses them.
+        self.assertInProcStdout(
+            proc,
+            textwrap.dedent(
+                """
+                Items: 12
+                Directories: 2
+                Files: 8
+                Links: 2
+                Hardlinks: 0
+                """
+            ),
+        )