-
Notifications
You must be signed in to change notification settings - Fork 73
Hardlinks support #633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
rezib
wants to merge
18
commits into
hpc:main
Choose a base branch
from
rezib:for-upstream/hardlinks-support
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Hardlinks support #633
Changes from 1 commit
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
4ec8071
mfu: index hardlinks during tree walk
rezib b76f4a4
dcp: support hardlinks
rezib 1df4acb
dcmp: support hardlinks
rezib d1d40f9
dsync: support hardlinks
rezib b133a56
dtar: support hardlinks
rezib a6ba7d2
mfu: introduce cache format v5
rezib 50ecbd6
dcp: support hardlinks with --input
rezib d25df11
tests: automatic functional tests
rezib 455f2e5
ci: workflow to run tests in github actions
rezib fc0c6a0
dsync: fix gathered removed hardlinks buffer read
rezib a99621c
dsync: avoid modified hardlinks multiple copies
rezib 411fede
tests: add files and hardlinks in testing tree
rezib 0302f68
tests: add dsync hardlinks corner test cases
rezib 530594f
mfu: set ref to NULL w/o detail
rezib a1bd23d
mfu: avoid free elem_t->ref w/o detail
rezib ca9896f
tests: add dcmp corner case to document limitation
rezib 90ff407
tests: dsync with new hardlink in destination
rezib d9d0cde
dsync: skip dst hardlink deletion when not in src
rezib File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1581,6 +1581,166 @@ static int dsync_strmap_compare_link_dest( | |
| return rc; | ||
| } | ||
|
|
||
| /* For all local references files (ie. regular files with nlink > 1), flag all | ||
| * hardlinks using these files as references as having different content and | ||
| * place them in destination removal list and source copy list. Return -1 on | ||
| * error on any task. */ | ||
| static int dsync_remove_hardlinks_with_removed_refs( | ||
| mfu_flist src_list, | ||
| mfu_flist src_cp_list, | ||
| strmap* src_map, | ||
| mfu_flist dst_list, | ||
| mfu_flist dst_remove_list, | ||
| strmap* dst_map, | ||
| mfu_file_t* mfu_src_file, | ||
| mfu_file_t* mfu_dst_file | ||
| ) { | ||
| /* assume we'll succeed */ | ||
| int rc = 0; | ||
| int tmp_rc; | ||
|
|
||
| uint64_t chars = mfu_flist_file_max_name(dst_remove_list); | ||
|
|
||
| /* bail out if there is nothing removed in destination */ | ||
| if (!chars) { | ||
| return 0; | ||
| } | ||
|
|
||
| int ranks; | ||
| MPI_Comm_size(MPI_COMM_WORLD, &ranks); | ||
|
|
||
| /* Count all local references selected for removal. */ | ||
| int local_removed_refs = 0; | ||
| uint64_t remove_count = mfu_flist_size(dst_remove_list); | ||
| for(uint64_t idx=0; idx<remove_count; idx++) { | ||
| mfu_filetype type = mfu_flist_file_get_type(dst_remove_list, idx); | ||
| uint64_t nlink = mfu_flist_file_get_nlink(dst_remove_list, idx); | ||
| if (type == MFU_TYPE_FILE && nlink > 1) { | ||
| local_removed_refs++; | ||
| } | ||
| } | ||
|
|
||
| /* get number of references removed by all tasks */ | ||
| int* recvcounts = (int*) MFU_MALLOC(ranks * sizeof(int)); | ||
| MPI_Allgather(&local_removed_refs, 1, MPI_INT, | ||
| recvcounts, 1, MPI_INT, MPI_COMM_WORLD); | ||
|
|
||
| MPI_Aint char_lb, char_extent; | ||
| MPI_Type_get_extent(MPI_CHAR, &char_lb, &char_extent); | ||
|
|
||
| /* compute displacements and total number of bytes that we'll receive */ | ||
| size_t allbytes = 0; | ||
| int disp = 0; | ||
| int* recvdispls = (int*) MFU_MALLOC(ranks * sizeof(int)); | ||
|
|
||
| for (int i = 0; i < (int) ranks; i++) { | ||
| /* adjust values in recvcounts for MPI_Allgatherv() */ | ||
| recvcounts[i] *= chars; | ||
| recvdispls[i] = disp; | ||
| disp += (int) recvcounts[i]; | ||
| allbytes += (size_t) recvcounts[i]; | ||
| } | ||
|
|
||
| /* allocate memory for recv buffers */ | ||
| char* recvbuf = MFU_MALLOC(allbytes); | ||
| void* sendbuf = NULL; | ||
|
|
||
| /* fill sendbuf with names of local references that will be removed */ | ||
| if (local_removed_refs) { | ||
| sendbuf = MFU_MALLOC((size_t)char_extent * chars * local_removed_refs); | ||
| char* sendptr = (char*) sendbuf; | ||
| for(int idx=0; idx<remove_count; idx++) { | ||
| const char* name = mfu_flist_file_get_name(dst_remove_list, idx); | ||
| mfu_filetype type = mfu_flist_file_get_type(dst_remove_list, idx); | ||
| uint64_t nlink = mfu_flist_file_get_nlink(dst_remove_list, idx); | ||
| if (type == MFU_TYPE_FILE && nlink > 1) { | ||
| strncpy(sendptr, name, chars); | ||
| sendptr += char_extent * chars; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| MPI_Allgatherv(sendbuf, local_removed_refs * chars, MPI_CHAR, | ||
| recvbuf, recvcounts, recvdispls, MPI_CHAR, MPI_COMM_WORLD); | ||
|
|
||
| /* iterate of all reference names received */ | ||
| uint64_t count = mfu_flist_size(dst_list); | ||
| char* recvptr = (char*) recvbuf; | ||
| for (int i = 0; i < (int) ranks; i++) { | ||
| for (int j = 0; j<recvcounts[i]; j++) { | ||
| const char* removed_ref = recvptr; | ||
| /* Search for local hardlinks with this reference and mark them to | ||
| * be removed. */ | ||
| const strmap_node* node; | ||
| strmap_foreach(dst_map, node) { | ||
| /* get file name */ | ||
| const char* key = strmap_node_key(node); | ||
|
|
||
| /* get index of destination file */ | ||
| uint64_t dst_index; | ||
| int tmp_rc; | ||
| tmp_rc = dsync_strmap_item_index(dst_map, key, &dst_index); | ||
| if (tmp_rc) { | ||
| rc = -1; | ||
| MFU_LOG(MFU_LOG_ERR, "ERROR: Unable to find index of key `%s' in source map", key); | ||
| continue; | ||
rezib marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /* skip item if not hardlink */ | ||
| mfu_filetype type = mfu_flist_file_get_type(dst_list, dst_index); | ||
| if(type != MFU_TYPE_HARDLINK) | ||
| continue; | ||
rezib marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /* skip item if reference does not match */ | ||
| const char *ref = mfu_flist_file_get_ref(dst_list, dst_index); | ||
| if(strcmp(ref, removed_ref)) | ||
| continue; | ||
rezib marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /* skip item if type already differs */ | ||
| dsync_state state; | ||
| tmp_rc = dsync_strmap_item_state(dst_map, key, DCMPF_TYPE, &state); | ||
| assert(tmp_rc == 0); | ||
| if (state == DCMPS_DIFFER) | ||
| continue; | ||
rezib marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /* Update to say contents of the symlinks were found to be | ||
| * different */ | ||
| dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); | ||
| dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); | ||
|
|
||
| /* Unless dry run mode, mark the file to be removed in | ||
| * destination and copied from source. */ | ||
| if (!options.dry_run) { | ||
| /* get index of source file */ | ||
| uint64_t src_index; | ||
| tmp_rc = dsync_strmap_item_index(src_map, key, &src_index); | ||
| if (tmp_rc) { | ||
| rc = -1; | ||
| MFU_LOG(MFU_LOG_ERR, "ERROR: Unable to find index of key `%s' in destination map", key); | ||
| } else | ||
| mfu_flist_file_copy(src_list, src_index, src_cp_list); | ||
| mfu_flist_file_copy(dst_list, dst_index, dst_remove_list); | ||
| } | ||
| } | ||
| /* jump to next received path */ | ||
| recvptr += char_extent * chars; | ||
| } | ||
| } | ||
|
|
||
| mfu_free(&recvcounts); | ||
| mfu_free(&recvdispls); | ||
| mfu_free(&sendbuf); | ||
| mfu_free(&recvbuf); | ||
|
|
||
| /* determine whether any process hit an error, | ||
| * input is either 0 or -1, so MIN will return -1 if any */ | ||
| int all_rc; | ||
| MPI_Allreduce(&rc, &all_rc, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); | ||
| rc = all_rc; | ||
|
|
||
| return rc; | ||
| } | ||
|
|
||
| /* compare entries from src into dst */ | ||
| static int dsync_strmap_compare( | ||
| mfu_flist src_list, | ||
|
|
@@ -1712,14 +1872,11 @@ static int dsync_strmap_compare( | |
| continue; | ||
| } | ||
|
|
||
| /* get modes of files */ | ||
| mode_t src_mode = (mode_t) mfu_flist_file_get_mode(src_list, | ||
| src_index); | ||
| mode_t dst_mode = (mode_t) mfu_flist_file_get_mode(dst_list, | ||
| dst_index); | ||
| mfu_filetype src_type = mfu_flist_file_get_type(src_list, src_index); | ||
| mfu_filetype dst_type = mfu_flist_file_get_type(dst_list, dst_index); | ||
|
|
||
| /* check whether files are of the same type */ | ||
| if ((src_mode & S_IFMT) != (dst_mode & S_IFMT)) { | ||
| if (src_type != dst_type) { | ||
| /* file type is different, no need to go any futher */ | ||
| dsync_strmap_item_update(src_map, key, DCMPF_TYPE, DCMPS_DIFFER); | ||
| dsync_strmap_item_update(dst_map, key, DCMPF_TYPE, DCMPS_DIFFER); | ||
|
|
@@ -1751,8 +1908,8 @@ static int dsync_strmap_compare( | |
| continue; | ||
| } | ||
|
|
||
| /* for now, we can only compare content of regular files and symlinks */ | ||
| if (! S_ISREG(dst_mode) && ! S_ISLNK(dst_mode)) { | ||
| /* for now, we can only compare content of regular files, symlinks and hardlinks */ | ||
| if (dst_type != MFU_TYPE_FILE && dst_type != MFU_TYPE_LINK && dst_type != MFU_TYPE_HARDLINK) { | ||
| /* not regular file or symlink, take them as common content */ | ||
| dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_COMMON); | ||
| dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_COMMON); | ||
|
|
@@ -1761,7 +1918,7 @@ static int dsync_strmap_compare( | |
|
|
||
| /* if symlink, check if targets of source and destination files match. If not, | ||
| * mark the files as being different. */ | ||
| if (S_ISLNK(dst_mode)) { | ||
| if (dst_type == MFU_TYPE_LINK) { | ||
| const char* src_name = mfu_flist_file_get_name(src_list, src_index); | ||
| const char* dst_name = mfu_flist_file_get_name(dst_list, dst_index); | ||
| int compare_rc = mfu_compare_symlinks(src_name, dst_name, mfu_src_file, mfu_dst_file); | ||
|
|
@@ -1791,6 +1948,24 @@ static int dsync_strmap_compare( | |
| continue; | ||
| } | ||
|
|
||
| /* compare hardlink references */ | ||
| if (dst_type == MFU_TYPE_HARDLINK) { | ||
| const char* src_ref = mfu_flist_file_get_ref(src_list, src_index) + strlen_prefix; | ||
| const char* dst_ref = mfu_flist_file_get_ref(dst_list, dst_index) + strlen(dest_path->path); | ||
|
|
||
| if(strcmp(src_ref, dst_ref)) { | ||
| /* take them as differ content */ | ||
| dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); | ||
| dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); | ||
|
|
||
| if (!options.dry_run) { | ||
| mfu_flist_file_copy(src_list, src_index, src_cp_list); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A segmentation fault happens in here. |
||
| mfu_flist_file_copy(dst_list, dst_index, dst_remove_list); | ||
| } | ||
| } | ||
| continue; | ||
| } | ||
|
|
||
| /* first check whether file sizes match */ | ||
| dsync_state state; | ||
| tmp_rc = dsync_strmap_item_state(src_map, key, DCMPF_SIZE, &state); | ||
|
|
@@ -1874,6 +2049,16 @@ static int dsync_strmap_compare( | |
| } | ||
| } | ||
|
|
||
| mfu_flist_summarize(dst_remove_list); | ||
|
|
||
| /* For all references (ie. regular files with nlink > 1) in dst_remove_list, | ||
| * select all hardlinks pointing to this reference for removal as well. */ | ||
| tmp_rc = dsync_remove_hardlinks_with_removed_refs(src_list, src_cp_list, | ||
| src_map, dst_list, dst_remove_list, dst_map, mfu_src_file, mfu_dst_file); | ||
| if (tmp_rc < 0) { | ||
| rc = -1; | ||
| } | ||
|
|
||
| /* wait for all procs to finish before stopping timer */ | ||
| MPI_Barrier(MPI_COMM_WORLD); | ||
|
|
||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line causes a segfault, when all the refs of a file are removed from the FS source.
Here we iterate on the byte array instead of the reference array.
This should look like:
Maybe we should add a regression test for this case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed, the pointer arithmetic over the the MPI_allgatherv() was wrong with chars. Fixed in 79b2219.