|
1 | 1 | import math |
| 2 | +import os |
2 | 3 | import os.path as op |
3 | 4 | import re |
4 | 5 | import shutil |
| 6 | +import stat |
5 | 7 | import subprocess |
| 8 | +import time |
6 | 9 | import warnings |
7 | 10 |
|
8 | 11 | import datalad.api as dlapi |
|
11 | 14 | from babs.utils import get_git_show_ref_shasum |
12 | 15 |
|
13 | 16 |
|
def robust_rm_dir(path, max_retries=3, retry_delay=1):
    """
    Robustly remove a directory tree, handling filesystem quirks and locked files.

    For datalad datasets, this function prioritizes `datalad remove`.
    Falls back to shutil.rmtree() with retries if needed.

    An attempt only counts as successful when `path` is really gone afterwards;
    otherwise it is retried, and if every attempt fails a warning is issued
    (no exception is raised for a failed removal).

    Parameters
    ----------
    path : str or os.PathLike
        Path to the directory to remove
    max_retries : int
        Maximum number of attempts for shutil.rmtree(); at least one attempt
        is always made, even if a value below 1 is given
    retry_delay : float
        Delay in seconds between retries
    """
    import sys  # local import: only needed to pick the rmtree error-hook name

    if not op.exists(path):
        return

    # A '.datalad' or '.git' entry is used as the marker for a datalad dataset.
    is_datalad_dataset = op.exists(op.join(path, '.datalad')) or op.exists(
        op.join(path, '.git')
    )

    # For datalad datasets, try datalad remove first
    if is_datalad_dataset:
        try:
            dlapi.remove(path=path, reckless='availability', check=False)
        except Exception as e:
            # Deliberately broad: any failure here just triggers the fallback.
            print(f'Warning: datalad remove failed for {path}: {e}')
            print('Falling back to shutil.rmtree()...')
        else:
            # datalad remove might not remove everything
            if not op.exists(path):
                return

    root = op.abspath(os.fspath(path))
    failures = []  # errors that survived the permission fix in this attempt

    def add_owner_rwx(entry):
        """Grant the owner rwx on `entry`, keeping its other mode bits."""
        try:
            mode = stat.S_IMODE(os.lstat(entry).st_mode)
            os.chmod(entry, mode | stat.S_IRWXU)
        except OSError:
            # Best effort: if permissions cannot be fixed, the removal retry
            # below fails and that failure is what gets recorded/reported.
            pass

    def handle_remove_error(func, target, exc_info):
        """
        Error hook for shutil.rmtree: fix permissions, then retry the removal.
        """
        # `onerror` passes an exc_info tuple, `onexc` passes the exception.
        exc = exc_info[1] if isinstance(exc_info, tuple) else exc_info

        if func is op.islink:
            # rmtree refuses to run on a symlink; never touch the link target.
            failures.append(exc)
            return

        target_is_link = op.islink(target)
        if not target_is_link:
            # os.chmod follows symlinks, so only chmod real entries.
            add_owner_rwx(target)
        if op.abspath(target) != root:
            # Deleting an entry needs write permission on its *parent*
            # directory; never chmod anything outside the tree being removed.
            add_owner_rwx(op.dirname(target))

        try:
            if op.isdir(target) and not target_is_link:
                os.rmdir(target)
            else:
                os.unlink(target)
        except FileNotFoundError:
            return  # already gone
        except OSError as e:
            failures.append(e)
            print(f'Warning: Failed to remove {target} after fixing permissions: {e}')

    # `onerror` is deprecated since Python 3.12 in favour of `onexc`.
    hook_name = 'onexc' if sys.version_info >= (3, 12) else 'onerror'
    attempts = max(1, max_retries)
    error = None

    # Try shutil.rmtree() with retries
    for attempt in range(1, attempts + 1):
        failures.clear()
        try:
            shutil.rmtree(path, **{hook_name: handle_remove_error})
        except OSError as e:
            failures.append(e)

        # rmtree returning normally is not proof of success, because the hook
        # absorbs per-entry errors; check the filesystem instead.
        if not op.lexists(path):
            return

        error = failures[-1] if failures else 'directory still exists'
        if attempt < attempts:
            print(f'Warning: Failed to remove {path} (attempt {attempt}/{attempts}): {error}')
            time.sleep(retry_delay)

    # All retries failed, warn but don't crash since merge was successful
    warnings.warn(
        f"Failed to remove temporary directory '{path}' after {attempts} attempts. "
        f'Error: {error}. '
        'The merge was successful, but you may need to manually remove '
        f'this directory. You can safely delete it with: rm -rf {path}',
        stacklevel=2,
    )
| 89 | + |
| 90 | + |
14 | 91 | class BABSMerge(BABS): |
15 | 92 | """BABSMerge is for merging results and provenance from finished jobs.""" |
16 | 93 |
|
@@ -279,7 +356,8 @@ def babs_merge(self, chunk_size=1000, trial_run=False): |
279 | 356 | print('\n`babs merge` was successful!') |
280 | 357 |
|
281 | 358 | # delete the merge_ds folder |
282 | | - shutil.rmtree(merge_ds_path) |
| 359 | + print('\nCleaning up merge_ds directory...') |
| 360 | + robust_rm_dir(merge_ds_path) |
283 | 361 |
|
284 | 362 | # Delete all the merged branches from the output RIA |
285 | 363 | for n_chunk, chunk in enumerate(all_chunks): |
|
0 commit comments