diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py index 7191ced66a..7e94ada3e9 100644 --- a/easybuild/framework/easyblock.py +++ b/easybuild/framework/easyblock.py @@ -2852,11 +2852,10 @@ def extract_step(self): """ Unpack the source files. """ - for src in self.src: + for i, src in enumerate(self.src): self.log.info("Unpacking source %s" % src['name']) srcdir = extract_file(src['path'], self.builddir, cmd=src['cmd'], - extra_options=self.cfg['unpack_options'], change_into_dir=False) - change_dir(srcdir) + extra_options=self.cfg['unpack_options'], change_into_dir=(i == 0)) if srcdir: self.src[self.src.index(src)]['finalpath'] = srcdir else: diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index fcc9149de9..d540e29660 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -537,10 +537,24 @@ def extract_file(fn, dest, cmd=None, extra_options=None, overwrite=False, forced if extra_options: cmd = f"{cmd} {extra_options}" + previous_paths = set(_get_paths_purged(abs_dest)) run_shell_cmd(cmd, in_dry_run=forced, hidden=not trace) - - # note: find_base_dir also changes into the base dir! - base_dir = find_base_dir() + new_paths = set(_get_paths_purged(abs_dest)) - previous_paths + if len(new_paths) == 0: + _log.warning(f"No new folder/files found after extraction of {fn} in {abs_dest}, " + f"verify that the same file is not extracted multiple times! " + f"Existing paths: {', '.join(previous_paths)}") + base_dir = find_base_dir() # Fallback + elif len(new_paths) == 1: + new_path = new_paths.pop() + if os.path.isdir(new_path): + # note: find_base_dir also changes into the base dir! + base_dir = find_base_dir(new_path) + else: + base_dir = abs_dest + else: + base_dir = abs_dest + _log.debug(f"Multiple new folder/files found after extraction: {new_paths}. Using {base_dir} as base dir.") # if changing into obtained directory is not desired, # change back to where we came from (unless that was a non-existing directory) @@ -1444,36 +1458,39 @@ def is_sha256_checksum(value): return res -def find_base_dir(): - """ - Try to locate a possible new base directory - - this is typically a single subdir, e.g. from untarring a tarball - - when extracting multiple tarballs in the same directory, - expect only the first one to give the correct path +def _get_paths_purged(path): + """Find all files and folders in the folder + + Ignore hidden entries and e.g. log directories """ - def get_local_dirs_purged(): - # e.g. always purge the log directory - # and hidden directories - ignoredirs = ["easybuild"] + IGNORED_DIRS = ["easybuild"] - lst = os.listdir(get_cwd()) - lst = [d for d in lst if not d.startswith('.') and d not in ignoredirs] - return lst + lst = os.listdir(path) + lst = [p for p in lst if not p.startswith('.') and p not in IGNORED_DIRS] + return lst - lst = get_local_dirs_purged() - new_dir = get_cwd() + +def find_base_dir(path=None): + """ + Locate a possible new base directory from the current working directory or the specified one and change to it. + + This is typically a single subdir, e.g. from untarring a tarball. + It recurses into subfolders as long as that subfolder is the only child (file or folder) + and returns the current(ly processed) folder if multiple or no childs exist in it. + """ + new_dir = get_cwd() if path is None else os.path.abspath(path) + lst = _get_paths_purged(new_dir) while len(lst) == 1: - new_dir = os.path.join(get_cwd(), lst[0]) + new_dir = os.path.join(new_dir, lst[0]) if not os.path.isdir(new_dir): break - - change_dir(new_dir) - lst = get_local_dirs_purged() + lst = _get_paths_purged(new_dir) # make sure it's a directory, and not a (single) file that was in a tarball for example while not os.path.isdir(new_dir): new_dir = os.path.dirname(new_dir) + change_dir(new_dir) _log.debug("Last dir list %s" % lst) _log.debug("Possible new dir %s found" % new_dir) return new_dir diff --git a/easybuild/tools/github.py b/easybuild/tools/github.py index fa6e5519eb..9f4540678e 100644 --- a/easybuild/tools/github.py +++ b/easybuild/tools/github.py @@ -430,8 +430,10 @@ def download_repo(repo=GITHUB_EASYCONFIGS_REPO, branch=None, commit=None, accoun else: _log.debug("%s downloaded to %s, extracting now", base_name, path) - base_dir = extract_file(target_path, path, forced=True, change_into_dir=False, trace=False) - extracted_path = os.path.join(base_dir, extracted_dir_name) + extracted_path = extract_file(target_path, path, forced=True, change_into_dir=False, trace=False) + if extracted_path != expected_path: + raise EasyBuildError(f"Unexpected directory '{extracted_path} for extracted repo. Expected: {expected_path}", + exit_code=EasyBuildExit.FAIL_EXTRACT) # check if extracted_path exists if not os.path.isdir(extracted_path): diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 7e296803cc..b4b4a67d96 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -40,6 +40,7 @@ import shutil import stat import sys +import tarfile import tempfile import textwrap import time @@ -166,11 +167,39 @@ def test_find_base_dir(self): foodir = os.path.join(tmpdir, 'foo') os.mkdir(foodir) - os.mkdir(os.path.join(tmpdir, '.bar')) - os.mkdir(os.path.join(tmpdir, 'easybuild')) - + # No files os.chdir(tmpdir) self.assertTrue(os.path.samefile(foodir, ft.find_base_dir())) + # Uses specified path + os.chdir(self.test_prefix) + self.assertTrue(os.path.samefile(foodir, ft.find_base_dir(tmpdir))) + + # Only ignored files/folders + os.mkdir(os.path.join(tmpdir, '.bar')) + os.mkdir(os.path.join(tmpdir, 'easybuild')) + self.assertTrue(os.path.samefile(foodir, ft.find_base_dir(tmpdir))) + + # Subfolder + bardir = os.path.join(foodir, 'bar') + os.mkdir(bardir) + self.assertTrue(os.path.samefile(bardir, ft.find_base_dir(tmpdir))) + # With ignored folder in subfolder + os.mkdir(os.path.join(bardir, '.bar')) + self.assertTrue(os.path.samefile(bardir, ft.find_base_dir(tmpdir))) + + # Test recursiveness + subdir = os.path.join(bardir, 'sub') + os.mkdir(subdir) + self.assertTrue(os.path.samefile(subdir, ft.find_base_dir(tmpdir))) + # Only file(s) in subfolder + ft.write_file(os.path.join(subdir, 'src.c'), 'code') + self.assertTrue(os.path.samefile(subdir, ft.find_base_dir(tmpdir))) + ft.write_file(os.path.join(subdir, 'src2.c'), 'code') + self.assertTrue(os.path.samefile(subdir, ft.find_base_dir(tmpdir))) + + # Files and folders in subfolder + os.mkdir(os.path.join(bardir, 'subdir')) + self.assertTrue(os.path.samefile(bardir, ft.find_base_dir(tmpdir))) def test_find_glob_pattern(self): """test find_glob_pattern function""" @@ -1865,7 +1894,7 @@ def test_apply_patch(self): for with_backup in (True, False): update_build_option('backup_patched_files', with_backup) self.assertTrue(ft.apply_patch(toy_patch, path)) - src_file = os.path.join(path, 'toy-0.0', 'toy.source') + src_file = os.path.join(path, 'toy.source') backup_file = src_file + '.orig' patched = ft.read_file(src_file) pattern = "I'm a toy, and very proud of it" @@ -1882,7 +1911,7 @@ def test_apply_patch(self): toy_patch_gz = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0_gzip.patch.gz') with self.mocked_stdout_stderr(): self.assertTrue(ft.apply_patch(toy_patch_gz, path)) - patched_gz = ft.read_file(os.path.join(path, 'toy-0.0', 'toy.source')) + patched_gz = ft.read_file(os.path.join(path, 'toy.source')) pattern = "I'm a toy, and very very proud of it" self.assertIn(pattern, patched_gz) @@ -1893,7 +1922,7 @@ def test_apply_patch(self): with self.mocked_stdout_stderr(): ft.apply_patch(toy_patch_gz, path, options=' --reverse') # Change was really removed - self.assertNotIn(pattern, ft.read_file(os.path.join(path, 'toy-0.0', 'toy.source'))) + self.assertNotIn(pattern, ft.read_file(os.path.join(path, 'toy.source'))) # test copying of files, both to an existing directory and a non-existing location test_file = os.path.join(self.test_prefix, 'foo.txt') @@ -2471,31 +2500,46 @@ def test_change_dir(self): foo = os.path.join(self.test_prefix, 'foo') self.assertErrorRegex(EasyBuildError, "Failed to change from .* to %s" % foo, ft.change_dir, foo) + def create_new_tarball(self, folder, filename=None): + """Create new tarball with contents of folder and return path""" + if filename is None: + tarball = tempfile.mktemp(suffix='.tar.gz') + else: + tarball = os.path.join(tempfile.mkdtemp(), filename) + with tarfile.open(tarball, "w:gz") as tar: + for name in glob.glob(os.path.join(folder, '*')): + tar.add(name, arcname=os.path.basename(name)) + return tarball + def test_extract_file(self): """Test extract_file""" cwd = os.getcwd() - testdir = os.path.dirname(os.path.abspath(__file__)) - toy_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0.tar.gz') + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'toy-0.0')) + ft.write_file(os.path.join(test_src, 'toy-0.0', 'toy.source'), 'content') + toy_tarball = self.create_new_tarball(test_src, filename='toy-0.0.tar.gz') - self.assertNotExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) + extraction_path = os.path.join(self.test_prefix, 'extraction') # New directory + toy_path = os.path.join(extraction_path, 'toy-0.0') + self.assertNotExists(os.path.join(toy_path, 'toy.source')) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=False) - self.assertExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) - self.assertTrue(os.path.samefile(path, self.test_prefix)) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertExists(os.path.join(toy_path, 'toy.source')) + self.assertTrue(os.path.samefile(path, toy_path)) # still in same directory as before if change_into_dir is set to False self.assertTrue(os.path.samefile(os.getcwd(), cwd)) - shutil.rmtree(os.path.join(path, 'toy-0.0')) + ft.remove_dir(toy_path) toy_tarball_renamed = os.path.join(self.test_prefix, 'toy_tarball') shutil.copyfile(toy_tarball, toy_tarball_renamed) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball_renamed, self.test_prefix, cmd="tar xfvz %s", change_into_dir=False) + path = ft.extract_file(toy_tarball_renamed, extraction_path, cmd="tar xfvz %s", change_into_dir=False) self.assertTrue(os.path.samefile(os.getcwd(), cwd)) - self.assertExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) - self.assertTrue(os.path.samefile(path, self.test_prefix)) - shutil.rmtree(os.path.join(path, 'toy-0.0')) + self.assertExists(os.path.join(toy_path, 'toy.source')) + self.assertTrue(os.path.samefile(path, toy_path)) + ft.remove_dir(toy_path) # also test behaviour of extract_file under --dry-run build_options = { @@ -2505,47 +2549,112 @@ def test_extract_file(self): init_config(build_options=build_options) self.mock_stdout(True) - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=False) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) txt = self.get_stdout() self.mock_stdout(False) self.assertTrue(os.path.samefile(os.getcwd(), cwd)) - self.assertTrue(os.path.samefile(path, self.test_prefix)) - self.assertNotExists(os.path.join(self.test_prefix, 'toy-0.0')) + self.assertTrue(os.path.samefile(path, extraction_path)) + self.assertNotExists(toy_path) self.assertTrue(re.search('running shell command "tar xzf .*/toy-0.0.tar.gz"', txt)) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, forced=True, change_into_dir=False) - self.assertExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) - self.assertTrue(os.path.samefile(path, self.test_prefix)) + path = ft.extract_file(toy_tarball, extraction_path, forced=True, change_into_dir=False) + self.assertExists(os.path.join(toy_path, 'toy.source')) + self.assertTrue(os.path.samefile(path, toy_path)) self.assertTrue(os.path.samefile(os.getcwd(), cwd)) build_options['extended_dry_run'] = False init_config(build_options=build_options) - ft.remove_dir(os.path.join(self.test_prefix, 'toy-0.0')) + ft.remove_dir(toy_path) ft.change_dir(cwd) - self.assertFalse(os.path.samefile(os.getcwd(), self.test_prefix)) + self.assertFalse(os.path.samefile(os.getcwd(), extraction_path)) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=True) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=True) stdout = self.get_stdout() stderr = self.get_stderr() - self.assertTrue(os.path.samefile(path, self.test_prefix)) - self.assertTrue(os.path.samefile(os.getcwd(), self.test_prefix)) + self.assertTrue(os.path.samefile(path, toy_path)) + self.assertTrue(os.path.samefile(os.getcwd(), toy_path)) self.assertFalse(stderr) self.assertTrue("running shell command" in stdout) # check whether disabling trace output works with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=True, trace=False) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=True, trace=False) stdout = self.get_stdout() stderr = self.get_stderr() self.assertFalse(stderr) self.assertFalse(stdout) + # Test tarball with multiple folders + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'multi-1.0')) + ft.write_file(os.path.join(test_src, 'multi-1.0', 'src.c'), 'content') + ft.mkdir(os.path.join(test_src, 'multi-bonus')) + ft.write_file(os.path.join(test_src, 'multi-bonus', 'src.c'), 'content') + test_tarball = self.create_new_tarball(test_src) + # Start fresh + ft.remove_dir(extraction_path) + ft.change_dir(cwd) + with self.mocked_stdout_stderr(): + path = ft.extract_file(test_tarball, extraction_path, change_into_dir=True) + self.assertTrue(os.path.samefile(path, extraction_path)) + self.assertTrue(os.path.samefile(os.getcwd(), extraction_path)) # NOT a subfolder + self.assertExists(os.path.join(extraction_path, 'multi-1.0')) + self.assertExists(os.path.join(extraction_path, 'multi-bonus')) + + # Extract multiple files with single folder to same folder, and file only + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'bar-0.0')) + ft.write_file(os.path.join(test_src, 'bar-0.0', 'bar.source'), 'content') + bar_tarball = self.create_new_tarball(test_src) + + test_src = tempfile.mkdtemp() + ft.write_file(os.path.join(test_src, 'main.source'), 'content') + file_tarball = self.create_new_tarball(test_src) + + ft.remove_dir(extraction_path) + ft.change_dir(cwd) + with self.mocked_stdout_stderr(): + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, toy_path)) + # Extracting the same tarball again detects the same folder + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, toy_path)) + + path = ft.extract_file(bar_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, os.path.join(extraction_path, 'bar-0.0'))) + + # Extracting the same tarball again now can't detect the same folder as there is another one next to it. + # Should fall back to the parent dir + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + + # Contains just a file + path = ft.extract_file(file_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + # Same behavior when only a file is extracted + ft.remove_dir(extraction_path) + path = ft.extract_file(file_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + + # Folder and file + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'multi-1.0')) + ft.write_file(os.path.join(test_src, 'multi-1.0', 'src.c'), 'content') + ft.write_file(os.path.join(test_src, 'main.c'), 'content') + test_tarball = self.create_new_tarball(test_src) + # When there is only a file or a file next to the folder the parent dir is returned + for tarball in (file_tarball, test_tarball): + ft.remove_dir(extraction_path) + with self.mocked_stdout_stderr(): + path = ft.extract_file(tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + def test_empty_dir(self): """Test empty_dir function""" test_dir = os.path.join(self.test_prefix, 'test123') @@ -2578,8 +2687,7 @@ def test_empty_dir(self): txt = self.get_stdout() self.mock_stdout(False) - regex = re.compile("^directory [^ ]* emptied$") - self.assertTrue(regex.match(txt), f"Pattern '{regex.pattern}' found in: {txt}") + self.assertRegex(txt, "^directory [^ ]* emptied$") def test_remove(self): """Test remove_file, remove_dir and join remove functions.""" @@ -2882,7 +2990,7 @@ def test_search_file(self): # to avoid accidental matches in other files already present (log files, etc.) ec_dir = tempfile.mkdtemp() test_ec = os.path.join(ec_dir, 'netCDF-C++-4.2-foss-2019a.eb') - ft.write_file(test_ec, ''), + ft.write_file(test_ec, '') for pattern in ['netCDF-C++', 'CDF', 'C++', '^netCDF']: var_defs, hits = ft.search_file([ec_dir], pattern, terse=True, filename_only=True) self.assertEqual(var_defs, [], msg='For pattern ' + pattern)