Skip to content

Commit eaaa3b3

Browse files
committed
performance: avoid walk when subfolders are unneeded
Also changed some names and reduced indentation levels
1 parent f640184 commit eaaa3b3

File tree

4 files changed

+59
-43
lines changed

4 files changed

+59
-43
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ eggs/
99
*.egg-info/
1010
*.egg
1111
.tox/
12+
.pytest_cache
1213

1314
# Editor Temps
1415
.*.sw?
@@ -44,4 +45,5 @@ output.txt
4445
pydocstyle_report.txt
4546
response.txt
4647
test.txt
48+
time.txt
4749
value

cwltool/process.py

Lines changed: 55 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
import json
1010
import logging
1111
import os
12+
try:
13+
from os import scandir
14+
except ImportError:
15+
from scandir import scandir
1216
import shutil
1317
import stat
1418
import tempfile
@@ -266,7 +270,7 @@ def stageFiles(pm, stageFunc=None, ignoreWritable=False, symLink=True, secret_st
266270

267271
def relocateOutputs(outputObj, # type: Union[Dict[Text, Any],List[Dict[Text, Any]]]
268272
destination_path, # type: Text
269-
output_dirs, # type: Set[Text]
273+
source_directories, # type: Set[Text]
270274
action, # type: Text
271275
fs_access, # type: StdFsAccess
272276
compute_checksum=True # type: bool
@@ -292,28 +296,31 @@ def _collectDirEntries(obj):
292296
yield dir_entry
293297

294298
def _relocate(src, dst):
    """Move or copy ``src`` to ``dst`` according to the enclosing ``action``.

    Nothing happens when ``src`` and ``dst`` are the same path.

    When ``action`` is ``"move"``, ``src`` is only touched if it lives under
    one of ``source_directories``; anything outside of them is left in place.
    If both ``src`` and ``dst`` are existing directories their contents are
    merged entry by entry, otherwise ``src`` is moved with ``shutil.move``.

    For any other ``action`` value ``src`` is copied: a directory replaces an
    existing ``dst`` directory or file, a file is copied with ``shutil.copy2``.

    :param src: path of the entity to relocate
    :param dst: path the entity should end up at
    """
    if src == dst:
        return

    if action == "move":
        # do not move anything if we are trying to move an entity from
        # outside of the source directories
        if any(src.startswith(path + "/") for path in source_directories):
            _logger.debug("Moving %s to %s", src, dst)
            if os.path.isdir(src) and os.path.isdir(dst):
                # merge directories
                for dir_entry in scandir(src):
                    # Recurse with the entry's path string, not the DirEntry
                    # object: the comparisons and startswith() above need text.
                    _relocate(dir_entry.path,
                              os.path.join(dst, dir_entry.name))
            else:
                shutil.move(src, dst)
        return

    _logger.debug("Copying %s to %s", src, dst)
    if os.path.isdir(src):
        # copytree needs a free destination, so clear whatever is there first
        if os.path.isdir(dst):
            shutil.rmtree(dst)
        elif os.path.isfile(dst):
            os.unlink(dst)
        shutil.copytree(src, dst)
    else:
        shutil.copy2(src, dst)
317324

318325
outfiles = list(_collectDirEntries(outputObj))
319326
pm = PathMapper(outfiles, "", destination_path, separateDirs=False)
@@ -332,30 +339,35 @@ def _check_adjust(file):
332339
# If there are symlinks to intermediate output directories, we want to move
333340
# the real files into the final output location. If a file is linked more than once,
334341
# make an internal relative symlink.
342+
def relink(relinked,  # type: Dict[Text, Text]
           root_path  # type: Text
           ):
    # type: (...) -> None
    """Replace symlinks below ``root_path`` with real files or internal links.

    Every entry of ``root_path`` is inspected, and directories are descended
    into recursively.  For a symlink whose resolved target lies under one of
    ``source_directories``, the first occurrence is replaced by the target
    itself (the target is renamed onto the link's path).  Later links to the
    same target are re-pointed at that new location with a relative symlink;
    on Windows a copy is attempted instead of creating a symlink.

    :param relinked: maps an already relocated target (its resolved original
        path) to the path it now lives at; updated in place
    :param root_path: directory whose entries are processed
    """
    for dir_entry in scandir(root_path):
        path = dir_entry.path
        if os.path.islink(path):
            real_path = os.path.realpath(path)
            if real_path in relinked:
                link_name = relinked[real_path]
                if onWindows():
                    # NOTE(review): the copy source below is a relative path
                    # computed against the link path; a relative source is
                    # resolved against the current working directory, so
                    # confirm this branch behaves as intended.
                    if os.path.isfile(path):
                        shutil.copy(os.path.relpath(link_name, path), path)
                    elif os.path.exists(path) and os.path.isdir(path):
                        shutil.rmtree(path)
                        copytree_with_merge(
                            os.path.relpath(link_name, path), path)
                else:
                    os.unlink(path)
                    # A relative symlink target is resolved against the
                    # directory that contains the link, so the relative path
                    # must start from the link's parent, not the link itself.
                    os.symlink(
                        os.path.relpath(link_name, os.path.dirname(path)),
                        path)
            else:
                if any(real_path.startswith(source_dir + "/")
                       for source_dir in source_directories):
                    # first link to this target: put the real entity here
                    os.unlink(path)
                    os.rename(real_path, path)
                    relinked[real_path] = path
        # re-checked on the live filesystem because the entry may just have
        # been replaced by a real directory
        if os.path.isdir(path):
            relink(relinked, path)
367+
335368
if action == "move":
336369
relinked = {} # type: Dict[Text, Text]
337-
for root, dirs, files in os.walk(destination_path):
338-
for f in dirs+files:
339-
path = os.path.join(root, f)
340-
if os.path.islink(path):
341-
rp = os.path.realpath(path)
342-
if rp in relinked:
343-
if onWindows():
344-
if os.path.isfile(path):
345-
shutil.copy(os.path.relpath(relinked[rp], path), path)
346-
elif os.path.exists(path) and os.path.isdir(path):
347-
shutil.rmtree(path)
348-
copytree_with_merge(os.path.relpath(relinked[rp], path), path)
349-
else:
350-
os.unlink(path)
351-
os.symlink(os.path.relpath(relinked[rp], path), path)
352-
else:
353-
for od in output_dirs:
354-
if rp.startswith(od+"/"):
355-
os.unlink(path)
356-
os.rename(rp, path)
357-
relinked[rp] = path
358-
break
370+
relink(relinked, destination_path)
359371

360372
return outputObj
361373

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ prov==1.5.1
99
bagit==1.6.4
1010
mypy-extensions
1111
psutil
12+
scandir
1213
subprocess32 >= 3.5.0; os.name=="posix"
1314
typing-extensions

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
'mypy-extensions',
6060
'six >= 1.9.0', # >= 1.9.0 required by prov
6161
'psutil',
62+
'scandir',
6263
'prov == 1.5.1',
6364
'bagit >= 1.6.4',
6465
'typing-extensions',

0 commit comments

Comments
 (0)