Skip to content

Commit 46745d0

Browse files
committed
Avoid unnecessary file checking operations
Combines _can_bag and _can_read into one function. Instead of compiling a list of every single file and directory that cannot be read/written to, this new function throws an Exception as soon as a problematic file or directory comes up. At most, one walk() operation is completed now, instead of two.
1 parent 7498a5e commit 46745d0

File tree

1 file changed

+116
-143
lines changed

1 file changed

+116
-143
lines changed

bagit.py

Lines changed: 116 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -178,100 +178,78 @@ def make_bag(
178178
old_dir = os.path.abspath(os.path.curdir)
179179

180180
try:
181-
# TODO: These two checks are currently redundant since an unreadable directory will also
182-
# often be unwritable, and this code will require review when we add the option to
183-
# bag to a destination other than the source. It would be nice if we could avoid
184-
# walking the directory tree more than once even if most filesystems will cache it
181+
# An exception is raised if any permissions will prevent the bagging of this path
182+
_check_can_bag(bag_dir)
185183

186-
unbaggable = _can_bag(bag_dir)
184+
LOGGER.info(_("Creating data directory"))
187185

188-
if unbaggable:
189-
LOGGER.error(
190-
_("Unable to write to the following directories and files:\n%s"),
191-
unbaggable,
192-
)
193-
raise BagError(_("Missing permissions to move all files and directories"))
194-
195-
unreadable_dirs, unreadable_files = _can_read(bag_dir)
196-
197-
if unreadable_dirs or unreadable_files:
198-
if unreadable_dirs:
199-
LOGGER.error(
200-
_("The following directories do not have read permissions:\n%s"),
201-
unreadable_dirs,
202-
)
203-
if unreadable_files:
204-
LOGGER.error(
205-
_("The following files do not have read permissions:\n%s"),
206-
unreadable_files,
207-
)
208-
raise BagError(
209-
_("Read permissions are required to calculate file fixities")
210-
)
211-
else:
212-
LOGGER.info(_("Creating data directory"))
213-
214-
# FIXME: if we calculate full paths we won't need to deal with changing directories
215-
os.chdir(bag_dir)
216-
cwd = os.getcwd()
217-
temp_data = tempfile.mkdtemp(dir=cwd)
218-
219-
for f in os.listdir("."):
220-
if os.path.abspath(f) == temp_data:
221-
continue
222-
new_f = os.path.join(temp_data, f)
223-
LOGGER.info(
224-
_("Moving %(source)s to %(destination)s"),
225-
{"source": f, "destination": new_f},
226-
)
227-
os.rename(f, new_f)
186+
# FIXME: if we calculate full paths we won't need to deal with changing directories
187+
os.chdir(bag_dir)
188+
cwd = os.getcwd()
189+
temp_data = tempfile.mkdtemp(dir=cwd)
228190

191+
for f in os.listdir("."):
192+
if os.path.abspath(f) == temp_data:
193+
continue
194+
new_f = os.path.join(temp_data, f)
229195
LOGGER.info(
230196
_("Moving %(source)s to %(destination)s"),
231-
{"source": temp_data, "destination": "data"},
197+
{"source": f, "destination": new_f},
232198
)
233-
while True:
234-
try:
235-
os.rename(temp_data, "data")
236-
break
237-
except PermissionError as e:
238-
if hasattr(e, "winerror") and e.winerror == 5:
239-
LOGGER.warning(_("PermissionError [WinError 5] when renaming temp folder. Retrying in 10 seconds..."))
240-
time.sleep(10)
241-
else:
242-
raise
199+
os.rename(f, new_f)
243200

244-
# permissions for the payload directory should match those of the
245-
# original directory
246-
os.chmod("data", os.stat(cwd).st_mode)
201+
LOGGER.info(
202+
_("Moving %(source)s to %(destination)s"),
203+
{"source": temp_data, "destination": "data"},
204+
)
247205

248-
total_bytes, total_files = make_manifests(
249-
"data", processes, algorithms=checksums, encoding=encoding
206+
while True:
207+
try:
208+
os.rename(temp_data, "data")
209+
break
210+
except PermissionError as e:
211+
if hasattr(e, "winerror") and e.winerror == 5:
212+
LOGGER.warning(
213+
_(
214+
"PermissionError [WinError 5] when renaming temp folder. Retrying in 10 seconds..."
215+
)
216+
)
217+
time.sleep(10)
218+
else:
219+
raise
220+
221+
# permissions for the payload directory should match those of the
222+
# original directory
223+
os.chmod("data", os.stat(cwd).st_mode)
224+
225+
total_bytes, total_files = make_manifests(
226+
"data", processes, algorithms=checksums, encoding=encoding
227+
)
228+
229+
LOGGER.info(_("Creating bagit.txt"))
230+
txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
231+
with open_text_file("bagit.txt", "w") as bagit_file:
232+
bagit_file.write(txt)
233+
234+
LOGGER.info(_("Creating bag-info.txt"))
235+
if bag_info is None:
236+
bag_info = {}
237+
238+
# allow 'Bagging-Date' and 'Bag-Software-Agent' to be overidden
239+
if "Bagging-Date" not in bag_info:
240+
bag_info["Bagging-Date"] = date.strftime(date.today(), "%Y-%m-%d")
241+
if "Bag-Software-Agent" not in bag_info:
242+
bag_info["Bag-Software-Agent"] = "bagit.py v%s <%s>" % (
243+
VERSION,
244+
PROJECT_URL,
250245
)
251246

252-
LOGGER.info(_("Creating bagit.txt"))
253-
txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
254-
with open_text_file("bagit.txt", "w") as bagit_file:
255-
bagit_file.write(txt)
256-
257-
LOGGER.info(_("Creating bag-info.txt"))
258-
if bag_info is None:
259-
bag_info = {}
260-
261-
# allow 'Bagging-Date' and 'Bag-Software-Agent' to be overidden
262-
if "Bagging-Date" not in bag_info:
263-
bag_info["Bagging-Date"] = date.strftime(date.today(), "%Y-%m-%d")
264-
if "Bag-Software-Agent" not in bag_info:
265-
bag_info["Bag-Software-Agent"] = "bagit.py v%s <%s>" % (
266-
VERSION,
267-
PROJECT_URL,
268-
)
247+
bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
248+
_make_tag_file("bag-info.txt", bag_info)
269249

270-
bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
271-
_make_tag_file("bag-info.txt", bag_info)
250+
for c in checksums:
251+
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
272252

273-
for c in checksums:
274-
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
275253
except Exception:
276254
LOGGER.exception(_("An error occurred creating a bag in %s"), bag_dir)
277255
raise
@@ -482,31 +460,7 @@ def save(self, processes=1, manifests=False):
482460
% self.path
483461
)
484462

485-
unbaggable = _can_bag(self.path)
486-
if unbaggable:
487-
LOGGER.error(
488-
_(
489-
"Missing write permissions for the following directories and files:\n%s"
490-
),
491-
unbaggable,
492-
)
493-
raise BagError(_("Missing permissions to move all files and directories"))
494-
495-
unreadable_dirs, unreadable_files = _can_read(self.path)
496-
if unreadable_dirs or unreadable_files:
497-
if unreadable_dirs:
498-
LOGGER.error(
499-
_("The following directories do not have read permissions:\n%s"),
500-
unreadable_dirs,
501-
)
502-
if unreadable_files:
503-
LOGGER.error(
504-
_("The following files do not have read permissions:\n%s"),
505-
unreadable_files,
506-
)
507-
raise BagError(
508-
_("Read permissions are required to calculate file fixities")
509-
)
463+
_check_can_bag(self.path)
510464

511465
# Change working directory to bag directory so helper functions work
512466
old_dir = os.path.abspath(os.path.curdir)
@@ -1338,47 +1292,69 @@ def _walk(data_dir):
13381292
yield path
13391293

13401294

1341-
def _can_bag(test_dir):
1342-
"""Scan the provided directory for files which cannot be bagged due to insufficient permissions"""
1343-
unbaggable = []
1295+
def _check_can_bag(test_dir):
1296+
"""
1297+
Scan the provided directory to ensure all files and directories can be read
1298+
and alldirectories can be written.
13441299
1300+
If there are any permission issues, a BagError is raised.
1301+
"""
13451302
if not os.access(test_dir, os.R_OK):
1346-
# We cannot continue without permission to read the source directory
1347-
unbaggable.append(test_dir)
1348-
return unbaggable
1303+
LOGGER.error(_("Cannot read the source directory %s"), test_dir)
1304+
raise BagError(
1305+
_(
1306+
"Cannot read the source directory %s. Read permissions are "
1307+
"required to find files"
1308+
)
1309+
% test_dir
1310+
)
13491311

13501312
if not os.access(test_dir, os.W_OK):
1351-
unbaggable.append(test_dir)
1313+
LOGGER.error(_("Cannot write to the source directory %s"), test_dir)
1314+
raise BagError(
1315+
_(
1316+
"Cannot write to the source directory %s. Write permissions "
1317+
"are required to move files within the bag"
1318+
)
1319+
% test_dir
1320+
)
13521321

1353-
for dirpath, dirnames, filenames in os.walk(test_dir):
1354-
for directory in dirnames:
1355-
full_path = os.path.join(dirpath, directory)
1356-
if not os.access(full_path, os.W_OK):
1357-
unbaggable.append(full_path)
1322+
for dirpath, directories, filenames in os.walk(test_dir, topdown=True):
1323+
for filename in filenames:
1324+
full_path = os.path.join(dirpath, filename)
13581325

1359-
return unbaggable
1326+
if not os.access(full_path, os.R_OK):
1327+
LOGGER.error(_("Cannot read the file %s"), full_path)
1328+
raise BagError(
1329+
_(
1330+
"Cannot read the file %s. Read permissions are "
1331+
"required to calculate file fixities"
1332+
)
1333+
% full_path
1334+
)
13601335

1336+
for directory in directories:
1337+
full_path = os.path.join(dirpath, directory)
13611338

1362-
def _can_read(test_dir):
1363-
"""
1364-
returns ((unreadable_dirs), (unreadable_files))
1365-
"""
1366-
unreadable_dirs = []
1367-
unreadable_files = []
1339+
if not os.access(full_path, os.R_OK):
1340+
LOGGER.error(_("Cannot read the sub-directory %s"), full_path)
1341+
raise BagError(
1342+
_(
1343+
"Cannot read the sub-directory %s. Read permissions "
1344+
"are required to find files"
1345+
)
1346+
% full_path
1347+
)
13681348

1369-
if not os.access(test_dir, os.R_OK):
1370-
unreadable_dirs.append(test_dir)
1371-
else:
1372-
for dirpath, dirnames, filenames in os.walk(test_dir):
1373-
for dn in dirnames:
1374-
full_path = os.path.join(dirpath, dn)
1375-
if not os.access(full_path, os.R_OK):
1376-
unreadable_dirs.append(full_path)
1377-
for fn in filenames:
1378-
full_path = os.path.join(dirpath, fn)
1379-
if not os.access(full_path, os.R_OK):
1380-
unreadable_files.append(full_path)
1381-
return (tuple(unreadable_dirs), tuple(unreadable_files))
1349+
elif not os.access(full_path, os.W_OK):
1350+
LOGGER.error(_("Cannot write to the sub-directory %s"), full_path)
1351+
raise BagError(
1352+
_(
1353+
"Cannot write to the sub-directory %s. Write permissions "
1354+
"are required to move files within the bag"
1355+
)
1356+
% full_path
1357+
)
13821358

13831359

13841360
def generate_manifest_lines(filename, algorithms=DEFAULT_CHECKSUMS):
@@ -1489,10 +1465,7 @@ def _make_parser():
14891465

14901466
checksum_args = parser.add_argument_group(
14911467
_("Checksum Algorithms"),
1492-
_(
1493-
"Select the manifest algorithms to be used when creating bags"
1494-
" (default=%s)"
1495-
)
1468+
_("Select the manifest algorithms to be used when creating bags (default=%s)")
14961469
% ", ".join(DEFAULT_CHECKSUMS),
14971470
)
14981471

0 commit comments

Comments
 (0)