Skip to content

Commit bc828c5

Browse files
kenyaachon, mmbugua, pre-commit-ci[bot], foo, blink1073
authored
Enable users to copy both files and directories (#1190)
Co-authored-by: mmbugua <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: foo <[email protected]> Co-authored-by: Steven Silvester <[email protected]>
1 parent 903942c commit bc828c5

File tree

4 files changed

+365
-15
lines changed

4 files changed

+365
-15
lines changed

jupyter_server/services/contents/filemanager.py

Lines changed: 249 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
# Copyright (c) Jupyter Development Team.
33
# Distributed under the terms of the Modified BSD License.
44
import errno
5+
import math
56
import mimetypes
67
import os
8+
import platform
79
import shutil
810
import stat
11+
import subprocess
912
import sys
1013
import warnings
1114
from datetime import datetime
@@ -16,7 +19,7 @@
1619
from jupyter_core.paths import exists, is_file_hidden, is_hidden
1720
from send2trash import send2trash
1821
from tornado import web
19-
from traitlets import Bool, TraitError, Unicode, default, validate
22+
from traitlets import Bool, Int, TraitError, Unicode, default, validate
2023

2124
from jupyter_server import _tz as tz
2225
from jupyter_server.base.handlers import AuthenticatedFileHandler
@@ -25,7 +28,7 @@
2528

2629
from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
2730
from .fileio import AsyncFileManagerMixin, FileManagerMixin
28-
from .manager import AsyncContentsManager, ContentsManager
31+
from .manager import AsyncContentsManager, ContentsManager, copy_pat
2932

3033
try:
3134
from os.path import samefile
@@ -41,6 +44,8 @@ class FileContentsManager(FileManagerMixin, ContentsManager):
4144

4245
root_dir = Unicode(config=True)
4346

47+
# Upper bound, in megabytes, on the size of a directory that copy() will
# duplicate; check_folder_size() converts it to bytes and rejects larger
# directories with an HTTP 400 error.
max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied")
48+
4449
@default("root_dir")
4550
def _default_root_dir(self):
4651
try:
@@ -600,6 +605,126 @@ def get_kernel_path(self, path, model=None):
600605
parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
601606
return parent_dir
602607

608+
def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    Files are handed to the parent class implementation unchanged.
    Directories are first checked by ``check_folder_size`` and then copied
    recursively by ``_copy_dir``.

    If to_path is not specified, the copy is created in the parent directory
    of from_path.
    If the destination is an existing directory, the name of the copy is
    chosen by ``increment_filename`` with a ``-Copy`` insert so that it does
    not collide with an existing entry.
    from_path must be a full path to a file or directory.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir = ""
        from_name = path

    model = self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return super().copy(from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    self.check_folder_size(path)

    if to_path is None:
        to_path = from_dir
    # Directory that receives the copy. This is captured only after the
    # default has been applied: converting a missing destination with
    # str() would yield the literal "None" and send the copy into a bogus
    # "None" directory.
    to_path_original = to_path

    to_name = copy_pat.sub(".", from_name)
    if self.dir_exists(to_path):
        to_name = super().increment_filename(to_name, to_path, insert="-Copy")
        to_path = f"{to_path}/{to_name}"

    return self._copy_dir(
        from_path=from_path,
        to_path_original=to_path_original,
        to_name=to_name,
        to_path=to_path,
    )
651+
652+
def _copy_dir(self, from_path, to_path_original, to_name, to_path):
653+
"""
654+
handles copying directories
655+
returns the model for the copied directory
656+
"""
657+
try:
658+
os_from_path = self._get_os_path(from_path.strip("/"))
659+
os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
660+
shutil.copytree(os_from_path, os_to_path)
661+
model = self.get(to_path, content=False)
662+
except OSError as err:
663+
self.log.error(f"OSError in _copy_dir: {err}")
664+
raise web.HTTPError(
665+
400,
666+
f"Can't copy '{from_path}' into Folder '{to_path}'",
667+
) from err
668+
669+
return model
670+
671+
def check_folder_size(self, path):
    """
    Refuse to copy folders bigger than the max_copy_folder_size_mb trait.

    The size reported by ``_get_dir_size`` for ``path`` is compared, in
    bytes, against the configured limit; a 400 web.HTTPError is raised when
    the folder is larger. Keeping copies small prevents a timeout error.
    """
    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024
    size = int(self._get_dir_size(self._get_os_path(path)))
    if platform.system() == "Darwin":
        # on macOS the size comes back in KB, convert it to Bytes
        size = size * 1024

    if size <= limit_bytes:
        return

    message = (
        f"\nCan't copy folders larger than {self.max_copy_folder_size_mb}MB,\n"
        f'"{path}" is {self._human_readable_size(size)}\n'
    )
    raise web.HTTPError(400, message)
689+
690+
def _get_dir_size(self, path="."):
691+
"""
692+
calls the command line program du to get the directory size
693+
"""
694+
try:
695+
if platform.system() == "Darwin":
696+
# retuns the size of the folder in KB
697+
result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split()
698+
else:
699+
result = subprocess.run(
700+
["du", "-s", "--block-size=1", path], capture_output=True
701+
).stdout.split()
702+
703+
self.log.info(f"current status of du command {result}")
704+
size = result[0].decode("utf-8")
705+
except Exception as err:
706+
self.log.error(f"Error during directory copy: {err}")
707+
raise web.HTTPError(
708+
400,
709+
f"""
710+
Unexpected error during copy operation,
711+
not able to get the size of the {path} directory
712+
""",
713+
) from err
714+
return size
715+
716+
def _human_readable_size(self, size):
717+
"""
718+
returns folder size in a human readable format
719+
"""
720+
if size == 0:
721+
return "0 Bytes"
722+
723+
units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
724+
order = int(math.log2(size) / 10) if size else 0
725+
726+
return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])
727+
603728

604729
class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
605730
"""An async file contents manager."""
@@ -955,3 +1080,125 @@ async def get_kernel_path(self, path, model=None):
9551080
return path
9561081
parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
9571082
return parent_dir
1083+
1084+
async def copy(self, from_path, to_path=None):
    """
    Copy an existing file or directory and return its new model.

    Files are handed to ``AsyncContentsManager.copy`` unchanged.
    Directories are first checked by ``check_folder_size`` and then copied
    recursively by ``_copy_dir``.

    If to_path is not specified, the copy is created in the parent directory
    of from_path.
    If the destination is an existing directory, the name of the copy is
    chosen by ``increment_filename`` with a ``-Copy`` insert so that it does
    not collide with an existing entry.
    from_path must be a full path to a file or directory.
    """
    path = from_path.strip("/")
    if to_path is not None:
        to_path = to_path.strip("/")

    if "/" in path:
        from_dir, from_name = path.rsplit("/", 1)
    else:
        from_dir = ""
        from_name = path

    model = await self.get(path)
    if model["type"] != "directory":
        # let the super class handle copying files
        return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

    # limit the size of folders being copied to prevent a timeout error
    await self.check_folder_size(path)

    if to_path is None:
        to_path = from_dir
    # Directory that receives the copy. This is captured only after the
    # default has been applied: converting a missing destination with
    # str() would yield the literal "None" and send the copy into a bogus
    # "None" directory.
    to_path_original = to_path

    to_name = copy_pat.sub(".", from_name)
    if await self.dir_exists(to_path):
        to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
        to_path = f"{to_path}/{to_name}"

    return await self._copy_dir(
        from_path=from_path,
        to_path_original=to_path_original,
        to_name=to_name,
        to_path=to_path,
    )
1127+
1128+
async def _copy_dir(
1129+
self, from_path: str, to_path_original: str, to_name: str, to_path: str
1130+
) -> dict:
1131+
"""
1132+
handles copying directories
1133+
returns the model for the copied directory
1134+
"""
1135+
try:
1136+
os_from_path = self._get_os_path(from_path.strip("/"))
1137+
os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
1138+
shutil.copytree(os_from_path, os_to_path)
1139+
model = await self.get(to_path, content=False)
1140+
except OSError as err:
1141+
self.log.error(f"OSError in _copy_dir: {err}")
1142+
raise web.HTTPError(
1143+
400,
1144+
f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
1145+
) from err
1146+
1147+
return model
1148+
1149+
async def check_folder_size(self, path: str) -> None:
    """
    Refuse to copy folders bigger than the max_copy_folder_size_mb trait.

    The size reported by ``_get_dir_size`` for ``path`` is compared, in
    bytes, against the configured limit; a 400 web.HTTPError is raised when
    the folder is larger. Keeping copies small prevents a timeout error.
    """
    limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024

    raw_size = await self._get_dir_size(self._get_os_path(path))
    size = int(raw_size)
    if platform.system() == "Darwin":
        # on macOS the size comes back in KB, convert it to Bytes
        size = size * 1024

    if size <= limit_bytes:
        return

    message = (
        f"\nCan't copy folders larger than {self.max_copy_folder_size_mb}MB,\n"
        f'"{path}" is {await self._human_readable_size(size)}\n'
    )
    raise web.HTTPError(400, message)
1167+
1168+
async def _get_dir_size(self, path: str = ".") -> str:
1169+
"""
1170+
calls the command line program du to get the directory size
1171+
"""
1172+
try:
1173+
if platform.system() == "Darwin":
1174+
# retuns the size of the folder in KB
1175+
result = subprocess.run(["du", "-sk", path], capture_output=True).stdout.split()
1176+
else:
1177+
result = subprocess.run(
1178+
["du", "-s", "--block-size=1", path], capture_output=True
1179+
).stdout.split()
1180+
1181+
self.log.info(f"current status of du command {result}")
1182+
size = result[0].decode("utf-8")
1183+
except Exception as err:
1184+
self.log.error(f"Error during directory copy: {err}")
1185+
raise web.HTTPError(
1186+
400,
1187+
f"""
1188+
Unexpected error during copy operation,
1189+
not able to get the size of the {path} directory
1190+
""",
1191+
) from err
1192+
return size
1193+
1194+
async def _human_readable_size(self, size: int) -> str:
1195+
"""
1196+
returns folder size in a human readable format
1197+
"""
1198+
if size == 0:
1199+
return "0 Bytes"
1200+
1201+
units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
1202+
order = int(math.log2(size) / 10) if size else 0
1203+
1204+
return "{:.4g} {}".format(size / (1 << (order * 10)), units[order])

jupyter_server/services/contents/manager.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,18 @@
1515
from nbformat import validate as validate_nb
1616
from nbformat.v4 import new_notebook
1717
from tornado.web import HTTPError, RequestHandler
18-
from traitlets import Any, Bool, Dict, Instance, List, TraitError, Type, Unicode, default, validate
18+
from traitlets import (
19+
Any,
20+
Bool,
21+
Dict,
22+
Instance,
23+
List,
24+
TraitError,
25+
Type,
26+
Unicode,
27+
default,
28+
validate,
29+
)
1930
from traitlets.config.configurable import LoggingConfigurable
2031

2132
from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI

tests/services/contents/test_api.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,27 @@ async def test_copy(jp_fetch, contents, contents_dir, _check_created):
494494
_check_created(r, str(contents_dir), path, copy3, type="notebook")
495495

496496

497+
async def test_copy_dir(jp_fetch, contents, contents_dir, _check_created):
    """Copy a directory into another directory twice and check both results."""
    source_dir = "foo"
    target_dir = "parent"
    payload = json.dumps({"copy_from": source_dir})

    # first copy: a nested copy of the original folder is created under its own name
    first = await jp_fetch("api", "contents", target_dir, method="POST", body=payload)
    _check_created(first, str(contents_dir), target_dir, source_dir, type="directory")

    # second copy: the name is already taken in the target, so a "-Copy1" suffix is expected
    second = await jp_fetch("api", "contents", target_dir, method="POST", body=payload)
    _check_created(
        second, str(contents_dir), target_dir, f"{source_dir}-Copy1", type="directory"
    )
516+
517+
497518
async def test_copy_path(jp_fetch, contents, contents_dir, _check_created):
498519
path1 = "foo"
499520
path2 = "å b"
@@ -577,18 +598,6 @@ async def test_copy_put_400_hidden(
577598
assert expected_http_error(e, 400)
578599

579600

580-
async def test_copy_dir_400(jp_fetch, contents, contents_dir, _check_created):
581-
with pytest.raises(tornado.httpclient.HTTPClientError) as e:
582-
await jp_fetch(
583-
"api",
584-
"contents",
585-
"foo",
586-
method="POST",
587-
body=json.dumps({"copy_from": "å b"}),
588-
)
589-
assert expected_http_error(e, 400)
590-
591-
592601
@pytest.mark.skipif(sys.platform == "win32", reason="Disabled copying hidden files on Windows")
593602
async def test_copy_400_hidden(
594603
jp_fetch,

0 commit comments

Comments
 (0)