Skip to content

Commit bfb994f

Browse files
committed
Add a limit to the number of files listed in the backend contents manager
1 parent e87ba88 commit bfb994f

File tree

2 files changed

+23
-3
lines changed

2 files changed

+23
-3
lines changed

jupyter_drives/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
from sys import platform
33
import entrypoints
4-
from traitlets import Enum, Unicode, default
4+
from traitlets import Enum, Unicode, default, Int
55
from traitlets.config import Configurable
66

77
# Supported third-party services
@@ -59,7 +59,7 @@ class DrivesConfig(Configurable):
5959
help="Custom path of file where credentials are located. Extension automatically checks jupyter_notebook_config.py or directly in ~/.aws/credentials for AWS CLI users."
6060
)
6161

62-
max_files_shown = Unicode(
62+
max_files_shown = Int(
6363
None,
6464
config = True,
6565
allow_none = True,

jupyter_drives/manager.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,23 +193,43 @@ async def get_contents(self, drive_name, path):
193193
isDir = False
194194
emptyDir = True # assume we are dealing with an empty directory
195195

196+
chunk_size = 100
197+
if self._config.max_files_shown < chunk_size:
198+
chunk_size = self._config.max_files_shown
199+
no_batches = int(self._config.max_files_shown/chunk_size)
200+
196201
# using Arrow lists as they are recommended for large results
197202
# stream will be an async iterable of RecordBatch
198-
stream = obs.list(self._content_managers[drive_name]["store"], path, chunk_size=100, return_arrow=True)
203+
current_batch = 0
204+
stream = obs.list(self._content_managers[drive_name]["store"], path, chunk_size=chunk_size, return_arrow=True)
199205
async for batch in stream:
206+
current_batch += 1
207+
# reached last batch that can be shown (partially)
208+
if current_batch == no_batches + 1:
209+
remaining_files = self._config.max_files_shown - no_batches*chunk_size
210+
200211
# if content exists we are dealing with a directory
201212
if isDir is False and batch:
202213
isDir = True
203214
emptyDir = False
204215

205216
contents_list = pyarrow.record_batch(batch).to_pylist()
206217
for object in contents_list:
218+
# when listing the last batch (partially), make sure we don't exceed limit
219+
if current_batch == no_batches + 1:
220+
if remaining_files <= 0:
221+
break
222+
remaining_files -= 1
207223
data.append({
208224
"path": object["path"],
209225
"last_modified": object["last_modified"].isoformat(),
210226
"size": object["size"],
211227
})
212228

229+
# check if we reached the limit of files that can be listed
230+
if current_batch == no_batches + 1:
231+
break
232+
213233
# check if we are dealing with an empty drive
214234
if isDir is False and path != '':
215235
content = b""

0 commit comments

Comments
 (0)