Add a limit on the number of files listed by the backend contents manager

DenisaCG · DenisaCG · commit bfb994f3f46f · 2024-12-06T15:03:31.000+01:00
diff --git a/jupyter_drives/base.py b/jupyter_drives/base.py
@@ -1,7 +1,7 @@
 import os
 from sys import platform
 import entrypoints
-from traitlets import Enum, Unicode, default
+from traitlets import Enum, Unicode, default, Int
 from traitlets.config import Configurable
 
 # Supported third-party services
@@ -59,7 +59,7 @@ class DrivesConfig(Configurable):
         help="Custom path of file where credentials are located. Extension automatically checks jupyter_notebook_config.py or directly in ~/.aws/credentials for AWS CLI users."
     )
 
-    max_files_shown = Unicode(
+    max_files_shown = Int(
         None,
         config = True,
         allow_none = True,
diff --git a/jupyter_drives/manager.py b/jupyter_drives/manager.py
@@ -193,23 +193,43 @@ async def get_contents(self, drive_name, path):
             isDir = False
             emptyDir = True # assume we are dealing with an empty directory
 
+            chunk_size = 100
+            if self._config.max_files_shown < chunk_size:
+                chunk_size = self._config.max_files_shown
+            no_batches = int(self._config.max_files_shown/chunk_size)
+
             # using Arrow lists as they are recommended for large results
             # stream will be an async iterable of RecordBatch
-            stream = obs.list(self._content_managers[drive_name]["store"], path, chunk_size=100, return_arrow=True)
+            current_batch = 0
+            stream = obs.list(self._content_managers[drive_name]["store"], path, chunk_size=chunk_size, return_arrow=True)
             async for batch in stream:
+                current_batch += 1
+                # reached last batch that can be shown (partially)
+                if current_batch == no_batches + 1:
+                    remaining_files = self._config.max_files_shown - no_batches*chunk_size
+                    
                 # if content exists we are dealing with a directory
                 if isDir is False and batch: 
                     isDir = True
                     emptyDir = False
                     
                 contents_list = pyarrow.record_batch(batch).to_pylist()
                 for object in contents_list:
+                    # when listing the last batch (partially), make sure we don't exceed limit
+                    if current_batch == no_batches + 1:
+                        if remaining_files <= 0:
+                            break
+                        remaining_files -= 1
                     data.append({
                         "path": object["path"],
                         "last_modified": object["last_modified"].isoformat(),
                         "size": object["size"],
                     })
                 
+                # check if we reached the limit of files that can be listed
+                if current_batch == no_batches + 1:
+                    break
+                
             # check if we are dealing with an empty drive
             if isDir is False and path != '':
                 content = b""