add getContents endpoint

DenisaCG · DenisaCG · commit 20722796256b · 2024-11-19T18:05:30.000+01:00
diff --git a/jupyter_drives/handlers.py b/jupyter_drives/handlers.py
@@ -69,17 +69,17 @@ def initialize(self, logger: logging.Logger, manager: JupyterDrivesManager):
         return super().initialize(logger, manager)
     
     @tornado.web.authenticated
-    async def get(self, path: str = "", drive: str = ""):
+    async def get(self, drive: str = "", path: str = ""):
         result = await self._manager.get_contents(drive, path)
         self.finish(result)
 
     @tornado.web.authenticated
-    async def post(self, path: str = "", drive: str = ""):
+    async def post(self, drive: str = "", path: str = ""):
         result = await self._manager.new_file(drive, path)
         self.finish(result)
 
     @tornado.web.authenticated
-    async def patch(self, path: str = "", drive: str = ""):
+    async def patch(self, drive: str = "", path: str = ""):
         body = self.get_json_body()
         result = await self._manager.rename_file(drive, path, **body)
         self.finish(result)
diff --git a/jupyter_drives/manager.py b/jupyter_drives/manager.py
@@ -3,6 +3,7 @@
 import logging
 from typing import Dict, List, Optional, Tuple, Union, Any
 
+import os
 import tornado
 import httpx
 import traitlets
@@ -11,6 +12,7 @@
 import obstore as obs
 from libcloud.storage.types import Provider
 from libcloud.storage.providers import get_driver
+import pyarrow
 
 from .log import get_logger
 from .base import DrivesConfig
@@ -153,14 +155,63 @@ async def unmount_drive(self, drive_name: str):
         
         return
     
-    async def get_contents(self, drive_name, path, **kwargs):
+    async def get_contents(self, drive_name, path):
         """Get contents of a file or directory.
 
         Args:
             drive_name: name of drive to get the contents of
-            path: path to file or directory
+            path: path to file or directory (empty string for root listing)
         """
-        print('Get contents function called.')
+        print('!!!!!!!!!!!!!!!!!!!', drive_name, 'path: ', path)
+        if path == '/':
+            path = ''
+        drive_name = 'jupyter-drives-test-bucket-1'
+        try :
+            currentObject = os.path.basename(path) if os.path.basename(path) is not None else ''
+            print('currentObject: ', currentObject)
+            # check if we are listing contents of a directory
+            if currentObject.find('.') == -1:
+                print('in if')
+                print('store: ', self._content_managers)
+                data = []
+                # using Arrow lists as they are recommended for large results
+                # stream will be an async iterable of RecordBatch
+                stream = obs.list(self._content_managers[drive_name], path, chunk_size=100, return_arrow=True)
+                async for batch in stream:
+                    contents_list = pyarrow.record_batch(batch).to_pylist()
+                    for object in contents_list:
+                        data.append({
+                            "path": object["path"],
+                            "last_modified": object["last_modified"].isoformat(),
+                            "size": object["size"],
+                        })
+            else:
+                content = b""
+                # retrieve contents of object
+                obj = await obs.get_async(self._content_managers[drive_name], path)
+                stream = obj.stream(min_chunk_size=5 * 1024 * 1024) # 5MB sized chunks
+                async for buf in stream: 
+                    content += buf
+                
+                # retrieve metadata of object
+                metadata = await obs.head_async(self._content_managers[drive_name], path)
+                data = {
+                    "path": path, 
+                    "content": content,
+                    "last_modified": metadata["last_modified"].isoformat(),
+                    "size": metadata["size"]
+                }
+            print(data)
+            response = {
+                "data": data
+            }
+        except Exception as e:
+            raise tornado.web.HTTPError(
+            status_code= httpx.codes.BAD_REQUEST,
+            reason=f"The following error occured when retrieving the contents: {e}",
+            )
+        
+        return response
     
     async def new_file(self, drive_name, path, **kwargs):
         """Create a new file or directory at the given path.
diff --git a/src/contents.ts b/src/contents.ts
@@ -5,7 +5,7 @@ import { Signal, ISignal } from '@lumino/signaling';
 import { Contents, ServerConnection } from '@jupyterlab/services';
 import { PathExt } from '@jupyterlab/coreutils';
 import { IDriveInfo } from './token';
-import { mountDrive } from './requests';
+import { getContents, mountDrive } from './requests';
 
 let data: Contents.IModel = {
   name: '',
@@ -206,6 +206,9 @@ export class Drive implements Contents.IDrive {
         }
       }
 
+      const resp = await getContents(currentDrive.name, { path: '' });
+      console.log('resp: ', resp);
+
       data = {
         name: PathExt.basename(localPath),
         path: PathExt.basename(localPath),
diff --git a/src/requests.ts b/src/requests.ts
@@ -1,4 +1,5 @@
 import { ReadonlyJSONObject } from '@lumino/coreutils';
+
 import { requestAPI } from './handler';
 
 /**
@@ -24,3 +25,13 @@ export async function mountDrive(
   };
   return await requestAPI<any>('drives', 'POST', body);
 }
+
+export async function getContents(
+  driveName: string,
+  options: { path: string }
+) {
+  return await requestAPI<any>(
+    'drives/' + driveName + '/' + options.path,
+    'GET'
+  );
+}