1010
1111import azure .storage .blob as asb
1212import fsspec
13+ from aiopath import AsyncPath
1314from azure .core .exceptions import HttpResponseError
1415from azure .storage .blob import BlobBlock
1516from azure .storage .blob .aio import BlobClient
1819
1920if TYPE_CHECKING :
2021 import httpx
22+ from azure .storage .blob .aio import StorageStreamDownloader
2123 from azure .storage .queue import QueueMessage
2224
2325HTTPX_METHODS : TypeAlias = Literal ["GET" , "POST" ]
24- AIO_SERVE = QSC .from_connection_string (conn_str = os .environ ["AzureWebJobsStorage" ]) # noqa: SIM112
26+ AIO_SERVE = QSC .from_connection_string (conn_str = os .environ ["AzureWebJobsStorage" ])
2527
2628
2729async def peek_messages (queue : str , max_messages : int | None = None , ** kwargs ):
@@ -149,7 +151,7 @@ async def clear_messages(
149151
150152
151153class async_abfs :
152- def __init__ (self , connection_string = os .environ ["Synblob" ]): # noqa: SIM112
154+ def __init__ (self , connection_string = os .environ ["Synblob" ]):
153155 self .connection_string = connection_string
154156 self .sync = fsspec .filesystem ("abfss" , connection_string = self .connection_string )
155157 key_conv = {"AccountName" : "account_name" , "AccountKey" : "account_key" }
@@ -189,13 +191,12 @@ async def stream_dl(
189191 ** httpx_extras ,
190192 ) -> None :
191193 """
192- Help on method stream_dl .
194+ stream_dl will stream the contents of a url to a path in the cloud given an httpx Client .
193195
194- async stream_dl(client, method, url, **httpx_extras)
196+ async stream_dl(client, method, url, path, recurs, **httpx_extras)
195197 Download a file asynchronously, streaming it in chunks from the URL to a Blob
196198
197- Parameters
198- ----------
199+ Args:
199200 client: httpx.AsyncClient
200201 The httpx Async Client object to use
201202 method:
@@ -204,7 +205,9 @@ async def stream_dl(
204205 The URL to download
205206 path:
206207 The full path to Azure file being saved
207- **httpx_extras
208+ recurs:
209+ To try again recursively
210+ httpx_extras
208211 Any extra arguments to be sent to client.stream
209212 """
210213 async with (
@@ -216,7 +219,7 @@ async def stream_dl(
216219 resp .raise_for_status ()
217220 block_list = []
218221 async for chunk in resp .aiter_bytes ():
219- chunk = cast (IO , chunk )
222+ chunk = cast ("IO" , chunk )
220223 block_id = uuid4 ().hex
221224 try :
222225 await target .stage_block (block_id = block_id , data = chunk )
@@ -244,60 +247,63 @@ async def stream_dl(
244247
async def stream_up(
    self,
    local_path: str | Path | AsyncPath,
    remote_path: str,
    size: int = 16384,
    /,
    recurs=False,
) -> None:
    """
    Upload a local file to a Blob, streaming it in chunks asynchronously.

    async stream_up(local_path, remote_path, size, recurs)
    Each chunk read from the local file is staged as its own block; the
    block list is committed once the whole file has been read.

    Args:
        local_path:
            The full path to the local file as str, Path or AsyncPath
        remote_path:
            The full remote path as str, split on the first "/" into the
            positional arguments given to BlobClient.from_connection_string
        size:
            The number of bytes read per iteration in read
        recurs:
            Retry marker. When it is False and staging fails with the
            "invalid block content" error, the whole upload is retried
            once; with any other value that error is re-raised.

    Raises:
        HttpResponseError: for any staging error other than the "invalid
            block content" one, or for that one when the retry also fails.
    """
    if isinstance(local_path, (str, Path)):
        local_path = AsyncPath(local_path)
    async with (
        BlobClient.from_connection_string(
            self.connection_string, *(remote_path.split("/", maxsplit=1))
        ) as target,
        local_path.open("rb") as src,
    ):
        block_list = []
        # An empty read marks end of file and ends the loop.
        while chunk := await src.read(size):
            block_id = uuid4().hex
            try:
                # The cast only affects static typing; the bytes are sent as-is.
                await target.stage_block(block_id=block_id, data=cast("IO", chunk))
            except HttpResponseError as err:
                if "The specified blob or block content is invalid." not in str(err):
                    raise
                await asyncio.sleep(1)
                # Presumably this resets the blob so the retry starts clean
                # (commit an empty block list, then delete) -- TODO confirm.
                await target.commit_block_list([])
                await target.delete_blob()
                if recurs is not False:
                    raise
                # Retry the whole upload once, keeping the caller's chunk size.
                await self.stream_up(local_path, remote_path, size, recurs=True)
                # The retry already committed the full blob. Continuing here
                # would commit a stale block list over it.
                return
            block_list.append(BlobBlock(block_id=block_id))
        await target.commit_block_list(block_list)
301307
302308 async def walk (self , path : str , maxdepth = None , ** kwargs ):
303309 """
@@ -312,16 +318,16 @@ async def walk(self, path: str, maxdepth=None, **kwargs):
312318 Note that the "files" outputted will include anything that is not
313319 a directory, such as links.
314320
315- Parameters
316- ----------
321+ Args:
317322 path: str
318323 Root to recurse into
319324
320325 maxdepth: int
321326 Maximum recursion depth. None means limitless, but not recommended
322327 on link-based file-systems.
323328
324- **kwargs are passed to ``ls``
329+ kwargs:
330+ dict of args passed to ``ls``
325331 """
326332 this_fs = fsspec .filesystem (
327333 "abfss" , connection_string = self .connection_string , asyncronous = True
@@ -359,8 +365,7 @@ async def details(
359365 AzureBlobFileSystem instance
360366 Return a list of dictionaries of specifying details about the contents
361367
362- Parameters
363- ----------
368+ Args:
364369 contents
365370
366371 delimiter: str
@@ -442,8 +447,7 @@ async def ls(
442447 versions: bool = False, **kwargs) method of adlfs.spec.AzureBlobFileSystem instance
443448 Create a list of blob names from a blob container
444449
445- Parameters
446- ----------
450+ Args:
447451 path: str
448452 Path to an Azure Blob with its container name
449453
@@ -486,8 +490,7 @@ async def rm(
486490 """
487491 Delete files.
488492
489- Parameters
490- ----------
493+ Args:
491494 path: str or list of str
492495 File(s) to delete.
493496 recursive: bool
@@ -538,3 +541,10 @@ def make_sas_link(self, filepath, expiry=None, write=False):
538541 expiry = expiry ,
539542 )
540543 return f"https://{ account_dict ['AccountName' ]} .blob.core.windows.net/{ filepath } ?{ sas } "
544+
async def stream(self, path: str) -> StorageStreamDownloader[bytes]:
    """
    Start downloading a Blob and return its stream downloader.

    Args:
        path:
            The full remote path as str, split on the first "/" into the
            positional arguments given to BlobClient.from_connection_string

    Returns:
        The object returned by awaiting BlobClient.download_blob.

    NOTE(review): the BlobClient created here is never closed in this
    method; presumably the returned downloader still needs it -- confirm
    who is responsible for closing it.
    """
    name_parts = path.split("/", maxsplit=1)
    client = BlobClient.from_connection_string(self.connection_string, *name_parts)
    return await client.download_blob()
0 commit comments