
Commit d2c26e3
fix: storage backend path handling and async streaming blocking
- Fix ObStoreBackend LocalStore path mismatch when base_path is provided (#336)
- Fix async streaming blocking event loop in all three backends (ObStore, LocalStore, FSSpec)
- Add AsyncChunkedBytesIterator class for mypyc-compatible non-blocking streaming
- Fix LocalStore base_path to combine with URI path instead of overriding
- Fix FSSpecBackend to auto-derive base_path from file:// URIs
- Add comprehensive tests for all fixes
1 parent ac19c46 commit d2c26e3

File tree: 9 files changed, +610 -55 lines


pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@ maintainers = [{ name = "Litestar Developers", email = "[email protected]" }]
 name = "sqlspec"
 readme = "README.md"
 requires-python = ">=3.10, <4.0"
-version = "0.38.1"
+version = "0.38.2"
 
 [project.urls]
 Discord = "https://discord.gg/litestar"
@@ -240,7 +240,7 @@ opt_level = "3" # Maximum optimization (0-3)
 allow_dirty = true
 commit = false
 commit_args = "--no-verify"
-current_version = "0.38.1"
+current_version = "0.38.2"
 ignore_missing_files = false
 ignore_missing_version = false
 message = "chore(release): bump to v{new_version}"

sqlspec/storage/backends/base.py

Lines changed: 63 additions & 1 deletion
@@ -8,7 +8,7 @@
 
 from sqlspec.typing import ArrowRecordBatch, ArrowTable
 
-__all__ = ("AsyncArrowBatchIterator", "AsyncBytesIterator", "ObjectStoreBase")
+__all__ = ("AsyncArrowBatchIterator", "AsyncBytesIterator", "AsyncChunkedBytesIterator", "ObjectStoreBase")
 
 
 class AsyncArrowBatchIterator:
@@ -58,6 +58,9 @@ class AsyncBytesIterator:
 
     The class wraps a synchronous iterator and exposes it as an async iterator,
     enabling usage with `async for` syntax.
+
+    Note: This class blocks the event loop during I/O. For non-blocking streaming,
+    use AsyncChunkedBytesIterator with pre-loaded data instead.
     """
 
     __slots__ = ("_sync_iter",)
@@ -89,6 +92,65 @@ async def __anext__(self) -> bytes:
         raise StopAsyncIteration from None
 
 
+class AsyncChunkedBytesIterator:
+    """Async iterator that yields pre-loaded bytes data in chunks.
+
+    This class implements the async iterator protocol without using async generators,
+    allowing it to be compiled by mypyc (which doesn't support async generators).
+
+    Unlike AsyncBytesIterator, this class works with pre-loaded data and yields
+    control to the event loop between chunks via asyncio.sleep(0), ensuring
+    the event loop is not blocked during iteration.
+
+    Usage pattern:
+        # Load data in thread pool to avoid blocking
+        data = await asyncio.to_thread(read_bytes, path)
+        # Stream chunks without blocking event loop
+        return AsyncChunkedBytesIterator(data, chunk_size=65536)
+    """
+
+    __slots__ = ("_chunk_size", "_data", "_offset")
+
+    def __init__(self, data: bytes, chunk_size: int = 65536) -> None:
+        """Initialize the chunked bytes iterator.
+
+        Args:
+            data: The bytes data to iterate over in chunks.
+            chunk_size: Size of each chunk to yield (default: 65536 bytes).
+        """
+        self._data = data
+        self._chunk_size = chunk_size
+        self._offset = 0
+
+    def __aiter__(self) -> "AsyncChunkedBytesIterator":
+        """Return self as the async iterator."""
+        return self
+
+    async def __anext__(self) -> bytes:
+        """Get the next chunk of bytes asynchronously.
+
+        Yields control to the event loop via asyncio.sleep(0) before returning
+        each chunk, ensuring other tasks can run during iteration.
+
+        Returns:
+            The next chunk of bytes.
+
+        Raises:
+            StopAsyncIteration: When all data has been yielded.
+        """
+        import asyncio
+
+        if self._offset >= len(self._data):
+            raise StopAsyncIteration
+
+        # Yield to event loop to allow other tasks to run
+        await asyncio.sleep(0)
+
+        chunk = self._data[self._offset : self._offset + self._chunk_size]
+        self._offset += self._chunk_size
+        return chunk
+
+
 @mypyc_attr(allow_interpreted_subclasses=True)
 class ObjectStoreBase(ABC):
     """Base class for storage backends."""

sqlspec/storage/backends/fsspec.py

Lines changed: 42 additions & 8 deletions
@@ -77,11 +77,18 @@ def __init__(self, uri: str, **kwargs: Any) -> None:
             uri: Filesystem URI (protocol://path).
             **kwargs: Additional fsspec configuration options, including an optional base_path.
 
-                For cloud URIs such as S3/GS/Azure, we derive a default base_path from the bucket/path when no explicit base_path is provided.
+                For cloud URIs (S3/GS/Azure) and file:// URIs, we derive a default base_path from the
+                URI path when no explicit base_path is provided. When both URI and base_path are provided,
+                they are combined (base_path is appended to URI-derived path).
+
+        Examples:
+            - FSSpecBackend("s3://bucket/prefix") -> base_path = "bucket/prefix"
+            - FSSpecBackend("file:///home/user/storage") -> base_path = "/home/user/storage"
+            - FSSpecBackend("file:///home/user", base_path="subdir") -> base_path = "/home/user/subdir"
         """
         ensure_fsspec()
 
-        base_path = kwargs.pop("base_path", "")
+        explicit_base_path = kwargs.pop("base_path", "")
 
         if "://" in uri:
             self.protocol = uri.split("://", maxsplit=1)[0]
@@ -93,13 +100,24 @@ def __init__(self, uri: str, **kwargs: Any) -> None:
                 uri_base_path = parsed.netloc
                 if parsed.path and parsed.path != "/":
                     uri_base_path = f"{uri_base_path}{parsed.path}"
-                if not base_path:
-                    base_path = uri_base_path
+                # Combine URI path with explicit base_path if both provided
+                if explicit_base_path:
+                    uri_base_path = f"{uri_base_path.rstrip('/')}/{explicit_base_path.lstrip('/')}"
+                explicit_base_path = uri_base_path
+            elif self.protocol == "file":
+                parsed = urlparse(uri)
+                if parsed.path and parsed.path != "/":
+                    # For file protocol, keep the path as-is (preserve leading slash for absolute paths)
+                    uri_base_path = parsed.path
+                    # Combine URI path with explicit base_path if both provided
+                    if explicit_base_path:
+                        uri_base_path = f"{uri_base_path.rstrip('/')}/{explicit_base_path.lstrip('/')}"
+                    explicit_base_path = uri_base_path
         else:
             self.protocol = uri
             self._fs_uri = f"{uri}://"
 
-        self.base_path = base_path.rstrip("/") if base_path else ""
+        self.base_path = explicit_base_path.rstrip("/") if explicit_base_path else ""
 
         import fsspec
 
@@ -453,10 +471,26 @@ async def write_bytes_async(self, path: "str | Path", data: bytes, **kwargs: Any
     async def stream_read_async(
         self, path: "str | Path", chunk_size: "int | None" = None, **kwargs: Any
     ) -> AsyncIterator[bytes]:
-        """Stream bytes from storage asynchronously."""
-        from sqlspec.storage.backends.base import AsyncBytesIterator
+        """Stream bytes from storage asynchronously.
+
+        Uses asyncio.to_thread() to run blocking I/O in a thread pool,
+        ensuring the event loop is not blocked during read operations.
+
+        Args:
+            path: Path to the file to read.
+            chunk_size: Size of chunks to yield (default: 65536 bytes).
+            **kwargs: Additional arguments passed to read_bytes.
+
+        Returns:
+            AsyncIterator yielding chunks of bytes.
+        """
+        import asyncio
+
+        from sqlspec.storage.backends.base import AsyncChunkedBytesIterator
 
-        return AsyncBytesIterator(self.stream_read(path, chunk_size, **kwargs))
+        # Pass original path - read_bytes handles path resolution
+        data = await asyncio.to_thread(self.read_bytes, path, **kwargs)
+        return AsyncChunkedBytesIterator(data, chunk_size or 65536)
 
     def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator["ArrowRecordBatch"]:
         """Stream Arrow record batches from storage asynchronously.

sqlspec/storage/backends/local.py

Lines changed: 31 additions & 8 deletions
@@ -72,10 +72,14 @@ def __init__(self, uri: str = "", **kwargs: Any) -> None:
 
         Args:
             uri: File URI or path (e.g., "file:///path" or "/path")
-            **kwargs: Additional options (base_path for relative operations)
-
-        The URI may be a file:// path (Windows style like file:///C:/path is supported),
-        and an explicit base_path override will take precedence before we ensure the directory exists.
+            **kwargs: Additional options including:
+                - base_path: Subdirectory relative to URI path. If relative, it's combined
+                  with the URI path. If absolute, it takes precedence (backward compatible).
+
+        The URI may be a file:// path (Windows style like file:///C:/path is supported).
+        When both URI and base_path are provided, they are combined:
+            - file:///home/user/storage + base_path="subdir" -> /home/user/storage/subdir
+            - file:///home/user/storage + base_path="/other" -> /other (absolute takes precedence)
         """
         if uri.startswith("file://"):
             parsed = urlparse(uri)
@@ -89,7 +93,9 @@ def __init__(self, uri: str = "", **kwargs: Any) -> None:
             self.base_path = Path.cwd()
 
         if "base_path" in kwargs:
-            self.base_path = Path(kwargs["base_path"]).resolve()
+            # Combine URI path with base_path (Path division handles absolute paths correctly)
+            # If base_path is absolute, it takes precedence (backward compatible)
+            self.base_path = (self.base_path / kwargs["base_path"]).resolve()
 
         if not self.base_path.exists():
             self.base_path.mkdir(parents=True, exist_ok=True)
@@ -377,10 +383,27 @@ async def write_text_async(self, path: "str | Path", data: str, encoding: str = "utf-8", **k
     async def stream_read_async(
         self, path: "str | Path", chunk_size: "int | None" = None, **kwargs: Any
     ) -> AsyncIterator[bytes]:
-        """Stream bytes from file asynchronously."""
-        from sqlspec.storage.backends.base import AsyncBytesIterator
+        """Stream bytes from file asynchronously.
+
+        Uses asyncio.to_thread() to run blocking file I/O in a thread pool,
+        ensuring the event loop is not blocked during read operations.
 
-        return AsyncBytesIterator(self.stream_read(path, chunk_size, **kwargs))
+        Args:
+            path: Path to the file to read.
+            chunk_size: Size of chunks to yield (default: 65536 bytes).
+            **kwargs: Additional arguments (unused).
+
+        Returns:
+            AsyncIterator yielding chunks of bytes.
+        """
+        import asyncio
+
+        from sqlspec.storage.backends.base import AsyncChunkedBytesIterator
+
+        resolved = self._resolve_path(path)
+        # Run blocking I/O in thread pool to avoid blocking event loop
+        data = await asyncio.to_thread(resolved.read_bytes)
+        return AsyncChunkedBytesIterator(data, chunk_size or 65536)
 
     async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> "list[str]":
        """List objects asynchronously."""

sqlspec/storage/backends/obstore.py

Lines changed: 54 additions & 29 deletions
@@ -90,8 +90,19 @@ def __init__(self, uri: str, **kwargs: Any) -> None:
         """Initialize obstore backend.
 
         Args:
-            uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
-            **kwargs: Additional options including base_path and obstore configuration
+            uri: Storage URI. Supported formats:
+                - file:///absolute/path - Local filesystem
+                - s3://bucket/prefix - AWS S3
+                - gs://bucket/prefix - Google Cloud Storage
+                - az://container/prefix - Azure Blob Storage
+                - memory:// - In-memory storage (for testing)
+            **kwargs: Additional options:
+                - base_path (str): For local files (file://), this is combined with
+                  the URI path to form the storage root. For example:
+                  uri="file:///data" + base_path="uploads" → /data/uploads
+                  If base_path is absolute, it overrides the URI path (backward compat).
+                  For cloud storage, base_path is used as an object key prefix.
+                - Other obstore configuration options (timeouts, credentials, etc.)
         """
         ensure_obstore()
@@ -123,7 +134,9 @@ def __init__(self, uri: str, **kwargs: Any) -> None:
             if path_obj.is_file():
                 path_str = str(path_obj.parent)
 
-            local_store_root = self.base_path or path_str
+            # Combine URI path with base_path for correct storage location
+            # If base_path is absolute, Path division will use it directly (backward compat)
+            local_store_root = str(Path(path_str) / self.base_path) if self.base_path else path_str
 
             self._is_local_store = True
             self._local_store_root = local_store_root
@@ -228,11 +241,14 @@ def write_text(self, path: "str | Path", data: str, encoding: str = "utf-8", **k
 
     def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> "list[str]":  # pyright: ignore[reportUnusedParameter]
         """List objects using obstore."""
-        resolved_prefix = (
-            resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
-            if prefix
-            else self.base_path or ""
-        )
+        # For LocalStore, the base_path is already included in the store root,
+        # so we use empty prefix when none is given. For cloud stores, use base_path.
+        if prefix:
+            resolved_prefix = resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
+        elif self._is_local_store:
+            resolved_prefix = ""
+        else:
+            resolved_prefix = self.base_path or ""
         items = self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
         paths = sorted(item["path"] for batch in items for item in batch)
         _log_storage_event(
@@ -629,37 +645,46 @@ async def write_bytes_async(self, path: "str | Path", data: bytes, **kwargs: Any
     async def stream_read_async(
         self, path: "str | Path", chunk_size: "int | None" = None, **kwargs: Any
     ) -> AsyncIterator[bytes]:
-        """Stream bytes from storage asynchronously."""
+        """Stream bytes from storage asynchronously.
+
+        Uses asyncio.to_thread() to ensure the event loop is not blocked
+        during I/O operations with cloud storage backends. This prevents
+        heartbeat timeouts and allows concurrent async tasks to execute
+        during large file downloads.
+        """
+        import asyncio
+
+        from sqlspec.storage.backends.base import AsyncChunkedBytesIterator
+
         if self._is_local_store:
             resolved_path = self._resolve_path_for_local_store(path)
         else:
             resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
 
-        result = await self.store.get_async(resolved_path)
-        stream = result.stream()
+        # Run blocking I/O in thread pool to avoid blocking event loop
+        data = await asyncio.to_thread(self.read_bytes, resolved_path)
 
-        async def _generator() -> AsyncIterator[bytes]:
-            async for chunk in stream:
-                yield bytes(chunk)
-
-        _log_storage_event(
-            "storage.read",
-            backend_type=self.backend_type,
-            protocol=self.protocol,
-            operation="stream_read",
-            mode="async",
-            path=resolved_path,
-        )
+        _log_storage_event(
+            "storage.read",
+            backend_type=self.backend_type,
+            protocol=self.protocol,
+            operation="stream_read",
+            mode="async",
+            path=resolved_path,
+        )
 
-        return _generator()
+        return AsyncChunkedBytesIterator(data, chunk_size or 65536)
 
     async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> "list[str]":  # pyright: ignore[reportUnusedParameter]
         """List objects in storage asynchronously."""
-        resolved_prefix = (
-            resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
-            if prefix
-            else self.base_path or ""
-        )
+        # For LocalStore, the base_path is already included in the store root,
+        # so we use empty prefix when none is given. For cloud stores, use base_path.
+        if prefix:
+            resolved_prefix = resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
+        elif self._is_local_store:
+            resolved_prefix = ""
+        else:
+            resolved_prefix = self.base_path or ""
 
         objects: list[str] = []
         async for batch in self.store.list_async(resolved_prefix):  # pyright: ignore[reportAttributeAccessIssue]
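
Putting the pieces together, a sketch of how the reworked streaming path is meant to be consumed. The file:// URI, the /tmp/demo layout, and the write_bytes call shape are assumptions inferred from method names visible in this diff, not confirmed API:

import asyncio

from sqlspec.storage.backends.obstore import ObStoreBackend


async def main() -> None:
    # base_path now combines with the URI path: the storage root is /tmp/demo/uploads
    backend = ObStoreBackend("file:///tmp/demo", base_path="uploads")
    backend.write_bytes("report.bin", b"y" * 100_000)

    total = 0
    # read_bytes runs in a worker thread; chunks arrive without blocking the loop
    async for chunk in await backend.stream_read_async("report.bin", chunk_size=65536):
        total += len(chunk)
    assert total == 100_000


asyncio.run(main())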
