Skip to content

Commit 794476d

Browse files
author
eh-steve
committed
Lazy load cloud provider modules to improve import time
Implements lazy loading via __getattr__ for all cloud provider classes. This defers loading heavy cloud SDKs (google-cloud-storage, boto3, azure-storage-blob) until they are actually accessed, significantly reducing import time for applications that only use a subset of providers. Key changes: - cloudpathlib/__init__.py: Use __getattr__ to lazy-load all path/client classes - cloudpathlib/s3/__init__.py: Use __getattr__ to lazy-load S3Client, S3Path - cloudpathlib/gs/__init__.py: Use __getattr__ to lazy-load GSClient, GSPath - cloudpathlib/azure/__init__.py: Use __getattr__ to lazy-load AzureBlobClient, AzureBlobPath - cloudpathlib/cloudpath.py: Change absolute import to relative; move anypath import to function-local - tests/test_import_time.py: Add tests verifying lazy loading behavior TYPE_CHECKING blocks preserve static type hints for IDE support.
1 parent 5124aa0 commit 794476d

File tree

7 files changed

+403
-23
lines changed

7 files changed

+403
-23
lines changed

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## UNRELEASED
44
- Added support for Pydantic serialization (Issue [#537](https://github.com/drivendataorg/cloudpathlib/issues/537), PR [#538](https://github.com/drivendataorg/cloudpathlib/pull/538))
5+
- Improved import time by lazy-loading cloud provider modules (Issue [#544](https://github.com/drivendataorg/cloudpathlib/issues/544), PR [#TBD](https://github.com/drivendataorg/cloudpathlib/pull/TBD))
56

67
## v0.23.0 (2025-10-07)
78

cloudpathlib/__init__.py

Lines changed: 80 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,30 @@
11
import os
22
import sys
3+
from typing import TYPE_CHECKING
34

4-
from .anypath import AnyPath
5-
from .azure.azblobclient import AzureBlobClient
6-
from .azure.azblobpath import AzureBlobPath
7-
from .cloudpath import CloudPath, implementation_registry
8-
from .patches import patch_open, patch_os_functions, patch_glob, patch_all_builtins
9-
from .gs.gsclient import GSClient
10-
from .gs.gspath import GSPath
11-
from .http.httpclient import HttpClient, HttpsClient
12-
from .http.httppath import HttpPath, HttpsPath
13-
from .s3.s3client import S3Client
14-
from .s3.s3path import S3Path
5+
# Lazy imports for cloud providers to avoid loading heavy SDKs at import time
6+
# Google Cloud SDK alone adds ~200ms to import time
7+
8+
if TYPE_CHECKING:
9+
from .anypath import AnyPath as AnyPath
10+
from .azure.azblobclient import AzureBlobClient as AzureBlobClient
11+
from .azure.azblobpath import AzureBlobPath as AzureBlobPath
12+
from .cloudpath import (
13+
CloudPath as CloudPath,
14+
implementation_registry as implementation_registry,
15+
)
16+
from .patches import (
17+
patch_open as patch_open,
18+
patch_os_functions as patch_os_functions,
19+
patch_glob as patch_glob,
20+
patch_all_builtins as patch_all_builtins,
21+
)
22+
from .gs.gsclient import GSClient as GSClient
23+
from .gs.gspath import GSPath as GSPath
24+
from .http.httpclient import HttpClient as HttpClient, HttpsClient as HttpsClient
25+
from .http.httppath import HttpPath as HttpPath, HttpsPath as HttpsPath
26+
from .s3.s3client import S3Client as S3Client
27+
from .s3.s3path import S3Path as S3Path
1528

1629
if sys.version_info[:2] >= (3, 8):
1730
import importlib.metadata as importlib_metadata
@@ -43,14 +56,66 @@
4356
]
4457

4558

59+
# Lazy loading implementation.
# Maps each lazily exported public name to the submodule (relative to this
# package) that defines it; the submodule is imported only on first access.
_LAZY_IMPORTS = {
    # Core
    "AnyPath": ".anypath",
    "CloudPath": ".cloudpath",
    "implementation_registry": ".cloudpath",
    # Patches
    "patch_open": ".patches",
    "patch_os_functions": ".patches",
    "patch_glob": ".patches",
    "patch_all_builtins": ".patches",
    # S3
    "S3Client": ".s3.s3client",
    "S3Path": ".s3.s3path",
    # GCS
    "GSClient": ".gs.gsclient",
    "GSPath": ".gs.gspath",
    # Azure
    "AzureBlobClient": ".azure.azblobclient",
    "AzureBlobPath": ".azure.azblobpath",
    # HTTP
    "HttpClient": ".http.httpclient",
    "HttpsClient": ".http.httpclient",
    "HttpPath": ".http.httppath",
    "HttpsPath": ".http.httppath",
}


def __getattr__(name: str):
    """Resolve a lazily exported name the first time it is accessed (PEP 562).

    Python calls this hook only when normal attribute lookup on the package
    fails. The defining submodule is looked up in ``_LAZY_IMPORTS``, imported
    relative to this package, and the requested attribute is returned.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The object called ``name`` from its defining submodule.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    try:
        module_path = _LAZY_IMPORTS[name]
    except KeyError:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None

    # Imported here so the hook adds nothing to package import time.
    import importlib

    value = getattr(importlib.import_module(module_path, __name__), name)
    # Bind the result on the package: later lookups then succeed through the
    # normal module dict without re-entering this hook, and the name keeps a
    # single stable binding, as it had when these imports were eager.
    globals()[name] = value
    return value
95+
96+
97+
def __dir__():
    """Return the names reported by ``dir()`` for this package.

    The package's ``__all__`` list is returned as-is. Presumably it includes
    the lazily loaded names, so they show up before they have been imported;
    confirm against the ``__all__`` definition earlier in the module.
    """
    exported_names = __all__
    return exported_names
99+
100+
101+
# Handle environment-variable-based patching
102+
# The env vars must be checked at import time, but .patches is only imported when one of them is set
46103
if bool(os.environ.get("CLOUDPATHLIB_PATCH_OPEN", "")):
47-
patch_open()
104+
from .patches import patch_open as _patch_open
105+
106+
_patch_open()
48107

49108
if bool(os.environ.get("CLOUDPATHLIB_PATCH_OS", "")):
50-
patch_os_functions()
109+
from .patches import patch_os_functions as _patch_os_functions
110+
111+
_patch_os_functions()
51112

52113
if bool(os.environ.get("CLOUDPATHLIB_PATCH_GLOB", "")):
53-
patch_glob()
114+
from .patches import patch_glob as _patch_glob
115+
116+
_patch_glob()
54117

55118
if bool(os.environ.get("CLOUDPATHLIB_PATCH_ALL", "")):
56-
patch_all_builtins()
119+
from .patches import patch_all_builtins as _patch_all_builtins
120+
121+
_patch_all_builtins()

cloudpathlib/azure/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
1-
from .azblobclient import AzureBlobClient
2-
from .azblobpath import AzureBlobPath
1+
from typing import TYPE_CHECKING
2+
3+
if TYPE_CHECKING:
4+
from .azblobclient import AzureBlobClient as AzureBlobClient
5+
from .azblobpath import AzureBlobPath as AzureBlobPath
36

47
__all__ = [
58
"AzureBlobClient",
69
"AzureBlobPath",
710
]
11+
12+
13+
def __getattr__(name: str):
    """Lazily resolve ``AzureBlobClient`` / ``AzureBlobPath`` on first access (PEP 562).

    Python calls this hook only when normal attribute lookup on the package
    fails, so the defining submodule is not imported until one of these names
    is actually requested.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The requested class from its defining submodule.

    Raises:
        AttributeError: If ``name`` is not a lazily exported name.
    """
    lazy_modules = {
        "AzureBlobClient": ".azblobclient",
        "AzureBlobPath": ".azblobpath",
    }
    if name not in lazy_modules:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported here so the hook adds nothing to package import time.
    import importlib

    value = getattr(importlib.import_module(lazy_modules[name], __name__), name)
    # Bind on the package so later lookups bypass this hook and the name keeps
    # one stable binding, as it did when the import was eager.
    globals()[name] = value
    return value

cloudpathlib/cloudpath.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,8 @@ def _make_selector(pattern_parts, _flavour, case_sensitive=True): # noqa: F811
8080
from .legacy.glob import _make_selector # noqa: F811
8181

8282

83-
from cloudpathlib.enums import FileCacheMode
83+
from .enums import FileCacheMode
8484

85-
from . import anypath
8685
from .exceptions import (
8786
ClientMismatchError,
8887
CloudPathFileExistsError,
@@ -1143,6 +1142,8 @@ def _copy(
11431142
force_overwrite_to_cloud: Optional[bool] = None,
11441143
remove_src: bool = False,
11451144
) -> Union[Path, Self]:
1145+
from . import anypath
1146+
11461147
if not self.exists():
11471148
raise ValueError(f"Path {self} must exist to copy.")
11481149

@@ -1275,6 +1276,8 @@ def copy_into(
12751276
force_overwrite_to_cloud: Optional[bool] = None,
12761277
) -> Union[Path, Self]:
12771278
"""Copy self into target directory, preserving the filename."""
1279+
from . import anypath
1280+
12781281
target_path = anypath.to_anypath(target_dir) / self.name
12791282

12801283
result = self._copy(
@@ -1312,6 +1315,8 @@ def copytree(
13121315

13131316
def copytree(self, destination, force_overwrite_to_cloud=None, ignore=None):
13141317
"""Copy self to a directory, if self is a directory."""
1318+
from . import anypath
1319+
13151320
if not self.is_dir():
13161321
raise CloudPathNotADirectoryError(
13171322
f"Origin path {self} must be a directory. To copy a single file use the method copy."
@@ -1427,6 +1432,8 @@ def move_into(
14271432
force_overwrite_to_cloud: Optional[bool] = None,
14281433
) -> Union[Path, Self]:
14291434
"""Move self into target directory, preserving the filename and removing the source."""
1435+
from . import anypath
1436+
14301437
target_path = anypath.to_anypath(target_dir) / self.name
14311438

14321439
result = self._copy(

cloudpathlib/gs/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
1-
from .gsclient import GSClient
2-
from .gspath import GSPath
1+
from typing import TYPE_CHECKING
2+
3+
if TYPE_CHECKING:
4+
from .gsclient import GSClient as GSClient
5+
from .gspath import GSPath as GSPath
36

47
__all__ = [
58
"GSClient",
69
"GSPath",
710
]
11+
12+
13+
def __getattr__(name: str):
    """Lazily resolve ``GSClient`` / ``GSPath`` on first access (PEP 562).

    Python calls this hook only when normal attribute lookup on the package
    fails, so the defining submodule is not imported until one of these names
    is actually requested.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The requested class from its defining submodule.

    Raises:
        AttributeError: If ``name`` is not a lazily exported name.
    """
    lazy_modules = {
        "GSClient": ".gsclient",
        "GSPath": ".gspath",
    }
    if name not in lazy_modules:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported here so the hook adds nothing to package import time.
    import importlib

    value = getattr(importlib.import_module(lazy_modules[name], __name__), name)
    # Bind on the package so later lookups bypass this hook and the name keeps
    # one stable binding, as it did when the import was eager.
    globals()[name] = value
    return value

cloudpathlib/s3/__init__.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
1-
from .s3client import S3Client
2-
from .s3path import S3Path
1+
from typing import TYPE_CHECKING
2+
3+
if TYPE_CHECKING:
4+
from .s3client import S3Client as S3Client
5+
from .s3path import S3Path as S3Path
36

47
__all__ = [
58
"S3Client",
69
"S3Path",
710
]
11+
12+
13+
def __getattr__(name: str):
    """Lazily resolve ``S3Client`` / ``S3Path`` on first access (PEP 562).

    Python calls this hook only when normal attribute lookup on the package
    fails, so the defining submodule is not imported until one of these names
    is actually requested.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The requested class from its defining submodule.

    Raises:
        AttributeError: If ``name`` is not a lazily exported name.
    """
    lazy_modules = {
        "S3Client": ".s3client",
        "S3Path": ".s3path",
    }
    if name not in lazy_modules:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported here so the hook adds nothing to package import time.
    import importlib

    value = getattr(importlib.import_module(lazy_modules[name], __name__), name)
    # Bind on the package so later lookups bypass this hook and the name keeps
    # one stable binding, as it did when the import was eager.
    globals()[name] = value
    return value

0 commit comments

Comments
 (0)