Skip to content

Commit 299ac0d

Browse files
perf: implement lazy imports for heavy dependencies (#1321)
* perf: implement lazy imports for heavy dependencies

  Defer loading of heavy dependencies (networkx, matplotlib, click, pymysql) until their associated features are accessed:

  - dj.Diagram, dj.Di, dj.ERD -> loads diagram.py (networkx, matplotlib)
  - dj.kill -> loads admin.py (pymysql via connection)
  - dj.cli -> loads cli.py (click)

  This reduces `import datajoint` time significantly, especially on macOS where import overhead is higher. Core functionality (Schema, Table, Connection, etc.) remains immediately available.

  Closes #1220

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

* fix: cache lazy imports correctly and expose diagram module

  - Cache lazy imports in globals() to override the submodule that importlib automatically sets on the parent module
  - Add dj.diagram to lazy modules (returns module for diagram_active access)
  - Add tests for cli callable and diagram module access

  Co-Authored-By: Claude Opus 4.5 <[email protected]>

---------

Co-authored-by: Claude Opus 4.5 <[email protected]>
1 parent 1270d90 commit 299ac0d

File tree

2 files changed

+159
-5
lines changed

2 files changed

+159
-5
lines changed

src/datajoint/__init__.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,18 @@
6060
"ValidationResult",
6161
]
6262

63+
# =============================================================================
64+
# Eager imports — core functionality needed immediately
65+
# =============================================================================
6366
from . import errors
6467
from . import migrate
65-
from .admin import kill
6668
from .codecs import (
6769
Codec,
6870
get_codec,
6971
list_codecs,
7072
)
7173
from .blob import MatCell, MatStruct
72-
from .cli import cli
7374
from .connection import Connection, conn
74-
from .diagram import Diagram
7575
from .errors import DataJointError
7676
from .expression import AndList, Not, Top, U
7777
from .hash import key_hash
@@ -83,5 +83,38 @@
8383
from .user_tables import Computed, Imported, Lookup, Manual, Part
8484
from .version import __version__
8585

86-
ERD = Di = Diagram # Aliases for Diagram
87-
schema = Schema # Aliases for Schema
86+
schema = Schema # Alias for Schema
87+
88+
# =============================================================================
89+
# Lazy imports — heavy dependencies loaded on first access
90+
# =============================================================================
91+
# These modules import heavy dependencies (networkx, matplotlib, click, pymysql)
92+
# that slow down `import datajoint`. They are loaded on demand.
93+
94+
# Maps each lazily exposed package attribute to ``(relative module, attribute)``.
# An attribute of ``None`` means the submodule object itself is exposed.
_lazy_modules = {
    # Diagram imports networkx and matplotlib
    "Diagram": (".diagram", "Diagram"),
    "Di": (".diagram", "Diagram"),
    "ERD": (".diagram", "Diagram"),
    "diagram": (".diagram", None),  # Return the module itself
    # kill imports pymysql via connection
    # NOTE(review): `.connection` is still imported eagerly by this package, so
    # deferring `.admin` may not actually defer pymysql -- confirm.
    "kill": (".admin", "kill"),
    # cli imports click
    "cli": (".cli", "cli"),
}


def __getattr__(name: str):
    """Resolve a lazily exposed package attribute on first access.

    Python calls this hook only when normal module attribute lookup fails.
    Names listed in ``_lazy_modules`` are imported on demand; every other
    name is reported as missing.

    Args:
        name: Attribute requested on the package.

    Returns:
        The submodule itself when the table entry's attribute is ``None``,
        otherwise the named attribute of that submodule.

    Raises:
        AttributeError: If ``name`` is not a lazily exposed attribute.
    """
    if name not in _lazy_modules:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    module_path, attr_name = _lazy_modules[name]
    import importlib

    module = importlib.import_module(module_path, __package__)
    # If attr_name is None, return the module itself
    attr = module if attr_name is None else getattr(module, attr_name)
    # Cache in module __dict__ to avoid repeated __getattr__ calls
    # and to override the submodule that importlib adds automatically
    # (e.g. keep ``cli`` bound to the function rather than the ``cli`` module).
    globals()[name] = attr
    return attr


def __dir__():
    """List package attributes, including lazy names that are not loaded yet.

    Without this hook the lazily exposed names would be missing from
    ``dir()`` and from tab completion until their first access. Building the
    listing does not import anything.

    Returns:
        Sorted attribute names: the current module globals plus every key of
        ``_lazy_modules``.
    """
    return sorted(set(globals()) | set(_lazy_modules))

tests/unit/test_lazy_imports.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""
2+
Tests for lazy import behavior.
3+
4+
These tests verify that heavy dependencies (networkx, matplotlib, click)
5+
are not loaded until their associated features are accessed.
6+
"""
7+
8+
import sys
9+
10+
11+
def test_lazy_diagram_import():
    """Diagram module should not be loaded until dj.Diagram is accessed."""
    # Snapshot and drop the cached datajoint modules to force a fresh import.
    # Compare the top-level package name so unrelated packages that merely
    # share the "datajoint" prefix are left untouched.
    saved = {
        key: mod
        for key, mod in list(sys.modules.items())
        if key.partition(".")[0] == "datajoint"
    }
    for key in saved:
        del sys.modules[key]

    try:
        # Import datajoint
        import datajoint as dj

        # Diagram module should not be loaded yet
        assert "datajoint.diagram" not in sys.modules, "diagram module loaded eagerly"

        # Access Diagram - should trigger lazy load
        Diagram = dj.Diagram
        assert "datajoint.diagram" in sys.modules, "diagram module not loaded after access"
        assert Diagram.__name__ == "Diagram"
    finally:
        # Discard the fresh copy and restore the original modules so that a
        # second copy of the package does not leak into later tests.
        for key in list(sys.modules):
            if key.partition(".")[0] == "datajoint":
                del sys.modules[key]
        sys.modules.update(saved)
28+
29+
30+
def test_lazy_admin_import():
    """Admin module should not be loaded until dj.kill is accessed."""
    # Snapshot and drop the cached datajoint modules to force a fresh import.
    # Compare the top-level package name so unrelated packages that merely
    # share the "datajoint" prefix are left untouched.
    saved = {
        key: mod
        for key, mod in list(sys.modules.items())
        if key.partition(".")[0] == "datajoint"
    }
    for key in saved:
        del sys.modules[key]

    try:
        # Import datajoint
        import datajoint as dj

        # Admin module should not be loaded yet
        assert "datajoint.admin" not in sys.modules, "admin module loaded eagerly"

        # Access kill - should trigger lazy load
        kill = dj.kill
        assert "datajoint.admin" in sys.modules, "admin module not loaded after access"
        assert callable(kill)
    finally:
        # Discard the fresh copy and restore the original modules so that a
        # second copy of the package does not leak into later tests.
        for key in list(sys.modules):
            if key.partition(".")[0] == "datajoint":
                del sys.modules[key]
        sys.modules.update(saved)
47+
48+
49+
def test_lazy_cli_import():
    """CLI module should not be loaded until dj.cli is accessed."""
    # Snapshot and drop the cached datajoint modules to force a fresh import.
    # Compare the top-level package name so unrelated packages that merely
    # share the "datajoint" prefix are left untouched.
    saved = {
        key: mod
        for key, mod in list(sys.modules.items())
        if key.partition(".")[0] == "datajoint"
    }
    for key in saved:
        del sys.modules[key]

    try:
        # Import datajoint
        import datajoint as dj

        # CLI module should not be loaded yet
        assert "datajoint.cli" not in sys.modules, "cli module loaded eagerly"

        # Access cli - should trigger lazy load and return the function
        cli_func = dj.cli
        assert "datajoint.cli" in sys.modules, "cli module not loaded after access"
        assert callable(cli_func), "dj.cli should be callable (the cli function)"
    finally:
        # Discard the fresh copy and restore the original modules so that a
        # second copy of the package does not leak into later tests.
        for key in list(sys.modules):
            if key.partition(".")[0] == "datajoint":
                del sys.modules[key]
        sys.modules.update(saved)
66+
67+
68+
def test_diagram_module_access():
    """dj.diagram should return the diagram module for accessing module-level attrs."""
    # Snapshot and drop the cached datajoint modules to force a fresh import.
    # Compare the top-level package name so unrelated packages that merely
    # share the "datajoint" prefix are left untouched.
    saved = {
        key: mod
        for key, mod in list(sys.modules.items())
        if key.partition(".")[0] == "datajoint"
    }
    for key in saved:
        del sys.modules[key]

    try:
        import datajoint as dj

        # Access dj.diagram should return the module
        diagram_module = dj.diagram
        assert hasattr(diagram_module, "diagram_active"), "diagram module should have diagram_active"
        assert hasattr(diagram_module, "Diagram"), "diagram module should have Diagram class"
    finally:
        # Discard the fresh copy and restore the original modules so that a
        # second copy of the package does not leak into later tests.
        for key in list(sys.modules):
            if key.partition(".")[0] == "datajoint":
                del sys.modules[key]
        sys.modules.update(saved)
81+
82+
83+
def test_diagram_aliases():
    """Di and ERD should be aliases for Diagram."""
    # Snapshot and drop the cached datajoint modules to force a fresh import.
    # Compare the top-level package name so unrelated packages that merely
    # share the "datajoint" prefix are left untouched.
    saved = {
        key: mod
        for key, mod in list(sys.modules.items())
        if key.partition(".")[0] == "datajoint"
    }
    for key in saved:
        del sys.modules[key]

    try:
        import datajoint as dj

        # All aliases should resolve to the same class
        assert dj.Diagram is dj.Di
        assert dj.Diagram is dj.ERD
    finally:
        # Discard the fresh copy and restore the original modules so that a
        # second copy of the package does not leak into later tests.
        for key in list(sys.modules):
            if key.partition(".")[0] == "datajoint":
                del sys.modules[key]
        sys.modules.update(saved)
95+
96+
97+
def test_core_imports_available():
    """Core functionality should be available immediately after import."""
    # Snapshot and drop the cached datajoint modules to force a fresh import.
    # Compare the top-level package name so unrelated packages that merely
    # share the "datajoint" prefix are left untouched.
    saved = {
        key: mod
        for key, mod in list(sys.modules.items())
        if key.partition(".")[0] == "datajoint"
    }
    for key in saved:
        del sys.modules[key]

    try:
        import datajoint as dj

        # Core classes should be available without triggering lazy loads
        core_names = (
            "Schema",
            "Table",
            "Manual",
            "Lookup",
            "Computed",
            "Imported",
            "Part",
            "Connection",
            "config",
            "errors",
        )
        for core_name in core_names:
            assert hasattr(dj, core_name), f"dj.{core_name} missing after import"

        # Heavy modules should still not be loaded
        assert "datajoint.diagram" not in sys.modules
        assert "datajoint.admin" not in sys.modules
        assert "datajoint.cli" not in sys.modules
    finally:
        # Discard the fresh copy and restore the original modules so that a
        # second copy of the package does not leak into later tests.
        for key in list(sys.modules):
            if key.partition(".")[0] == "datajoint":
                del sys.modules[key]
        sys.modules.update(saved)

0 commit comments

Comments
 (0)