Skip to content

Commit 2c51c5c

Browse files
perf: implement lazy imports for heavy dependencies
Defer loading of heavy dependencies (networkx, matplotlib, click, pymysql) until the features that need them are first accessed: dj.Diagram, dj.Di, and dj.ERD load diagram.py (networkx, matplotlib); dj.kill loads admin.py (pymysql via connection); dj.cli loads cli.py (click). This significantly reduces `import datajoint` time, especially on macOS, where import overhead is higher. Core functionality (Schema, Table, Connection, etc.) remains immediately available. Closes #1220 Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent c1b36f0 commit 2c51c5c

File tree

2 files changed

+137
-5
lines changed

2 files changed

+137
-5
lines changed

src/datajoint/__init__.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,18 @@
6060
"ValidationResult",
6161
]
6262

63+
# =============================================================================
64+
# Eager imports — core functionality needed immediately
65+
# =============================================================================
6366
from . import errors
6467
from . import migrate
65-
from .admin import kill
6668
from .codecs import (
6769
Codec,
6870
get_codec,
6971
list_codecs,
7072
)
7173
from .blob import MatCell, MatStruct
72-
from .cli import cli
7374
from .connection import Connection, conn
74-
from .diagram import Diagram
7575
from .errors import DataJointError
7676
from .expression import AndList, Not, Top, U
7777
from .hash import key_hash
@@ -83,5 +83,32 @@
8383
from .user_tables import Computed, Imported, Lookup, Manual, Part
8484
from .version import __version__
8585

86-
ERD = Di = Diagram # Aliases for Diagram
87-
schema = Schema # Aliases for Schema
86+
schema = Schema # Alias for Schema
87+
88+
# =============================================================================
89+
# Lazy imports — heavy dependencies loaded on first access
90+
# =============================================================================
91+
# These modules import heavy dependencies (networkx, matplotlib, click, pymysql)
92+
# that slow down `import datajoint`. They are loaded on demand.
93+
94+
_lazy_modules = {
    # Maps each lazily exported public name to a pair:
    # (module path relative to this package, attribute to fetch from it).
    #
    # Diagram imports networkx and matplotlib
    "Diagram": (".diagram", "Diagram"),
    "Di": (".diagram", "Diagram"),
    "ERD": (".diagram", "Diagram"),
    # kill imports pymysql via connection
    # NOTE(review): `.connection` is imported eagerly earlier in this file, so
    # confirm that deferring `.admin` actually avoids loading pymysql.
    "kill": (".admin", "kill"),
    # cli imports click
    "cli": (".cli", "cli"),
}


def __getattr__(name: str):
    """Resolve a lazily exported attribute on first access (PEP 562).

    Python calls this module-level hook only when normal attribute lookup on
    the package fails. Names listed in ``_lazy_modules`` are imported on
    demand; the resolved object is then stored in the package namespace so
    later lookups are ordinary attribute reads.

    Args:
        name: Attribute requested on the package.

    Returns:
        The object registered for ``name`` in ``_lazy_modules``.

    Raises:
        AttributeError: If ``name`` is not a lazily exported attribute.
    """
    if name not in _lazy_modules:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    import importlib

    module_path, attr_name = _lazy_modules[name]
    value = getattr(importlib.import_module(module_path, __package__), attr_name)

    # Importing a submodule binds that submodule on its parent package under
    # the submodule's own name. Without the assignment below, the first
    # ``cli`` access would return the exported object while every later
    # access would return the ``.cli`` module itself. Storing the resolved
    # object overrides that binding and also avoids re-entering this hook.
    globals()[name] = value
    return value


def __dir__():
    """Return the package's attribute names, including lazy ones not yet loaded."""
    return sorted(set(globals()) | set(_lazy_modules))

tests/unit/test_lazy_imports.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""
2+
Tests for lazy import behavior.
3+
4+
These tests verify that heavy dependencies (networkx, matplotlib, click)
5+
are not loaded until their associated features are accessed.
6+
"""
7+
8+
import sys
9+
10+
11+
def test_lazy_diagram_import():
    """Diagram module should not be loaded until dj.Diagram is accessed.

    The already-imported datajoint modules are evicted so the import below
    runs from scratch, and are put back afterwards so other tests keep
    working with the module objects they originally imported.
    """
    # Match the package and its submodules only, not look-alike packages
    # whose names merely start with "datajoint".
    stale = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
    saved = {name: sys.modules.pop(name) for name in stale}
    try:
        import datajoint as dj

        # Diagram module should not be loaded yet
        assert "datajoint.diagram" not in sys.modules, "diagram module loaded eagerly"

        # Access Diagram - should trigger lazy load
        Diagram = dj.Diagram
        assert "datajoint.diagram" in sys.modules, "diagram module not loaded after access"
        assert Diagram.__name__ == "Diagram"
    finally:
        # Discard the freshly imported copies, then restore the originals.
        fresh = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
        for name in fresh:
            del sys.modules[name]
        sys.modules.update(saved)
28+
29+
30+
def test_lazy_admin_import():
    """Admin module should not be loaded until dj.kill is accessed.

    The already-imported datajoint modules are evicted so the import below
    runs from scratch, and are put back afterwards so other tests keep
    working with the module objects they originally imported.
    """
    # Match the package and its submodules only, not look-alike packages
    # whose names merely start with "datajoint".
    stale = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
    saved = {name: sys.modules.pop(name) for name in stale}
    try:
        import datajoint as dj

        # Admin module should not be loaded yet
        assert "datajoint.admin" not in sys.modules, "admin module loaded eagerly"

        # Access kill - should trigger lazy load
        kill = dj.kill
        assert "datajoint.admin" in sys.modules, "admin module not loaded after access"
        assert callable(kill)
    finally:
        # Discard the freshly imported copies, then restore the originals.
        fresh = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
        for name in fresh:
            del sys.modules[name]
        sys.modules.update(saved)
47+
48+
49+
def test_lazy_cli_import():
    """CLI module should not be loaded until dj.cli is accessed.

    The already-imported datajoint modules are evicted so the import below
    runs from scratch, and are put back afterwards so other tests keep
    working with the module objects they originally imported.
    """
    # Match the package and its submodules only, not look-alike packages
    # whose names merely start with "datajoint".
    stale = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
    saved = {name: sys.modules.pop(name) for name in stale}
    try:
        import datajoint as dj

        # CLI module should not be loaded yet
        assert "datajoint.cli" not in sys.modules, "cli module loaded eagerly"

        # Access cli - should trigger lazy load
        _ = dj.cli
        assert "datajoint.cli" in sys.modules, "cli module not loaded after access"
    finally:
        # Discard the freshly imported copies, then restore the originals.
        fresh = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
        for name in fresh:
            del sys.modules[name]
        sys.modules.update(saved)
65+
66+
67+
def test_diagram_aliases():
    """Di and ERD should be aliases for Diagram.

    The already-imported datajoint modules are evicted so the import below
    runs from scratch, and are put back afterwards so other tests keep
    working with the module objects they originally imported.
    """
    # Match the package and its submodules only, not look-alike packages
    # whose names merely start with "datajoint".
    stale = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
    saved = {name: sys.modules.pop(name) for name in stale}
    try:
        import datajoint as dj

        # All aliases should resolve to the same class
        assert dj.Diagram is dj.Di
        assert dj.Diagram is dj.ERD
    finally:
        # Discard the freshly imported copies, then restore the originals.
        fresh = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
        for name in fresh:
            del sys.modules[name]
        sys.modules.update(saved)
79+
80+
81+
def test_core_imports_available():
    """Core functionality should be available immediately after import.

    The already-imported datajoint modules are evicted so the import below
    runs from scratch, and are put back afterwards so other tests keep
    working with the module objects they originally imported.
    """
    # Match the package and its submodules only, not look-alike packages
    # whose names merely start with "datajoint".
    stale = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
    saved = {name: sys.modules.pop(name) for name in stale}
    try:
        import datajoint as dj

        # Core classes should be available without triggering lazy loads
        core_names = (
            "Schema",
            "Table",
            "Manual",
            "Lookup",
            "Computed",
            "Imported",
            "Part",
            "Connection",
            "config",
            "errors",
        )
        for core_name in core_names:
            assert hasattr(dj, core_name), f"datajoint.{core_name} is not available after import"

        # Heavy modules should still not be loaded
        assert "datajoint.diagram" not in sys.modules
        assert "datajoint.admin" not in sys.modules
        assert "datajoint.cli" not in sys.modules
    finally:
        # Discard the freshly imported copies, then restore the originals.
        fresh = [name for name in sys.modules if name == "datajoint" or name.startswith("datajoint.")]
        for name in fresh:
            del sys.modules[name]
        sys.modules.update(saved)

0 commit comments

Comments
 (0)