Skip to content

Commit 2bd7c03

Browse files
authored
Make imports of heavy packages lazy (#491)
* Lazy imports to make pandas, pyarrow, polars, and duckdb not import on VegaFusion import
* Update package.jsons
* Add lazy import test
* Revert "Update package.jsons"
  This reverts commit afd2f71.
* Add altair to lazy import checks
* return it 🤦
1 parent 6c3a004 commit 2bd7c03

File tree

18 files changed

+188
-351
lines changed

18 files changed

+188
-351
lines changed

.github/workflows/build_test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ jobs:
320320
python -m pip install vegafusion-*.whl
321321
python -m pip install vegafusion_python_embed-*manylinux_2_17_x86_64*.whl
322322
python -m pip install pytest vega-datasets polars-lts-cpu duckdb==0.9.2 "vl-convert-python>=1.0.1rc1" scikit-image pandas==2.0
323+
- name: Test lazy imports
324+
working-directory: python/vegafusion/
325+
run: python checks/check_lazy_imports.py
323326
- name: Test vegafusion
324327
working-directory: python/vegafusion/
325328
run: pytest

python/vegafusion/checks/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## Checks
2+
Python scripts to run on CI that should not be run as part of the pytest test suite.
3+
In particular, these scripts check that heavy packages are imported lazily.
4+
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import sys
2+
import vegafusion as vf
3+
import vegafusion_embed
4+
5+
for mod in ["polars", "pandas", "pyarrow", "duckdb", "altair"]:
6+
assert mod not in sys.modules, f"{mod} module should be imported lazily"

python/vegafusion/setup.cfg

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ packages = find:
2727
python_requires = >=3.7
2828
include_package_data = True
2929
install_requires =
30-
altair>=4.2.0
30+
altair>=5.2.0
3131
pyarrow>=5
3232
pandas
3333
psutil
@@ -38,3 +38,10 @@ embed =
3838
vegafusion-python-embed==1.6.8
3939
vl-convert-python>=0.7.0
4040

41+
[options.entry_points]
42+
altair.vegalite.v5.renderer =
43+
vegafusion-mime = vegafusion.renderer:vegafusion_mime_renderer
44+
45+
altair.vegalite.v5.data_transformer =
46+
vegafusion-feather = vegafusion.transformer:feather_transformer
47+
vegafusion-inline = vegafusion.transformer:inline_data_transformer

python/vegafusion/vegafusion/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
from .evaluation import transformed_data
66
from .save import save_html, save_vega, save_png, save_svg
77
from . import renderer
8-
from .compilers import vegalite_compilers
9-
import altair as alt
8+
109

1110
from ._version import __version__
1211
from .utils import RendererTransformerEnabler
@@ -59,6 +58,7 @@ def enable(mimetype="html", row_limit=10000, embed_options=None):
5958
Dictionary of options to pass to the vega-embed. Default
6059
entry is {'mode': 'vega'}.
6160
"""
61+
import altair as alt
6262
embed_options = embed_options if embed_options is not None else {}
6363
return RendererTransformerEnabler(
6464
renderer_ctx=alt.renderers.enable(
@@ -115,6 +115,7 @@ def disable():
115115
116116
This does not affect the behavior of VegaFusionWidget
117117
"""
118+
import altair as alt
118119
return RendererTransformerEnabler(
119120
renderer_ctx=alt.renderers.enable('default'),
120121
data_transformer_ctx=alt.data_transformers.enable('default'),

python/vegafusion/vegafusion/compilers.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

python/vegafusion/vegafusion/connection/__init__.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
from typing import Dict, Optional
2-
3-
import pandas as pd
4-
import pyarrow as pa
5-
1+
from typing import Dict, Optional, TYPE_CHECKING
62
from dataclasses import dataclass
73
from abc import ABC, abstractmethod
84

5+
if TYPE_CHECKING:
6+
from pyarrow import Schema, Table
7+
from pandas import DataFrame
8+
99

1010
@dataclass
1111
class CsvReadOptions:
@@ -15,7 +15,7 @@ class CsvReadOptions:
1515
has_header: bool
1616
delimeter: str
1717
file_extension: str
18-
schema: Optional[pa.Schema]
18+
schema: Optional["Schema"]
1919

2020

2121
class RegistrationNotSupportedError(RuntimeError):
@@ -46,7 +46,7 @@ def dialect(cls) -> str:
4646
raise NotImplementedError()
4747

4848
@abstractmethod
49-
def tables(self) -> Dict[str, pa.Schema]:
49+
def tables(self) -> Dict[str, "Schema"]:
5050
"""
5151
Returns the names and schema for the tables that are provided by the connection.
5252
These are the tables that may be referenced by SQL queries passed to the
@@ -57,7 +57,7 @@ def tables(self) -> Dict[str, pa.Schema]:
5757
raise NotImplementedError()
5858

5959
@abstractmethod
60-
def fetch_query(self, query: str, schema: pa.Schema) -> pa.Table:
60+
def fetch_query(self, query: str, schema: "Schema") -> "Table":
6161
"""
6262
Returns the result of evaluating the requested query. The resulting pa.Table
6363
should have a schema matching the provided schema
@@ -77,7 +77,7 @@ def fallback(self) -> bool:
7777
"""
7878
return True
7979

80-
def register_pandas(self, name: str, df: pd.DataFrame, temporary: bool = False):
80+
def register_pandas(self, name: str, df: "DataFrame", temporary: bool = False):
8181
"""
8282
Register the provided pandas DataFrame as a table with the provided name
8383
@@ -88,7 +88,7 @@ def register_pandas(self, name: str, df: pd.DataFrame, temporary: bool = False):
8888
"""
8989
raise RegistrationNotSupportedError("Connection does not support registration of pandas datasets")
9090

91-
def register_arrow(self, name: str, table: pa.Table, temporary: bool = False):
91+
def register_arrow(self, name: str, table: "Table", temporary: bool = False):
9292
"""
9393
Register the provided pyarrow Table as a table with the provided name
9494
:param name: Table name

python/vegafusion/vegafusion/dataset/dataframe.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from abc import ABC, abstractmethod
2-
import pyarrow as pa
3-
from vegafusion.proto.datafusion_pb2 import LogicalExprNode
2+
from typing import Optional, List, Literal, Union, Any, TYPE_CHECKING
43

5-
from typing import Optional, List, Literal, Union, Any
4+
if TYPE_CHECKING:
5+
from pyarrow import Schema, Table
6+
from vegafusion.proto.datafusion_pb2 import LogicalExprNode
67

78

89
class DataFrameOperationNotSupportedError(RuntimeError):
@@ -19,12 +20,12 @@ class DataFrameDataset(ABC):
1920
"""
2021

2122
@abstractmethod
22-
def schema(self) -> pa.Schema:
23+
def schema(self) -> "Schema":
2324
"""DataFrame's pyarrow schema"""
2425
raise NotImplementedError()
2526

2627
@abstractmethod
27-
def collect(self) -> pa.Table:
28+
def collect(self) -> "Table":
2829
"""Return DataFrame's value as a pyarrow Table"""
2930
raise NotImplementedError()
3031

@@ -47,7 +48,7 @@ def main_thread(self) -> bool:
4748
return True
4849

4950
def sort(
50-
self, exprs: List[LogicalExprNode], limit: Optional[int]
51+
self, exprs: List["LogicalExprNode"], limit: Optional[int]
5152
) -> "DataFrameDataset":
5253
"""
5354
Sort and optionally limit dataset
@@ -58,7 +59,7 @@ def sort(
5859
"""
5960
raise DataFrameOperationNotSupportedError()
6061

61-
def select(self, exprs: List[LogicalExprNode]) -> "DataFrameDataset":
62+
def select(self, exprs: List["LogicalExprNode"]) -> "DataFrameDataset":
6263
"""
6364
Select columns from Dataset. Selection expressions may include column names,
6465
column expressions, or window expressions
@@ -69,7 +70,7 @@ def select(self, exprs: List[LogicalExprNode]) -> "DataFrameDataset":
6970
raise DataFrameOperationNotSupportedError()
7071

7172
def aggregate(
72-
self, group_exprs: List[LogicalExprNode], agg_exprs: List[LogicalExprNode]
73+
self, group_exprs: List["LogicalExprNode"], agg_exprs: List["LogicalExprNode"]
7374
) -> "DataFrameDataset":
7475
"""
7576
Perform dataset aggregation. Resulting dataset includes grouping
@@ -82,7 +83,7 @@ def aggregate(
8283
raise DataFrameOperationNotSupportedError()
8384

8485
def joinaggregate(
85-
self, group_exprs: List[LogicalExprNode], agg_exprs: List[LogicalExprNode]
86+
self, group_exprs: List["LogicalExprNode"], agg_exprs: List["LogicalExprNode"]
8687
) -> "DataFrameDataset":
8788
"""
8889
Perform joinaggregate dataset operation.
@@ -95,7 +96,7 @@ def joinaggregate(
9596
"""
9697
raise DataFrameOperationNotSupportedError()
9798

98-
def filter(self, predicate: LogicalExprNode) -> "DataFrameDataset":
99+
def filter(self, predicate: "LogicalExprNode") -> "DataFrameDataset":
99100
"""
100101
Filter dataset by predicate expression
101102
@@ -135,7 +136,7 @@ def fold(
135136
def stack(
136137
self,
137138
field: str,
138-
orderby: List[LogicalExprNode],
139+
orderby: List["LogicalExprNode"],
139140
groupby: List[str],
140141
start_field: str,
141142
stop_field: str,

python/vegafusion/vegafusion/dataset/dfi.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
import pyarrow as pa
21
from functools import cached_property
3-
from typing import Sequence, Optional
2+
from typing import Sequence, Optional, TYPE_CHECKING
43
from .sql import SqlDataset
54

5+
if TYPE_CHECKING:
6+
from pyarrow import Table, Schema
67

78
class SqlDatasetDataFrame:
89
"""An implementation of the dataframe interchange protocol.
@@ -22,7 +23,7 @@ def __init__(
2223
dataset: SqlDataset,
2324
nan_as_null: bool = False,
2425
allow_copy: bool = True,
25-
pyarrow_table: Optional[pa.Table] = None,
26+
pyarrow_table: Optional["Table"] = None,
2627
):
2728
self._dataset = dataset
2829
self._nan_as_null = nan_as_null
@@ -55,7 +56,7 @@ def _empty_pyarrow_df(self):
5556
return schema.empty_table().__dataframe__()
5657

5758
@property
58-
def _schema(self) -> pa.Schema:
59+
def _schema(self) -> "Schema":
5960
return self._dataset.table_schema()
6061

6162
def _get_dtype(self, name):

python/vegafusion/vegafusion/dataset/sql.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from abc import ABC, abstractmethod
2-
import pyarrow as pa
32
from typing import TYPE_CHECKING
43

54
if TYPE_CHECKING:
65
from .dfi import SqlDatasetDataFrame
6+
import pyarrow as pa
77

88

99
class SqlDataset(ABC):
@@ -35,12 +35,12 @@ def table_name(self) -> str:
3535
raise NotImplementedError()
3636

3737
@abstractmethod
38-
def table_schema(self) -> pa.Schema:
38+
def table_schema(self) -> "pa.Schema":
3939
"""Schema of source table"""
4040
raise NotImplementedError()
4141

4242
@abstractmethod
43-
def fetch_query(self, query: str, schema: pa.Schema) -> pa.Table:
43+
def fetch_query(self, query: str, schema: "pa.Schema") -> "pa.Table":
4444
"""
4545
Returns the result of evaluating the requested query. The resulting pa.Table
4646
should have a schema matching the provided schema

0 commit comments

Comments
 (0)