Skip to content

Commit acecc9c

Browse files
authored
Optimize import speed for Mars package (#3022)
1 parent 3adefad commit acecc9c

File tree

85 files changed

+601
-439
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+601
-439
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 1999-2022 Alibaba Group Holding Ltd.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import subprocess
16+
import sys
17+
18+
# make sure the necessary pyc files are generated before timing imports
19+
import mars.dataframe as md
20+
import mars.tensor as mt
21+
22+
del md, mt
23+
24+
25+
class ImportPackageSuite:
26+
"""
27+
Benchmark that times importing the Mars package and its submodules
28+
"""
29+
30+
def time_import_mars(self):
31+
proc = subprocess.Popen([sys.executable, "-c", "import mars"])
32+
proc.wait(120)
33+
34+
def time_import_mars_tensor(self):
35+
proc = subprocess.Popen([sys.executable, "-c", "import mars.tensor"])
36+
proc.wait(120)
37+
38+
def time_import_mars_dataframe(self):
39+
proc = subprocess.Popen([sys.executable, "-c", "import mars.dataframe"])
40+
proc.wait(120)

benchmarks/asv_bench/benchmarks/serialize.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@
4646
from mars.utils import tokenize
4747

4848

49+
# do warmup
50+
serialize(None)
51+
52+
4953
class SerializableChild(Serializable):
5054
str_field = StringField("str_field")
5155
int_field = Int64Field("int_field")

benchmarks/asv_bench/benchmarks/tokenize.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ class MockOperand(Operand, TensorOperandMixin):
4747

4848
class TokenizeOperandSuite:
4949
def setup(self):
50+
# do some warm up
51+
tokenize(None)
52+
5053
chunks = []
5154
for idx in range(1000):
5255
op = MockOperand(

mars/_utils.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ cdef class TypeDispatcher:
1717
cdef dict _handlers
1818
cdef dict _lazy_handlers
1919
cdef dict _inherit_handlers
20+
cpdef object __weakref__
2021

2122
cpdef void register(self, object type_, object handler)
2223
cpdef void unregister(self, object type_)

mars/_utils.pyx

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import uuid
2222
from datetime import date, datetime, timedelta, tzinfo
2323
from enum import Enum
2424
from functools import lru_cache, partial
25+
from weakref import WeakSet
2526

2627
import numpy as np
2728
import pandas as pd
@@ -31,17 +32,13 @@ try:
3132
from pandas.tseries.offsets import Tick as PDTick
3233
except ImportError:
3334
PDTick = None
34-
try:
35-
from sqlalchemy.sql import Selectable as SASelectable
36-
from sqlalchemy.sql.sqltypes import TypeEngine as SATypeEngine
37-
except ImportError:
38-
SASelectable, SATypeEngine = None, None
3935

4036
from .lib.mmh3 import hash as mmh_hash, hash_bytes as mmh_hash_bytes, \
4137
hash_from_buffer as mmh3_hash_from_buffer
4238

4339
cdef bint _has_cupy = bool(pkgutil.find_loader('cupy'))
4440
cdef bint _has_cudf = bool(pkgutil.find_loader('cudf'))
41+
cdef bint _has_sqlalchemy = bool(pkgutil.find_loader('sqlalchemy'))
4542

4643

4744
cpdef str to_str(s, encoding='utf-8'):
@@ -83,29 +80,41 @@ cpdef unicode to_text(s, encoding='utf-8'):
8380
raise TypeError(f"Could not convert from {s} to unicode.")
8481

8582

83+
_type_dispatchers = WeakSet()
84+
85+
8686
cdef class TypeDispatcher:
8787
def __init__(self):
8888
self._handlers = dict()
8989
self._lazy_handlers = dict()
9090
# store inherited handlers to facilitate unregistering
9191
self._inherit_handlers = dict()
9292

93+
_type_dispatchers.add(self)
94+
9395
cpdef void register(self, object type_, object handler):
9496
if isinstance(type_, str):
9597
self._lazy_handlers[type_] = handler
98+
elif isinstance(type_, tuple):
99+
for t in type_:
100+
self.register(t, handler)
96101
else:
97102
self._handlers[type_] = handler
98103

99104
cpdef void unregister(self, object type_):
100-
self._lazy_handlers.pop(type_, None)
101-
self._handlers.pop(type_, None)
102-
self._inherit_handlers.clear()
105+
if isinstance(type_, tuple):
106+
for t in type_:
107+
self.unregister(t)
108+
else:
109+
self._lazy_handlers.pop(type_, None)
110+
self._handlers.pop(type_, None)
111+
self._inherit_handlers.clear()
103112

104113
cdef _reload_lazy_handlers(self):
105114
for k, v in self._lazy_handlers.items():
106115
mod_name, obj_name = k.rsplit('.', 1)
107116
mod = importlib.import_module(mod_name, __name__)
108-
self._handlers[getattr(mod, obj_name)] = v
117+
self.register(getattr(mod, obj_name), v)
109118
self._lazy_handlers = dict()
110119

111120
cpdef get_handler(self, object type_):
@@ -134,6 +143,11 @@ cdef class TypeDispatcher:
134143
def __call__(self, object obj, *args, **kwargs):
135144
return self.get_handler(type(obj))(obj, *args, **kwargs)
136145

146+
@staticmethod
147+
def reload_all_lazy_handlers():
148+
for dispatcher in _type_dispatchers:
149+
(<TypeDispatcher>dispatcher)._reload_lazy_handlers()
150+
137151

138152
cdef inline build_canonical_bytes(tuple args, kwargs):
139153
if kwargs:
@@ -376,10 +390,13 @@ if _has_cudf:
376390

377391
if PDTick is not None:
378392
tokenize_handler.register(PDTick, tokenize_pandas_tick)
379-
if SATypeEngine is not None:
380-
tokenize_handler.register(SATypeEngine, tokenize_sqlalchemy_data_type)
381-
if SASelectable is not None:
382-
tokenize_handler.register(SASelectable, tokenize_sqlalchemy_selectable)
393+
if _has_sqlalchemy:
394+
tokenize_handler.register(
395+
"sqlalchemy.sql.sqltypes.TypeEngine", tokenize_sqlalchemy_data_type
396+
)
397+
tokenize_handler.register(
398+
"sqlalchemy.sql.Selectable", tokenize_sqlalchemy_selectable
399+
)
383400

384401
cpdef register_tokenizer(cls, handler):
385402
tokenize_handler.register(cls, handler)

mars/contrib/dask/tests/test_dask.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
from ....utils import lazy_import
1818
from .. import convert_dask_collection, mars_scheduler
1919

20-
dask_installed = lazy_import("dask", globals=globals()) is not None
21-
mimesis_installed = lazy_import("mimesis", globals=globals()) is not None
20+
dask_installed = lazy_import("dask") is not None
21+
mimesis_installed = lazy_import("mimesis") is not None
2222

2323

2424
@pytest.mark.skipif(not dask_installed, reason="dask not installed")

mars/core/entrypoints.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
import warnings
1717
import functools
1818

19-
from pkg_resources import iter_entry_points
20-
2119
logger = logging.getLogger(__name__)
2220

2321

@@ -28,6 +26,8 @@ def init_extension_entrypoints():
2826
"""Execute all `mars_extensions` entry points with the name `init`
2927
If extensions have already been initialized, this function does nothing.
3028
"""
29+
from pkg_resources import iter_entry_points
30+
3131
for entry_point in iter_entry_points("mars_extensions", "init"):
3232
logger.info("Loading extension: %s", entry_point)
3333
try:

mars/dataframe/base/_duplicate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from ..initializer import DataFrame as asdataframe
2727
from ..operands import DataFrameOperandMixin, DataFrameShuffleProxy
2828

29-
cudf = lazy_import("cudf", globals=globals())
29+
cudf = lazy_import("cudf")
3030

3131

3232
class DuplicateOperand(MapReduceOperand, DataFrameOperandMixin):

mars/dataframe/base/drop_duplicates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
)
2929
from ._duplicate import DuplicateOperand, validate_subset
3030

31-
cudf = lazy_import("cudf", globals=globals())
31+
cudf = lazy_import("cudf")
3232

3333

3434
class DataFrameDropDuplicates(DuplicateOperand):

mars/dataframe/base/memory_usage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from ..core import IndexValue
2828
from ..utils import parse_index
2929

30-
cudf = lazy_import("cudf", globals=globals())
30+
cudf = lazy_import("cudf")
3131

3232

3333
class DataFrameMemoryUsage(DataFrameOperand, DataFrameOperandMixin):

0 commit comments

Comments
 (0)