Commit b05ef81

Check initialization of serializables on CI (#3007)
1 parent: 03ed810 · commit: b05ef81

63 files changed: +328, -1015 lines
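
The bulk of this commit applies one refactor across the operand classes below: underscore-prefixed serializable fields, together with the hand-written `__init__` keyword plumbing and read-only properties that exposed them, are replaced by public field declarations with explicit `default=` values, so code reads and assigns the attributes directly. A condensed before/after sketch of the pattern (the `FetchHead` classes are illustrative, not taken from any one file; the sketch assumes, as the diffs below do, that a serializable's `__init__` accepts keyword arguments matching its declared field names):

    from mars.serialization.serializables import Int64Field, Serializable

    # Before: private field, keyword plumbing in __init__, read-only property.
    class FetchHeadOld(Serializable):
        _nrows = Int64Field("nrows")

        def __init__(self, nrows=None, **kw):
            super().__init__(_nrows=nrows, **kw)

        @property
        def nrows(self):
            return self._nrows

    # After: public field with a default; no __init__ or property boilerplate.
    class FetchHeadNew(Serializable):
        nrows = Int64Field("nrows", default=None)

    op = FetchHeadNew(nrows=10)
    op.nrows = None  # direct assignment replaces writes to op._nrows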

.github/workflows/benchmark-ci.yml

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ jobs:
       - name: Run ASV benchmarks
         run: |
           source ./ci/reload-env.sh
+          unset CI
           cd benchmarks/asv_bench
           asv check -E existing
           git remote add upstream https://github.com/mars-project/mars.git

azure-pipelines.yml

Lines changed: 1 addition & 0 deletions
@@ -87,6 +87,7 @@ jobs:
     - bash: |
         set -e
         source ci/reload-env.sh
+        export CI=true
         mkdir -p build
         pytest $PYTEST_CONFIG mars/$(mars.test.module)
         mv .coverage build/.coverage.main.file
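
The two workflow tweaks above point in opposite directions: the Azure pipeline exports `CI=true` before running pytest, while the ASV benchmark job unsets `CI`. Presumably the stricter initialization check for serializables named in the commit title is keyed on the `CI` environment variable, so it runs during the test suite but not during benchmarks. A minimal sketch of such an environment-gated check, under that assumption and not Mars' actual implementation:

    import os

    _CHECK_INIT = bool(os.environ.get("CI"))  # hypothetical switch

    class SerializableSketch:
        _FIELDS = {"nrows", "columns"}  # hypothetical registry of declared fields

        def __init__(self, **kwargs):
            if _CHECK_INIT:
                # On CI, fail loudly if a keyword does not match a declared field.
                unknown = set(kwargs) - self._FIELDS
                if unknown:
                    raise TypeError(f"unknown init fields: {sorted(unknown)}")
            for name, value in kwargs.items():
                setattr(self, name, value)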

mars/dataframe/base/value_counts.py

Lines changed: 18 additions & 75 deletions
@@ -30,94 +30,37 @@
 class DataFrameValueCounts(DataFrameOperand, DataFrameOperandMixin):
     _op_type_ = opcodes.VALUE_COUNTS
 
-    _input = KeyField("input")
-    _normalize = BoolField("normalize")
-    _sort = BoolField("sort")
-    _ascending = BoolField("ascending")
-    _bins = Int64Field("bins")
-    _dropna = BoolField("dropna")
-    _method = StringField("method")
-    _convert_index_to_interval = BoolField("convert_index_to_interval")
-    _nrows = Int64Field("nrows")
-
-    def __init__(
-        self,
-        normalize=None,
-        sort=None,
-        ascending=None,
-        bins=None,
-        dropna=None,
-        method=None,
-        convert_index_to_interval=None,
-        nrows=None,
-        **kw
-    ):
-        super().__init__(
-            _normalize=normalize,
-            _sort=sort,
-            _ascending=ascending,
-            _bins=bins,
-            _dropna=dropna,
-            _method=method,
-            _convert_index_to_interval=convert_index_to_interval,
-            _nrows=nrows,
-            **kw
-        )
+    input = KeyField("input")
+    normalize = BoolField("normalize")
+    sort = BoolField("sort")
+    ascending = BoolField("ascending")
+    bins = Int64Field("bins")
+    dropna = BoolField("dropna")
+    method = StringField("method")
+    convert_index_to_interval = BoolField("convert_index_to_interval", default=None)
+    nrows = Int64Field("nrows", default=None)
+
+    def __init__(self, **kw):
+        super().__init__(**kw)
         self.output_types = [OutputType.series]
 
-    @property
-    def input(self):
-        return self._input
-
-    @property
-    def normalize(self):
-        return self._normalize
-
-    @property
-    def sort(self):
-        return self._sort
-
-    @property
-    def ascending(self):
-        return self._ascending
-
-    @property
-    def bins(self):
-        return self._bins
-
-    @property
-    def dropna(self):
-        return self._dropna
-
-    @property
-    def method(self):
-        return self._method
-
-    @property
-    def convert_index_to_interval(self):
-        return self._convert_index_to_interval
-
-    @property
-    def nrows(self):
-        return self._nrows
-
     def _set_inputs(self, inputs):
         super()._set_inputs(inputs)
-        self._input = self._inputs[0]
+        self.input = self._inputs[0]
 
     def __call__(self, inp):
         test_series = build_series(inp).value_counts(normalize=self.normalize)
-        if self._bins is not None:
+        if self.bins is not None:
             from .cut import cut
 
             # cut
             try:
-                inp = cut(inp, self._bins, include_lowest=True)
+                inp = cut(inp, self.bins, include_lowest=True)
             except TypeError:  # pragma: no cover
                 raise TypeError("bins argument only works with numeric data.")
 
-            self._bins = None
-            self._convert_index_to_interval = True
+            self.bins = None
+            self.convert_index_to_interval = True
             return self.new_series(
                 [inp],
                 shape=(np.nan,),
@@ -174,7 +117,7 @@ def tile(cls, op: "DataFrameValueCounts"):
 
         if op.nrows:
             # set to sort_values
-            inp.op._nrows = op.nrows
+            inp.op.nrows = op.nrows
         elif op.nrows:
             inp = inp.iloc[: op.nrows]

mars/dataframe/datasource/core.py

Lines changed: 2 additions & 6 deletions
@@ -36,11 +36,7 @@ class HeadOptimizedDataSource(DataFrameOperand, DataFrameOperandMixin):
     # First, it will try to trigger first_chunk.head() and raise TilesError,
     # When iterative tiling is triggered,
     # check if the first_chunk.head() meets requirements.
-    _nrows = Int64Field("nrows", default=None)
-
-    @property
-    def nrows(self):
-        return self._nrows
+    nrows = Int64Field("nrows", default=None)
 
     @property
     def first_chunk(self):
@@ -67,7 +63,7 @@ def _tile_head(cls, op: "HeadOptimizedDataSource"):
             tileds[0]._shape = chunk_shape
         else:
             for chunk in tileds[0].chunks:
-                chunk.op._nrows = None
+                chunk.op.nrows = None
             # otherwise
             tiled = yield from recursive_tile(tileds[0].iloc[: op.nrows])
             tileds = [tiled]
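
Declaring `nrows = Int64Field("nrows", default=None)` presumably gives descriptor-like behaviour: reading the attribute before anything has been assigned yields the declared default, while plain assignment (as in `chunk.op.nrows = None` above) stores a new per-instance value. A simplified stand-in, not Mars' actual field implementation, just to illustrate that behaviour:

    # A toy descriptor mimicking a serializable field with a default value.
    class FieldSketch:
        def __init__(self, tag, default=None):
            self.tag = tag
            self.default = default

        def __set_name__(self, owner, name):
            self.attr = "_" + name  # per-instance storage slot

        def __get__(self, obj, objtype=None):
            if obj is None:
                return self
            return getattr(obj, self.attr, self.default)

        def __set__(self, obj, value):
            setattr(obj, self.attr, value)

    class HeadSourceSketch:
        nrows = FieldSketch("nrows", default=None)

    op = HeadSourceSketch()
    print(op.nrows)   # None -- the declared default
    op.nrows = 5
    print(op.nrows)   # 5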

mars/dataframe/datasource/from_index.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@
 
 from ... import opcodes
 from ...core import recursive_tile
-from ...serialization.serializables import KeyField, StringField
+from ...serialization.serializables import AnyField, KeyField
 from ..initializer import Index
 from ..operands import DataFrameOperand, DataFrameOperandMixin
 
@@ -24,7 +24,7 @@ class SeriesFromIndex(DataFrameOperand, DataFrameOperandMixin):
 
     input_ = KeyField("input_")
     index = KeyField("index")
-    name = StringField("name", default=None)
+    name = AnyField("name", default=None)
 
     def _set_inputs(self, inputs):
         super()._set_inputs(inputs)
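
Switching `name` from `StringField` to `AnyField` presumably reflects that a pandas Index (and hence the resulting Series) may carry a non-string name; any hashable value is allowed, so a string-typed field would reject legal inputs. For instance:

    import pandas as pd

    # Index names are not limited to strings; any hashable value is accepted.
    idx = pd.Index([1, 2, 3], name=("level", 0))
    print(idx.to_series().name)  # ('level', 0)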

mars/dataframe/datasource/from_records.py

Lines changed: 7 additions & 38 deletions
@@ -28,46 +28,15 @@
 class DataFrameFromRecords(DataFrameOperand, DataFrameOperandMixin):
     _op_type_ = OperandDef.DATAFRAME_FROM_RECORDS
 
-    _columns = ListField("columns")
-    _exclude = ListField("exclude")
-    _coerce_float = BoolField("coerce_float")
-    _nrows = Int32Field("nrows")
-
-    def __init__(
-        self,
-        index=None,
-        columns=None,
-        exclude=None,
-        coerce_float=False,
-        nrows=None,
-        **kw
-    ):
+    columns = ListField("columns", default=None)
+    exclude = ListField("exclude", default=None)
+    coerce_float = BoolField("coerce_float", default=False)
+    nrows = Int32Field("nrows", default=None)
+
+    def __init__(self, index=None, columns=None, **kw):
         if index is not None or columns is not None:
             raise NotImplementedError("Specifying index value is not supported for now")
-        super().__init__(
-            _exclude=exclude,
-            _columns=columns,
-            _coerce_float=coerce_float,
-            _nrows=nrows,
-            _output_types=[OutputType.dataframe],
-            **kw
-        )
-
-    @property
-    def columns(self):
-        return self._columns
-
-    @property
-    def exclude(self):
-        return self._exclude
-
-    @property
-    def coerce_float(self):
-        return self._coerce_float
-
-    @property
-    def nrows(self):
-        return self._nrows
+        super().__init__(columns=columns, _output_types=[OutputType.dataframe], **kw)
 
     def __call__(self, data):
         if self.nrows is None:

mars/dataframe/datasource/read_raydataset.py

Lines changed: 8 additions & 39 deletions
@@ -46,33 +46,10 @@ class DataFrameReadRayDataset(
 ):
     _op_type_ = OperandDef.READ_RAYDATASET
 
-    _refs = AnyField("refs")
-    _columns = ListField("columns")
-    _incremental_index = BoolField("incremental_index")
-    _nrows = Int64Field("nrows")
-
-    def __init__(
-        self, refs=None, columns=None, incremental_index=None, nrows=None, **kw
-    ):
-        super().__init__(
-            _refs=refs,
-            _columns=columns,
-            _incremental_index=incremental_index,
-            _nrows=nrows,
-            **kw,
-        )
-
-    @property
-    def refs(self):
-        return self._refs
-
-    @property
-    def columns(self):
-        return self._columns
-
-    @property
-    def incremental_index(self):
-        return self._incremental_index
+    refs = AnyField("refs", default=None)
+    columns = ListField("columns", default=None)
+    incremental_index = BoolField("incremental_index", default=None)
+    nrows = Int64Field("nrows", default=None)
 
     @classmethod
     def _tile_partitioned(cls, op: "DataFrameReadRayDataset"):
@@ -157,20 +134,12 @@ def read_raydataset(*args, **kwargs):
 
 class DataFrameReadMLDataset(HeadOptimizedDataSource):
     _op_type_ = OperandDef.READ_MLDATASET
-    _mldataset = ReferenceField("mldataset", "ray.util.data.MLDataset")
-    _columns = ListField("columns")
 
-    def __init__(self, mldataset=None, columns=None, **kw):
-        super().__init__(
-            _mldataset=mldataset,
-            _columns=columns,
-            _output_types=[OutputType.dataframe],
-            **kw,
-        )
+    mldataset = ReferenceField("mldataset", "ray.util.data.MLDataset", default=None)
+    columns = ListField("columns", default=None)
 
-    @property
-    def mldataset(self):
-        return self._mldataset
+    def __init__(self, **kw):
+        super().__init__(_output_types=[OutputType.dataframe], **kw)
 
     def _update_key(self):
         """We can't direct generate token for mldataset when we use

mars/dataframe/datasource/read_sql.py

Lines changed: 10 additions & 10 deletions
@@ -51,15 +51,15 @@ def _get_logic_key_token_values(self):
         fields_to_tokenize = [
             getattr(self, k, None)
             for k in [
-                "_table_or_sql",
-                "_schema",
-                "_coerce_float",
-                "_parse_dates",
-                "_columns",
-                "_method",
-                "_incremental_index",
-                "_use_arrow_dtype",
-                "_partition_col",
+                "table_or_sql",
+                "schema",
+                "coerce_float",
+                "parse_dates",
+                "columns",
+                "method",
+                "incremental_index",
+                "use_arrow_dtype",
+                "partition_col",
             ]
         ]
         return super()._get_logic_key_token_values() + fields_to_tokenize
@@ -104,7 +104,7 @@ class DataFrameReadSQL(
     nrows = Int64Field("nrows", default=None)
 
     def get_columns(self):
-        return self._columns
+        return self.columns
 
     def set_pruned_columns(self, columns, *, keep_order=None):
         self.columns = columns
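
The rename inside `_get_logic_key_token_values` has to follow the field renames because `getattr(self, k, None)` falls back to `None`: with the old underscore-prefixed names left in place nothing would fail, but every entry would tokenize as `None` and the logic key would stop reflecting these fields. A minimal illustration (the class below is hypothetical):

    class OpSketch:
        table_or_sql = "SELECT 1"  # hypothetical field value

    op = OpSketch()
    print(getattr(op, "_table_or_sql", None))  # None -- stale name, no error raised
    print(getattr(op, "table_or_sql", None))   # 'SELECT 1'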
