Skip to content

Commit 21b1c1a

Browse files
authored
Remove should_be_monotonic property (#2949)
1 parent adb4161 commit 21b1c1a

File tree

10 files changed

+32
-103
lines changed

10 files changed

+32
-103
lines changed

mars/dataframe/arithmetic/core.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -493,21 +493,17 @@ def _calc_properties(cls, x1, x2=None, axis="columns"):
493493
index=x1.dtypes.index,
494494
)
495495
columns = copy.copy(x1.columns_value)
496-
columns.value.should_be_monotonic = False
497496
column_shape = len(dtypes)
498497
elif x1.dtypes is not None and x2.dtypes is not None:
499498
dtypes = infer_dtypes(x1.dtypes, x2.dtypes, cls._operator)
500499
columns = parse_index(dtypes.index, store_data=True)
501-
columns.value.should_be_monotonic = True
502500
column_shape = len(dtypes)
503501
if x1.index_value is not None and x2.index_value is not None:
504502
if x1.index_value.key == x2.index_value.key:
505503
index = copy.copy(x1.index_value)
506-
index.value.should_be_monotonic = False
507504
index_shape = x1.shape[0]
508505
else:
509506
index = infer_index_value(x1.index_value, x2.index_value)
510-
index.value.should_be_monotonic = True
511507
if index.key == x1.index_value.key == x2.index_value.key and (
512508
not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
513509
):
@@ -539,12 +535,10 @@ def _calc_properties(cls, x1, x2=None, axis="columns"):
539535
index=x1.dtypes.index,
540536
)
541537
columns = copy.copy(x1.columns_value)
542-
columns.value.should_be_monotonic = False
543538
column_shape = len(dtypes)
544539
else: # pragma: no cover
545540
dtypes = x1.dtypes # FIXME
546541
columns = infer_index_value(x1.columns_value, x2.index_value)
547-
columns.value.should_be_monotonic = True
548542
column_shape = np.nan
549543
else:
550544
assert axis == "index" or axis == 0
@@ -562,7 +556,6 @@ def _calc_properties(cls, x1, x2=None, axis="columns"):
562556
index=x1.dtypes.index,
563557
)
564558
index = copy.copy(x1.index_value)
565-
index.value.should_be_monotonic = False
566559
index_shape = x1.shape[0]
567560
else:
568561
if x1.dtypes is not None:
@@ -574,7 +567,6 @@ def _calc_properties(cls, x1, x2=None, axis="columns"):
574567
index=x1.dtypes.index,
575568
)
576569
index = infer_index_value(x1.index_value, x2.index_value)
577-
index.value.should_be_monotonic = True
578570
index_shape = np.nan
579571
return {
580572
"shape": (index_shape, column_shape),
@@ -592,11 +584,9 @@ def _calc_properties(cls, x1, x2=None, axis="columns"):
592584
if x1.index_value is not None and x2.index_value is not None:
593585
if x1.index_value.key == x2.index_value.key:
594586
index = copy.copy(x1.index_value)
595-
index.value.should_be_monotonic = False
596587
index_shape = x1.shape[0]
597588
else:
598589
index = infer_index_value(x1.index_value, x2.index_value)
599-
index.value.should_be_monotonic = True
600590
if index.key == x1.index_value.key == x2.index_value.key and (
601591
not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
602592
):

mars/dataframe/arithmetic/tests/test_arithmetic.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,7 @@ def test_without_shuffle(func_name, func_opts):
176176
pd.testing.assert_index_equal(
177177
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
178178
)
179-
assert df3.columns_value.should_be_monotonic is True
180179
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
181-
assert df3.index_value.should_be_monotonic is True
182180
pd.testing.assert_index_equal(
183181
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
184182
)
@@ -192,9 +190,7 @@ def test_without_shuffle(func_name, func_opts):
192190
pd.testing.assert_index_equal(
193191
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
194192
)
195-
assert df3.columns_value.should_be_monotonic is True
196193
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
197-
assert df3.index_value.should_be_monotonic is True
198194
pd.testing.assert_index_equal(
199195
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
200196
)
@@ -419,7 +415,6 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
419415
df2.columns_value.to_pandas(), pd.Index([], dtype=np.int64)
420416
)
421417
assert df2.columns_value.key != df1.columns_value.key
422-
assert df2.columns_value.should_be_monotonic is True
423418

424419
df1, df2, s1 = tile(df1, df2, s1)
425420

@@ -614,7 +609,6 @@ def test_series_and_series_with_shuffle(func_name, func_opts):
614609
pd.testing.assert_index_equal(
615610
s3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
616611
)
617-
assert s3.index_value.should_be_monotonic is True
618612

619613
s1, s2, s3 = tile(s1, s2, s3)
620614

@@ -673,9 +667,7 @@ def test_identical_index_and_columns(func_name, func_opts):
673667
pd.testing.assert_index_equal(
674668
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
675669
)
676-
assert df3.columns_value.should_be_monotonic is False
677670
assert isinstance(df3.index_value.value, IndexValue.RangeIndex)
678-
assert df3.index_value.should_be_monotonic is False
679671
pd.testing.assert_index_equal(df3.index_value.to_pandas(), pd.RangeIndex(0, 10))
680672
assert df3.index_value.key == df1.index_value.key
681673
assert df3.index_value.key == df2.index_value.key
@@ -734,9 +726,7 @@ def test_with_one_shuffle(func_name, func_opts):
734726
pd.testing.assert_index_equal(
735727
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
736728
)
737-
assert df3.columns_value.should_be_monotonic is True
738729
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
739-
assert df3.index_value.should_be_monotonic is True
740730
pd.testing.assert_index_equal(
741731
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
742732
)
@@ -868,9 +858,7 @@ def test_with_all_shuffle(func_name, func_opts):
868858
pd.testing.assert_index_equal(
869859
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
870860
)
871-
assert df3.columns_value.should_be_monotonic is True
872861
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
873-
assert df3.index_value.should_be_monotonic is True
874862
pd.testing.assert_index_equal(
875863
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
876864
)
@@ -970,9 +958,7 @@ def test_with_all_shuffle(func_name, func_opts):
970958
pd.testing.assert_index_equal(
971959
df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
972960
)
973-
assert df6.columns_value.should_be_monotonic is True
974961
assert isinstance(df6.index_value.value, IndexValue.Int64Index)
975-
assert df6.index_value.should_be_monotonic is True
976962
pd.testing.assert_index_equal(
977963
df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
978964
)
@@ -1077,9 +1063,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
10771063
pd.testing.assert_index_equal(
10781064
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
10791065
)
1080-
assert df3.columns_value.should_be_monotonic is True
10811066
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
1082-
assert df3.index_value.should_be_monotonic is True
10831067
pd.testing.assert_index_equal(
10841068
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
10851069
)
@@ -1191,9 +1175,7 @@ def test_both_one_chunk(func_name, func_opts):
11911175
pd.testing.assert_index_equal(
11921176
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
11931177
)
1194-
assert df3.columns_value.should_be_monotonic is True
11951178
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
1196-
assert df3.index_value.should_be_monotonic is True
11971179
pd.testing.assert_index_equal(
11981180
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
11991181
)
@@ -1237,9 +1219,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
12371219
pd.testing.assert_index_equal(
12381220
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
12391221
)
1240-
assert df3.columns_value.should_be_monotonic is True
12411222
assert isinstance(df3.index_value.value, IndexValue.Int64Index)
1242-
assert df3.index_value.should_be_monotonic is True
12431223
pd.testing.assert_index_equal(
12441224
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
12451225
)
@@ -1332,9 +1312,7 @@ def test_on_same_dataframe(func_name, func_opts):
13321312
pd.testing.assert_index_equal(
13331313
df2.columns_value.to_pandas(), func_opts.func(data, data).columns
13341314
)
1335-
assert df2.columns_value.should_be_monotonic is False
13361315
assert isinstance(df2.index_value.value, IndexValue.Int64Index)
1337-
assert df2.index_value.should_be_monotonic is False
13381316
pd.testing.assert_index_equal(
13391317
df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
13401318
)

mars/dataframe/arithmetic/tests/test_arithmetic_execution.py

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import operator
1616
from dataclasses import dataclass
1717
from functools import partial
18-
from typing import Callable
18+
from typing import Callable, Union
1919

2020
import numpy as np
2121
import pandas as pd
@@ -47,6 +47,16 @@ class FunctionOptions:
4747
)
4848

4949

50+
def sort_dataframe(
    df: Union[pd.DataFrame, pd.Series], index: bool = True, columns: bool = True
):
    """Return ``df`` sorted by its row index and, for DataFrames, its column labels.

    Used to normalise label order on both sides of a comparison, so that
    ``expected`` and ``result`` are compared on content rather than on the
    order in which rows/columns happen to be laid out.

    The input object is never modified: each sort produces a new object.
    When no sort is requested (or applicable) the input itself is returned.

    Parameters
    ----------
    df : pd.DataFrame or pd.Series
        Object to sort.
    index : bool, default True
        Sort along the row index.
    columns : bool, default True
        Sort along the column labels; ignored for a Series, which has none.

    Returns
    -------
    pd.DataFrame or pd.Series
        The sorted object, of the same type as ``df``.
    """
    if index:
        # Non in-place sort: callers keep their original ordering intact.
        df = df.sort_index()
    if columns and isinstance(df, pd.DataFrame):
        df = df.sort_index(axis=1)
    return df
58+
59+
5060
def to_boolean_if_needed(func_name, value, split_value=0.5):
5161
if func_name in ["__and__", "__or__", "__xor__"]:
5262
return value > split_value
@@ -81,7 +91,7 @@ def test_without_shuffle_execution(setup, func_name, func_opts):
8191
expected = func_opts.func(data1, data2)
8292
result = df3.execute().fetch()
8393

84-
pd.testing.assert_frame_equal(expected, result)
94+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
8595

8696

8797
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
@@ -113,7 +123,7 @@ def test_with_one_shuffle_execution(setup, func_name, func_opts):
113123
expected = func_opts.func(data1, data2)
114124
result = df3.execute().fetch()
115125

116-
pd.testing.assert_frame_equal(expected, result)
126+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
117127

118128
# only 1 axis is monotonic
119129
# data1 with columns split into [0...4], [5...9],
@@ -138,7 +148,7 @@ def test_with_one_shuffle_execution(setup, func_name, func_opts):
138148
expected = func_opts.func(data1, data2)
139149
result = df3.execute().fetch()
140150

141-
pd.testing.assert_frame_equal(expected, result)
151+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
142152

143153

144154
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
@@ -168,7 +178,7 @@ def test_with_all_shuffle_execution(setup, func_name, func_opts):
168178
expected = func_opts.func(data1, data2)
169179
result = df3.execute().fetch()
170180

171-
pd.testing.assert_frame_equal(expected, result)
181+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
172182

173183

174184
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
@@ -257,7 +267,7 @@ def test_without_shuffle_and_with_one_chunk(setup, func_name, func_opts):
257267
expected = func_opts.func(data1, data2)
258268
result = df3.execute().fetch()
259269

260-
pd.testing.assert_frame_equal(expected, result)
270+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
261271

262272
# only 1 axis is monotonic
263273
# data1 with columns split into [0...4], [5...9],
@@ -282,7 +292,7 @@ def test_without_shuffle_and_with_one_chunk(setup, func_name, func_opts):
282292
expected = func_opts.func(data1, data2)
283293
result = df3.execute().fetch()
284294

285-
pd.testing.assert_frame_equal(expected, result)
295+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
286296

287297

288298
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
@@ -312,7 +322,7 @@ def test_with_shuffle_and_with_one_chunk(setup, func_name, func_opts):
312322
expected = func_opts.func(data1, data2)
313323
result = df3.execute().fetch()
314324

315-
pd.testing.assert_frame_equal(expected, result)
325+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
316326

317327
# only 1 axis is monotonic
318328
# data1 with columns split into [0...4], [5...9],
@@ -337,7 +347,7 @@ def test_with_shuffle_and_with_one_chunk(setup, func_name, func_opts):
337347
expected = func_opts.func(data1, data2)
338348
result = df3.execute().fetch()
339349

340-
pd.testing.assert_frame_equal(expected, result)
350+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
341351

342352

343353
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
@@ -497,7 +507,7 @@ def test_with_shuffle_on_string_index(setup, func_name, func_opts):
497507
expected = func_opts.func(data1, data2)
498508
result = df3.execute().fetch()
499509

500-
pd.testing.assert_frame_equal(expected, result)
510+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
501511

502512

503513
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
@@ -527,23 +537,23 @@ def test_dataframe_and_series(setup, func_name, func_opts):
527537

528538
expected = getattr(data1[[1]], func_opts.func_name)(data2[1], axis="index")
529539
result = r1.execute().fetch()
530-
pd.testing.assert_frame_equal(expected, result)
540+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
531541

532542
# operate on dataframe and series without shuffle
533543
df2 = from_pandas(data1, chunk_size=(5, 5))
534544
r2 = getattr(df2, func_opts.func_name)(s1, axis="index")
535545

536546
expected = getattr(data1, func_opts.func_name)(data2[1], axis="index")
537547
result = r2.execute().fetch()
538-
pd.testing.assert_frame_equal(expected, result)
548+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
539549

540550
# operate on dataframe and series with shuffle
541551
df3 = from_pandas(data1, chunk_size=(5, 5))
542552
r3 = getattr(df3, func_opts.func_name)(s1, axis="columns")
543553

544554
expected = getattr(data1, func_opts.func_name)(data2[1], axis="columns")
545555
result = r3.execute().fetch()
546-
pd.testing.assert_frame_equal(expected, result)
556+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
547557

548558
# test both one chunk, axis=0
549559
pdf = pd.DataFrame({"ca": [1, 3, 2], "cb": [360, 180, 2]}, index=[1, 2, 3])
@@ -553,7 +563,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
553563
mars_series = from_pandas_series(series)
554564
result = getattr(df, func_opts.func_name)(mars_series, axis=0).execute().fetch()
555565
expected = getattr(pdf, func_opts.func_name)(series, axis=0)
556-
pd.testing.assert_frame_equal(expected, result)
566+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
557567

558568
# test different number of chunks, axis=0
559569
pdf = pd.DataFrame({"ca": [1, 3, 2], "cb": [360, 180, 2]}, index=[1, 2, 3])
@@ -563,7 +573,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
563573
mars_series = from_pandas_series(series)
564574
result = getattr(df, func_opts.func_name)(mars_series, axis=0).execute().fetch()
565575
expected = getattr(pdf, func_opts.func_name)(series, axis=0)
566-
pd.testing.assert_frame_equal(expected, result)
576+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
567577

568578
# test with row shuffle, axis=0
569579
pdf = pd.DataFrame({"ca": [1, 3, 2], "cb": [360, 180, 2]}, index=[2, 1, 3])
@@ -575,7 +585,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
575585
expected = getattr(pdf, func_opts.func_name)(series, axis=0).reindex([3, 1, 2])
576586
# modify the order of rows
577587
result = result.reindex(index=[3, 1, 2])
578-
pd.testing.assert_frame_equal(expected, result)
588+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
579589

580590
# test both one chunk, axis=1
581591
pdf = pd.DataFrame(
@@ -587,7 +597,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
587597
mars_series = from_pandas_series(series)
588598
result = getattr(df, func_opts.func_name)(mars_series, axis=1).execute().fetch()
589599
expected = getattr(pdf, func_opts.func_name)(series, axis=1)
590-
pd.testing.assert_frame_equal(expected, result)
600+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
591601

592602
# test different number of chunks, axis=1
593603
pdf = pd.DataFrame(
@@ -599,7 +609,7 @@ def test_dataframe_and_series(setup, func_name, func_opts):
599609
mars_series = from_pandas_series(series)
600610
result = getattr(df, func_opts.func_name)(mars_series, axis=1).execute().fetch()
601611
expected = getattr(pdf, func_opts.func_name)(series, axis=1)
602-
pd.testing.assert_frame_equal(expected, result)
612+
pd.testing.assert_frame_equal(sort_dataframe(expected), sort_dataframe(result))
603613

604614
# test with row shuffle, axis=1
605615
pdf = pd.DataFrame(
@@ -665,7 +675,7 @@ def test_series(setup, func_name, func_opts):
665675
)
666676
result = r.execute().fetch()
667677
expected = func_opts.func(s1, s2)
668-
pd.testing.assert_series_equal(expected, result)
678+
pd.testing.assert_series_equal(sort_dataframe(expected), sort_dataframe(result))
669679

670680
if func_opts.func_name in ["__and__", "__or__", "__xor__"]:
671681
# bitwise logical operators don't support floating point scalars

0 commit comments

Comments
 (0)