Skip to content

Commit 996ce47

Browse files
authored
Fix stats tests and pin sphinx version (#3313)
1 parent bde43bb commit 996ce47

File tree

4 files changed

+91
-3
lines changed

4 files changed

+91
-3
lines changed

docs/requirements-doc.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pytest-cov>=2.5.0
1010
pytest-timeout>=1.2.0
1111
cloudpickle>=1.0.0
1212
sqlalchemy>=1.2.0
13-
sphinx>=3.0.0
13+
sphinx<6.0.0
1414
pydata-sphinx-theme>=0.3.0
1515
sphinx-intl>=0.9.9
1616
ipython>=4.0

mars/dataframe/datasource/date_range.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import numpy as np
1919
import pandas as pd
20+
from pandas import Timestamp, NaT
2021
from pandas.tseries.frequencies import to_offset
2122
from pandas.tseries.offsets import Tick
2223
from pandas._libs.tslibs import timezones
@@ -51,6 +52,72 @@ def normalize_date(dt): # from pandas/_libs/tslibs/conversion.pyx
5152
_date_range_use_inclusive = pd_release_version[:2] >= (1, 4)
5253

5354

55+
# adapted from pandas.core.arrays.datetimes.generate_range
def generate_range_count(
    start=None, end=None, periods=None, offset=None
):  # pragma: no cover
    """Count the timestamps of a date range without materializing it.

    Follows the stepping logic of pandas' ``generate_range``: ``start`` is
    rolled forward onto the offset when it is off-offset (otherwise ``end``
    is rolled back), then the range is walked from ``start`` towards ``end``
    (both ends inclusive) and the visited timestamps are counted.

    Parameters
    ----------
    start, end : datetime-like or None
        Range bounds; anything accepted by ``pandas.Timestamp``.
    periods : int or None
        Only used to derive whichever of ``start``/``end`` is missing.
    offset : str or DateOffset
        Step between consecutive timestamps; parsed with ``to_offset``.

    Returns
    -------
    int
        Number of timestamps in the range.

    Raises
    ------
    ValueError
        If ``offset`` is missing, if ``periods`` is None while ``start`` or
        ``end`` is missing, or if applying the offset does not move the date
        in the expected direction.
    """
    offset = to_offset(offset)
    if offset is None:
        raise ValueError("offset must be specified to count a date range")

    # Timestamp(None) yields NaT; normalize that back to None.
    start = Timestamp(start)
    start = start if start is not NaT else None
    end = Timestamp(end)
    end = end if end is not NaT else None

    if periods is None and (start is None or end is None):
        # Without this check the comparison below fails with an opaque
        # TypeError (Timestamp vs. None).
        raise ValueError(
            "start and end must both be specified when periods is None"
        )

    # Same if/elif shape as the pandas implementation this is adapted from.
    if start is not None and not offset.is_on_offset(start):
        start = offset.rollforward(start)
    elif end is not None and not offset.is_on_offset(end):
        end = offset.rollback(end)

    forward = offset.n >= 0

    if periods is None and forward and end < start:
        # Empty range; no need to derive a synthetic end first.
        return 0

    if end is None:
        end = start + (periods - 1) * offset

    if start is None:
        start = end - (periods - 1) * offset

    if isinstance(offset, Tick) and offset.n != 0:
        # Fixed-width steps: the count has a closed form, so avoid a Python
        # loop whose length grows with the number of periods.  Floor division
        # by a negative step also covers descending ranges, and a negative
        # result means the range is empty.
        return max((end - start) // pd.Timedelta(offset) + 1, 0)

    # ``_apply`` is faster than ``cur + offset`` where available; look it up
    # once instead of guarding every iteration with try/except.
    advance = getattr(offset, "_apply", None)
    if advance is None:

        def advance(ts):
            return ts + offset

    cur = start
    count = 0
    while (cur <= end) if forward else (cur >= end):
        count += 1

        if cur == end:
            # GH#24252 avoid overflows by not performing the addition
            # in offset.apply unless we have to
            break

        next_date = advance(cur)
        if forward and next_date <= cur:
            raise ValueError(f"Offset {offset} did not increment date")
        if not forward and next_date >= cur:
            raise ValueError(f"Offset {offset} did not decrement date")
        cur = next_date
    return count
119+
120+
54121
class DataFrameDateRange(DataFrameOperand, DataFrameOperandMixin):
55122
_op_type_ = OperandDef.DATE_RANGE
56123

@@ -511,7 +578,7 @@ def date_range(
511578
inclusive = "both"
512579
else:
513580
if periods is None:
514-
periods = size = int((end - start) / freq + 1)
581+
periods = size = generate_range_count(start, end, periods, freq)
515582
else:
516583
size = periods
517584
if inclusive in ("left", "right"):

mars/dataframe/datasource/tests/test_datasource_execution.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,6 +1007,11 @@ def test_date_range_execution(setup):
10071007
expected = pd.date_range(start="1/1/2018", periods=5, freq="M")
10081008
pd.testing.assert_index_equal(result, expected)
10091009

1010+
dr = md.date_range(start="2018/01/01", end="2018/07/01", freq="M")
1011+
result = dr.execute().fetch()
1012+
expected = pd.date_range(start="2018/01/01", end="2018/07/01", freq="M")
1013+
pd.testing.assert_index_equal(result, expected)
1014+
10101015

10111016
parquet_engines = ["auto"]
10121017
if pa is not None:

mars/tensor/stats/tests/test_stats_execution.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,6 @@ def test_t_test_execution(setup):
205205
functools.partial(mt_from_stats, equal_var=False),
206206
functools.partial(sp_from_stats, equal_var=False),
207207
),
208-
(ttest_1samp, sp_ttest_1samp),
209208
]
210209

211210
fa_raw = np.array([16, 18, 16, 14, 12, 12])
@@ -233,6 +232,23 @@ def test_t_test_execution(setup):
233232
np.testing.assert_almost_equal(expected[0], result[0])
234233
np.testing.assert_almost_equal(expected[1], result[1])
235234

235+
# second param size must be 1 for ttest_1samp
236+
fb_raw = np.array([16])
237+
fb = tensor(fb_raw)
238+
for alt in alternatives:
239+
if parse_version(scipy.__version__) >= parse_version("1.6.0"):
240+
r = ttest_1samp(fa, fb, alternative=alt)
241+
else:
242+
r = ttest_1samp(fa, fb)
243+
result = r.execute().fetch()
244+
245+
if parse_version(scipy.__version__) >= parse_version("1.6.0"):
246+
expected = sp_ttest_1samp(fa_raw, fb_raw, alternative=alt)
247+
else:
248+
expected = sp_ttest_1samp(fa_raw, fb_raw)
249+
np.testing.assert_almost_equal(expected[0], result[0])
250+
np.testing.assert_almost_equal(expected[1], result[1])
251+
236252

237253
@pytest.mark.parametrize("chunk_size", [5, 15])
238254
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)