Skip to content

Commit f879523

Browse files
authored
Merge branch 'main' into example-correction-groupby
2 parents e3c9efa + 9b375be commit f879523

25 files changed

+54
-48
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ I/O
584584
- Bug in :meth:`read_json` not validating that the ``typ`` argument is exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
585585
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data (:issue:`58638`)
586586
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
587+
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
587588

588589
Period
589590
^^^^^^

pandas/io/stata.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1817,10 +1817,19 @@ def read(
18171817
return data
18181818

18191819
def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFrame:
1820+
# missing code for double was different in version 105 and prior
1821+
old_missingdouble = float.fromhex("0x1.0p333")
1822+
18201823
# Check for missing values, and replace if found
18211824
replacements = {}
18221825
for i in range(len(data.columns)):
18231826
fmt = self._typlist[i]
1827+
# recode instances of the old missing code to the currently used value
1828+
if self._format_version <= 105 and fmt == "d":
1829+
data.iloc[:, i] = data.iloc[:, i].replace(
1830+
old_missingdouble, self.MISSING_VALUES["d"]
1831+
)
1832+
18241833
if self._format_version <= 111:
18251834
if fmt not in self.OLD_VALID_RANGE:
18261835
continue

pandas/tests/arithmetic/test_object.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
1311
import pandas.util._test_decorators as td
1412

1513
import pandas as pd
@@ -303,7 +301,6 @@ def test_iadd_string(self):
303301
index += "_x"
304302
assert "a_x" in index
305303

306-
@pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
307304
def test_add(self):
308305
index = pd.Index([str(i) for i in range(10)])
309306
expected = pd.Index(index.values * 2)

pandas/tests/base/test_unique.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
import pandas._testing as tm
86
from pandas.tests.base.common import allow_na_ops
@@ -100,12 +98,11 @@ def test_nunique_null(null_obj, index_or_series_obj):
10098

10199

102100
@pytest.mark.single_cpu
103-
@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
104101
def test_unique_bad_unicode(index_or_series):
105102
# regression test for #34550
106103
uval = "\ud83d" # smiley emoji
107104

108-
obj = index_or_series([uval] * 2)
105+
obj = index_or_series([uval] * 2, dtype=object)
109106
result = obj.unique()
110107

111108
if isinstance(obj, pd.Index):

pandas/tests/frame/constructors/test_from_dict.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def test_constructor_single_row(self):
4444
)
4545
tm.assert_frame_equal(result, expected)
4646

47-
@pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
47+
@pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
4848
def test_constructor_list_of_series(self):
4949
data = [
5050
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
@@ -108,6 +108,7 @@ def test_constructor_list_of_series(self):
108108
expected = DataFrame.from_dict(sdict, orient="index")
109109
tm.assert_frame_equal(result, expected)
110110

111+
@pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
111112
def test_constructor_orient(self, float_string_frame):
112113
data_dict = float_string_frame.T._series
113114
recons = DataFrame.from_dict(data_dict, orient="index")

pandas/tests/frame/constructors/test_from_records.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ def test_from_records_with_datetimes(self):
5757
expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
5858
tm.assert_frame_equal(result, expected)
5959

60-
@pytest.mark.skipif(
61-
using_string_dtype(), reason="dtype checking logic doesn't work"
62-
)
60+
@pytest.mark.xfail(using_string_dtype(), reason="dtype checking logic doesn't work")
6361
def test_from_records_sequencelike(self):
6462
df = DataFrame(
6563
{

pandas/tests/frame/methods/test_fillna.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def test_fillna_datetime(self, datetime_frame):
6565
with pytest.raises(TypeError, match=msg):
6666
datetime_frame.fillna()
6767

68+
# TODO(infer_string) test as actual error instead of xfail
6869
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
6970
def test_fillna_mixed_type(self, float_string_frame):
7071
mf = float_string_frame
@@ -537,6 +538,7 @@ def test_fillna_col_reordering(self):
537538
filled = df.ffill()
538539
assert df.columns.tolist() == filled.columns.tolist()
539540

541+
# TODO(infer_string) test as actual error instead of xfail
540542
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
541543
def test_fill_corner(self, float_frame, float_string_frame):
542544
mf = float_string_frame

pandas/tests/frame/methods/test_info.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pandas import (
1616
CategoricalIndex,
1717
DataFrame,
18+
Index,
1819
MultiIndex,
1920
Series,
2021
date_range,
@@ -360,7 +361,7 @@ def test_info_memory_usage():
360361
df = DataFrame(data)
361362
df.columns = dtypes
362363

363-
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
364+
df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
364365
df_with_object_index.info(buf=buf, memory_usage=True)
365366
res = buf.getvalue().splitlines()
366367
assert re.match(r"memory usage: [^+]+\+", res[-1])
@@ -398,25 +399,25 @@ def test_info_memory_usage():
398399

399400
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
400401
def test_info_memory_usage_deep_not_pypy():
401-
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
402+
df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
402403
assert (
403404
df_with_object_index.memory_usage(index=True, deep=True).sum()
404405
> df_with_object_index.memory_usage(index=True).sum()
405406
)
406407

407-
df_object = DataFrame({"a": ["a"]})
408+
df_object = DataFrame({"a": Series(["a"], dtype=object)})
408409
assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
409410

410411

411412
@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
412413
def test_info_memory_usage_deep_pypy():
413-
df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
414+
df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
414415
assert (
415416
df_with_object_index.memory_usage(index=True, deep=True).sum()
416417
== df_with_object_index.memory_usage(index=True).sum()
417418
)
418419

419-
df_object = DataFrame({"a": ["a"]})
420+
df_object = DataFrame({"a": Series(["a"], dtype=object)})
420421
assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()
421422

422423

pandas/tests/frame/methods/test_interpolate.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def test_interpolate_inplace(self, frame_or_series, request):
6464
assert np.shares_memory(orig, obj.values)
6565
assert orig.squeeze()[1] == 1.5
6666

67+
# TODO(infer_string) raise proper TypeError in case of string dtype
6768
@pytest.mark.xfail(
6869
using_string_dtype(), reason="interpolate doesn't work for string"
6970
)

pandas/tests/frame/test_arithmetic.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
import numpy as np
1212
import pytest
1313

14-
from pandas._config import using_string_dtype
15-
1614
import pandas as pd
1715
from pandas import (
1816
DataFrame,
@@ -251,7 +249,6 @@ def test_timestamp_compare(self, left, right):
251249
with pytest.raises(TypeError, match=msg):
252250
right_f(pd.Timestamp("nat"), df)
253251

254-
@pytest.mark.xfail(using_string_dtype(), reason="can't compare string and int")
255252
def test_mixed_comparison(self):
256253
# GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
257254
# not raise TypeError

0 commit comments

Comments
 (0)