Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def float_string_frame():
df = DataFrame(
np.random.default_rng(2).standard_normal((30, 4)),
index=Index([f"foo_{i}" for i in range(30)], dtype=object),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
)
df["foo"] = "bar"
return df
Expand Down
36 changes: 10 additions & 26 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
Categorical,
Expand Down Expand Up @@ -162,21 +160,7 @@ def test_constructor_with_convert(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_construction_with_mixed(self, float_string_frame, using_infer_string):
# test construction edge cases with mixed types
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume something got lost in a refactor, but the code removed here is not actually being tested. Both the result and expected variables are getting shadowed before they are compared.


# f7u12, this does not work without extensive workaround
data = [
[datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)],
[datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)],
]
df = DataFrame(data)

# check dtypes
result = df.dtypes
expected = Series({"datetime64[us]": 3})

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would adding a tm.assert_something pass here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No - adding tm.assert_series_equal(result, expected) doesn't work on main irrespective of the string data type work.

The data provided is 3 dimensional so I'm not sure why the expected Series is just one element, unless it was meant to do list multiplication instead of creating a dictionary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy to make that change if we want the list multiplication. I just can't figure out why it's here in the first place though, so I figured it was a good removal candidate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The data provided is 3 dimensional

The data is 2 dimensional, so it creates the dataframe just fine?

In [12]:         data = [
    ...:             [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)],
    ...:             [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)],
    ...:         ]
    ...:         df = DataFrame(data)

In [13]: df
Out[13]: 
           0          1          2
0 2001-01-05        NaT 2001-01-02
1 2000-01-02 2000-01-03 2000-01-01

In [14]: df.dtypes
Out[14]: 
0    datetime64[us]
1    datetime64[us]
2    datetime64[us]
dtype: object

But indeed the series should have three elements.

And agreed that it looks out of place to be tested here; I am fine with removing this.

# mixed-type frames
float_string_frame["datetime"] = datetime.now()
float_string_frame["timedelta"] = timedelta(days=1, seconds=1)
Expand All @@ -196,13 +180,11 @@ def test_construction_with_mixed(self, float_string_frame, using_infer_string):
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_construction_with_conversions(self):
# convert from a numpy array of non-ns timedelta64; as of 2.0 this does
# *not* convert
arr = np.array([1, 2, 3], dtype="timedelta64[s]")
df = DataFrame(index=range(3))
df["A"] = arr
df = DataFrame({"A": arr})
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The failures in this module go back to the discussion in #60338

I don't think it is important for these tests to use that construction pattern

expected = DataFrame(
{"A": pd.timedelta_range("00:00:01", periods=3, freq="s")}, index=range(3)
)
Expand All @@ -220,11 +202,11 @@ def test_construction_with_conversions(self):
assert expected.dtypes["dt1"] == "M8[s]"
assert expected.dtypes["dt2"] == "M8[s]"

df = DataFrame(index=range(3))
df["dt1"] = np.datetime64("2013-01-01")
df["dt2"] = np.array(
dt1 = np.datetime64("2013-01-01")
dt2 = np.array(
["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]"
)
df = DataFrame({"dt1": dt1, "dt2": dt2})

# df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01
# 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')
Expand Down Expand Up @@ -401,14 +383,16 @@ def test_update_inplace_sets_valid_block_values():
assert isinstance(df._mgr.blocks[0].values, Categorical)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_nonconsolidated_item_cache_take():
# https://github.com/pandas-dev/pandas/issues/35521

# create non-consolidated dataframe with object dtype columns
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still non-consolidated?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah nice catch - let me fix that

df = DataFrame()
df["col1"] = Series(["a"], dtype=object)
df["col2"] = Series([0], dtype=object)
df = DataFrame(
{
"col1": Series(["a"], dtype=object),
"col2": Series([0], dtype=object),
}
)

# access column (item cache)
df["col1"] == "A"
Expand Down
Loading