Skip to content

BUG: Test failures with 2.2.3 on Fedora Rawhide #60589

@opoplawski

Description

@opoplawski

Pandas version checks

  • I have checked that this issue has not already been reported.

  • I have confirmed this bug exists on the latest version of pandas.

  • I have confirmed this bug exists on the main branch of pandas.

Reproducible Example

============================= test session starts ==============================
platform linux -- Python 3.13.1, pytest-8.3.4, pluggy-1.5.0 -- /usr/bin/python3
cachedir: /builddir/build/BUILD/python-pandas-2.2.3-build/pandas-2.2.3/_empty/pytest-cache
hypothesis profile 'ci' -> deadline=None, suppress_health_check=[HealthCheck.too_slow, HealthCheck.differing_executors], database=DirectoryBasedExampleDatabase(PosixPath('/builddir/build/BUILD/python-pandas-2.2.3-build/pandas-2.2.3/_empty/.hypothesis/examples'))
rootdir: /builddir/build/BUILD/python-pandas-2.2.3-build/BUILDROOT/usr/lib64/python3.13/site-packages/pandas
configfile: pyproject.toml
plugins: asyncio-0.24.0, xdist-3.6.1, hypothesis-6.104.2
asyncio: mode=Mode.STRICT, default_loop_scope=None
created: 1/1 worker
=================================== FAILURES ===================================
____________________ test_array_inference[data7-expected7] _____________________
[gw0] linux -- Python 3.13.1 /usr/bin/python3
data = [datetime.datetime(2000, 1, 1, 0, 0, tzinfo=<DstTzInfo 'CET' LMT+0:18:00 STD>), datetime.datetime(2001, 1, 1, 0, 0, tzinfo=<DstTzInfo 'CET' LMT+0:18:00 STD>)]
expected = <DatetimeArray>
['2000-01-01 00:00:00+01:00', '2001-01-01 00:00:00+01:00']
Length: 2, dtype: datetime64[ns, CET]
    @pytest.mark.parametrize(
        "data, expected",
        [
            # period
            (
                [pd.Period("2000", "D"), pd.Period("2001", "D")],
                period_array(["2000", "2001"], freq="D"),
            ),
            # interval
            ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2])),
            # datetime
            (
                [pd.Timestamp("2000"), pd.Timestamp("2001")],
                DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
            ),
            (
                [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
                DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
            ),
            (
                np.array([1, 2], dtype="M8[ns]"),
                DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")),
            ),
            (
                np.array([1, 2], dtype="M8[us]"),
                DatetimeArray._simple_new(
                    np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]")
                ),
            ),
            # datetimetz
            (
                [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
                DatetimeArray._from_sequence(
                    ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns")
                ),
            ),
            (
                [
                    datetime.datetime(2000, 1, 1, tzinfo=cet),
                    datetime.datetime(2001, 1, 1, tzinfo=cet),
                ],
                DatetimeArray._from_sequence(
                    ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns")
                ),
            ),
            # timedelta
            (
                [pd.Timedelta("1h"), pd.Timedelta("2h")],
                TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
            ),
            (
                np.array([1, 2], dtype="m8[ns]"),
                TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
            ),
            (
                np.array([1, 2], dtype="m8[us]"),
                TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
            ),
            # integer
            ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
            ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
            ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
            ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
            # float
            ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
            ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
            ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
            ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
            # integer-like float
            ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
            ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
            ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
            ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
            # mixed-integer-float
            ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
            (
                [1, np.nan, 2.0],
                FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
            ),
            # string
            (
                ["a", "b"],
                pd.StringDtype()
                .construct_array_type()
                ._from_sequence(["a", "b"], dtype=pd.StringDtype()),
            ),
            (
                ["a", None],
                pd.StringDtype()
                .construct_array_type()
                ._from_sequence(["a", None], dtype=pd.StringDtype()),
            ),
            # Boolean
            ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
            ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
        ],
    )
    def test_array_inference(data, expected):
        result = pd.array(data)
>       tm.assert_equal(result, expected)
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/tests/arrays/test_array.py:377: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
left = array(['1999-12-31T23:42:00.000000000', '2000-12-31T23:42:00.000000000'],
      dtype='datetime64[ns]')
right = array(['1999-12-31T23:00:00.000000000', '2000-12-31T23:00:00.000000000'],
      dtype='datetime64[ns]')
err_msg = None
    def _raise(left, right, err_msg) -> NoReturn:
        if err_msg is None:
            if left.shape != right.shape:
                raise_assert_detail(
                    obj, f"{obj} shapes are different", left.shape, right.shape
                )
    
            diff = 0
            for left_arr, right_arr in zip(left, right):
                # count up differences
                if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
                    diff += 1
    
            diff = diff * 100.0 / left.size
            msg = f"{obj} values are different ({np.round(diff, 5)} %)"
>           raise_assert_detail(obj, msg, left, right, index_values=index_values)
E           AssertionError: DatetimeArray._ndarray are different
E           
E           DatetimeArray._ndarray values are different (100.0 %)
E           [left]:  [1999-12-31T23:42:00.000000000, 2000-12-31T23:42:00.000000000]
E           [right]: [1999-12-31T23:00:00.000000000, 2000-12-31T23:00:00.000000000]
../../BUILDROOT/usr/lib64/python3.13/site-packages/pandas/_testing/asserters.py:684: AssertionError

Full build log with more test failures is here: https://kojipkgs.fedoraproject.org//work/tasks/9043/126999043/build.log



### Issue Description

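We are updating Fedora Rawhide to pandas 2.2.3 and numpy 2.0.5, but the pandas test suite fails during the package build. In the example above, `pd.array` on `datetime.datetime` objects carrying the `CET` tzinfo yields 23:42 UTC instead of the expected 23:00 UTC, which is consistent with the tzinfo's `LMT+0:18:00` offset being applied instead of `+01:00`.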

### Expected Behavior

The test suite passes with no failures.

### Installed Versions

<details>

2.2.3

</details>

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions