Skip to content

Pandas unit test broken from v3.0.0 #599

@bhavya2109sharma

Description

@bhavya2109sharma

When running pandas.test.Dockerfile, which runs the unit test cases for pandas, we are seeing the errors below, which could be caused by an issue with pyarrow:

__________ test_from_arrow_respecting_given_dtype_unsafe ___________

    def test_from_arrow_respecting_given_dtype_unsafe():
        array = pa.array([1.5, 2.5], type=pa.float64())
        with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"):
>           array.to_pandas(types_mapper={pa.float64(): ArrowDtype(pa.int64())}.get)

pandas/tests/extension/test_arrow.py:1704: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pyarrow/array.pxi:889: in pyarrow.lib.PandasConvertible.to_pandas
    return self._to_pandas(options, categories=categories,
pyarrow/array.pxi:1565: in pyarrow.lib.Array._to_pandas
    return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:2109: in pyarrow.lib._array_like_to_pandas
    arr = dtype.from_arrow(obj)
pandas/core/dtypes/dtypes.py:2347: in from_arrow
    arr = array.cast(self.pyarrow_dtype, safe=True)
pyarrow/array.pxi:1004: in pyarrow.lib.Array.cast
    return _pc().cast(self, target_type, safe=safe,
pyarrow/compute.py:405: in cast
    return call_function("cast", [arr], options, memory_pool)
pyarrow/_compute.pyx:598: in pyarrow._compute.call_function
    return func.call(args, options=options, memory_pool=memory_pool,
pyarrow/_compute.pyx:393: in pyarrow._compute.Function.call
    result = GetResultValue(
pyarrow/error.pxi:155: in pyarrow.lib.pyarrow_internal_check_status
    return check_status(status)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   raise convert_status(status)
E   pyarrow.lib.ArrowInvalid: Float value 1.500000 was truncated converting to int64

pyarrow/error.pxi:92: ArrowInvalid
____________ TestParquetPyArrow.test_roundtrip_decimal _____________

self = <pandas.tests.io.test_parquet.TestParquetPyArrow object at 0x7fdfbd3f6150>
tmp_path = PosixPath('/tmp/pytest-of-sagemaker-user/pytest-0/test_roundtrip_decimal0')
pa = <module 'pyarrow' from '/opt/conda/lib/python3.12/site-packages/pyarrow/__init__.py'>

    @pytest.mark.skipif(pa_version_under11p0, reason="not supported before 11.0")
    def test_roundtrip_decimal(self, tmp_path, pa):
        # GH#54768
        import pyarrow as pa
        path = tmp_path / "decimal.p"
        df = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="string[pyarrow]")
        df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
        result = read_parquet(path)
        expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
>       tm.assert_frame_equal(result, expected)
E       AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="a") are different
E       
E       Attribute "dtype" are different
E       [left]:  object
E       [right]: string[python]

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions