-
Notifications
You must be signed in to change notification settings - Fork 86
Open
Description
When running pandas.test.Dockerfile, which runs the unit test cases for pandas, we are experiencing the errors below, which could be caused by an issue with pyarrow:
__________ test_from_arrow_respecting_given_dtype_unsafe ___________
def test_from_arrow_respecting_given_dtype_unsafe():
array = pa.array([1.5, 2.5], type=pa.float64())
with pytest.raises(pa.ArrowInvalid, match="Float value 1.5 was truncated"):
> array.to_pandas(types_mapper={pa.float64(): ArrowDtype(pa.int64())}.get)
pandas/tests/extension/test_arrow.py:1704:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pyarrow/array.pxi:889: in pyarrow.lib.PandasConvertible.to_pandas
return self._to_pandas(options, categories=categories,
pyarrow/array.pxi:1565: in pyarrow.lib.Array._to_pandas
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
pyarrow/array.pxi:2109: in pyarrow.lib._array_like_to_pandas
arr = dtype.from_arrow(obj)
pandas/core/dtypes/dtypes.py:2347: in from_arrow
arr = array.cast(self.pyarrow_dtype, safe=True)
pyarrow/array.pxi:1004: in pyarrow.lib.Array.cast
return _pc().cast(self, target_type, safe=safe,
pyarrow/compute.py:405: in cast
return call_function("cast", [arr], options, memory_pool)
pyarrow/_compute.pyx:598: in pyarrow._compute.call_function
return func.call(args, options=options, memory_pool=memory_pool,
pyarrow/_compute.pyx:393: in pyarrow._compute.Function.call
result = GetResultValue(
pyarrow/error.pxi:155: in pyarrow.lib.pyarrow_internal_check_status
return check_status(status)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> raise convert_status(status)
E pyarrow.lib.ArrowInvalid: Float value 1.500000 was truncated converting to int64
pyarrow/error.pxi:92: ArrowInvalid
____________ TestParquetPyArrow.test_roundtrip_decimal _____________
self = <pandas.tests.io.test_parquet.TestParquetPyArrow object at 0x7fdfbd3f6150>
tmp_path = PosixPath('/tmp/pytest-of-sagemaker-user/pytest-0/test_roundtrip_decimal0')
pa = <module 'pyarrow' from '/opt/conda/lib/python3.12/site-packages/pyarrow/__init__.py'>
@pytest.mark.skipif(pa_version_under11p0, reason="not supported before 11.0")
def test_roundtrip_decimal(self, tmp_path, pa):
# GH#54768
import pyarrow as pa
path = tmp_path / "decimal.p"
df = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="string[pyarrow]")
df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
result = read_parquet(path)
expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
> tm.assert_frame_equal(result, expected)
E AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="a") are different
E
E Attribute "dtype" are different
E [left]: object
E [right]: string[python]
Metadata
Metadata
Assignees
Labels
No labels