|
7 | 7 | from typing import ( |
8 | 8 | TYPE_CHECKING, |
9 | 9 | ContextManager, |
10 | | - cast, |
11 | 10 | ) |
12 | 11 |
|
13 | 12 | import numpy as np |
|
21 | 20 |
|
22 | 21 | from pandas.compat import pa_version_under10p1 |
23 | 22 |
|
24 | | -from pandas.core.dtypes.common import is_string_dtype |
25 | | - |
26 | 23 | import pandas as pd |
27 | 24 | from pandas import ( |
28 | 25 | ArrowDtype, |
|
77 | 74 | with_csv_dialect, |
78 | 75 | ) |
79 | 76 | from pandas.core.arrays import ( |
| 77 | + ArrowExtensionArray, |
80 | 78 | BaseMaskedArray, |
81 | | - ExtensionArray, |
82 | 79 | NumpyExtensionArray, |
83 | 80 | ) |
84 | 81 | from pandas.core.arrays._mixins import NDArrayBackedExtensionArray |
|
92 | 89 | NpDtype, |
93 | 90 | ) |
94 | 91 |
|
95 | | - from pandas.core.arrays import ArrowExtensionArray |
96 | 92 |
|
97 | 93 | UNSIGNED_INT_NUMPY_DTYPES: list[NpDtype] = ["uint8", "uint16", "uint32", "uint64"] |
98 | 94 | UNSIGNED_INT_EA_DTYPES: list[Dtype] = ["UInt8", "UInt16", "UInt32", "UInt64"] |
@@ -512,24 +508,18 @@ def shares_memory(left, right) -> bool: |
512 | 508 | if isinstance(left, pd.core.arrays.IntervalArray): |
513 | 509 | return shares_memory(left._left, right) or shares_memory(left._right, right) |
514 | 510 |
|
515 | | - if ( |
516 | | - isinstance(left, ExtensionArray) |
517 | | - and is_string_dtype(left.dtype) |
518 | | - and left.dtype.storage == "pyarrow" # type: ignore[attr-defined] |
519 | | - ): |
520 | | - # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 |
521 | | - left = cast("ArrowExtensionArray", left) |
522 | | - if ( |
523 | | - isinstance(right, ExtensionArray) |
524 | | - and is_string_dtype(right.dtype) |
525 | | - and right.dtype.storage == "pyarrow" # type: ignore[attr-defined] |
526 | | - ): |
527 | | - right = cast("ArrowExtensionArray", right) |
| 511 | + if isinstance(left, ArrowExtensionArray): |
| 512 | + if isinstance(right, ArrowExtensionArray): |
| 513 | + # https://github.com/pandas-dev/pandas/pull/43930#discussion_r736862669 |
528 | 514 | left_pa_data = left._pa_array |
529 | 515 | right_pa_data = right._pa_array |
530 | 516 | left_buf1 = left_pa_data.chunk(0).buffers()[1] |
531 | 517 | right_buf1 = right_pa_data.chunk(0).buffers()[1] |
532 | | - return left_buf1 == right_buf1 |
| 518 | + return left_buf1.address == right_buf1.address |
| 519 | + else: |
| 520 | + # if we have one ArrowExtensionArray and one other array, assume |
| 521 | + # they can only share memory if they share the same numpy buffer |
| 522 | + return np.shares_memory(left, right) |
533 | 523 |
|
534 | 524 | if isinstance(left, BaseMaskedArray) and isinstance(right, BaseMaskedArray): |
535 | 525 | # By convention, we'll say these share memory if they share *either* |
|
0 commit comments