Skip to content

Commit f93dc66

Browse files
committed
Try to manage date with pyarrow
1 parent e94997a commit f93dc66

File tree

3 files changed

+61
-21
lines changed

3 files changed

+61
-21
lines changed

pandas/_libs/lib.pyx

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,6 +1300,7 @@ cdef class Seen:
13001300
bint object_ # seen_object
13011301
bint complex_ # seen_complex
13021302
bint datetime_ # seen_datetime
1303+
bint date_ # seen_date
13031304
bint coerce_numeric # coerce data to numeric
13041305
bint timedelta_ # seen_timedelta
13051306
bint datetimetz_ # seen_datetimetz
@@ -1328,6 +1329,7 @@ cdef class Seen:
13281329
self.object_ = False
13291330
self.complex_ = False
13301331
self.datetime_ = False
1332+
self.date_ = False
13311333
self.timedelta_ = False
13321334
self.datetimetz_ = False
13331335
self.period_ = False
@@ -2613,6 +2615,13 @@ def maybe_convert_objects(ndarray[object] objects,
26132615
else:
26142616
seen.object_ = True
26152617
break
2618+
elif PyDate_Check(val):
2619+
if convert_non_numeric:
2620+
seen.date_ = True
2621+
break
2622+
else:
2623+
seen.object_ = True
2624+
break
26162625
elif is_period_object(val):
26172626
if convert_non_numeric:
26182627
seen.period_ = True
@@ -2656,21 +2665,46 @@ def maybe_convert_objects(ndarray[object] objects,
26562665

26572666
# we try to coerce datetime w/tz but must all have the same tz
26582667
if seen.datetimetz_:
2659-
if is_datetime_with_singletz_array(objects):
2660-
from pandas import DatetimeIndex
2668+
if storage == "pyarrow":
2669+
from pandas.core.dtypes.dtypes import ArrowDtype
26612670

2662-
try:
2663-
dti = DatetimeIndex(objects)
2664-
except OutOfBoundsDatetime:
2665-
# e.g. test_to_datetime_cache_coerce_50_lines_outofbounds
2666-
pass
2671+
if isinstance(val, datetime):
2672+
objects[mask] = None
26672673
else:
2668-
# unbox to DatetimeArray
2669-
return dti._data
2670-
seen.object_ = True
2674+
objects[mask] = np.datetime64("NaT")
2675+
datetime64_array = objects.astype(val.dtype)
2676+
pa_array = pa.array(datetime64_array)
2677+
dtype = ArrowDtype(pa_array.type)
2678+
return dtype.construct_array_type()._from_sequence(pa_array, dtype=dtype)
2679+
2680+
else:
2681+
if is_datetime_with_singletz_array(objects):
2682+
from pandas import DatetimeIndex
2683+
2684+
try:
2685+
dti = DatetimeIndex(objects)
2686+
except OutOfBoundsDatetime:
2687+
# e.g. test_to_datetime_cache_coerce_50_lines_outofbounds
2688+
pass
2689+
else:
2690+
# unbox to DatetimeArray
2691+
return dti._data
2692+
seen.object_ = True
26712693

26722694
elif seen.datetime_:
2673-
if is_datetime_or_datetime64_array(objects):
2695+
if storage == "pyarrow":
2696+
from pandas.core.dtypes.dtypes import ArrowDtype
2697+
2698+
if isinstance(val, datetime):
2699+
objects[mask] = None
2700+
else:
2701+
objects[mask] = np.datetime64("NaT")
2702+
datetime64_array = objects.astype(val.dtype)
2703+
pa_array = pa.array(datetime64_array)
2704+
dtype = ArrowDtype(pa_array.type)
2705+
return dtype.construct_array_type()._from_sequence(pa_array, dtype=dtype)
2706+
2707+
elif is_datetime_or_datetime64_array(objects):
26742708
from pandas import DatetimeIndex
26752709

26762710
try:
@@ -2682,6 +2716,16 @@ def maybe_convert_objects(ndarray[object] objects,
26822716
return dti._data._ndarray
26832717
seen.object_ = True
26842718

2719+
elif seen.date_:
2720+
if storage == "pyarrow":
2721+
2722+
from pandas.core.dtypes.dtypes import ArrowDtype
2723+
2724+
objects[mask] = None
2725+
pa_array = pa.array(objects)
2726+
dtype = ArrowDtype(pa_array.type)
2727+
return dtype.construct_array_type()._from_sequence(pa_array, dtype=dtype)
2728+
26852729
elif seen.timedelta_:
26862730
if is_timedelta_or_timedelta64_array(objects):
26872731
from pandas import TimedeltaIndex
@@ -2914,32 +2958,30 @@ def map_infer_mask(
29142958

29152959
ndarray result = np.empty(n, dtype=dtype)
29162960

2917-
flatiter arr_it = PyArray_IterNew(arr)
29182961
flatiter result_it = PyArray_IterNew(result)
29192962

29202963
for i in range(n):
29212964
if mask[i]:
29222965
if na_value is no_default:
2923-
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(arr_it))
2966+
val = arr[i]
29242967
else:
29252968
val = na_value
29262969
else:
2927-
val = PyArray_GETITEM(arr, PyArray_ITER_DATA(arr_it))
2970+
val = arr[i]
29282971
val = f(val)
29292972

29302973
if cnp.PyArray_IsZeroDim(val):
29312974
# unbox 0-dim arrays, GH#690
29322975
val = val.item()
29332976

29342977
PyArray_SETITEM(result, PyArray_ITER_DATA(result_it), val)
2935-
2936-
PyArray_ITER_NEXT(arr_it)
29372978
PyArray_ITER_NEXT(result_it)
29382979

29392980
if convert:
29402981
return maybe_convert_objects(
29412982
result,
29422983
convert_to_nullable_dtype=convert_to_nullable_dtype,
2984+
convert_non_numeric=True,
29432985
storage=storage,
29442986
)
29452987
else:

pandas/core/algorithms.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,12 +1703,10 @@ def map_array(
17031703
arr = cast("ExtensionArray", arr)
17041704
arr_dtype = arr.dtype.__repr__()
17051705
if "pyarrow" in arr_dtype:
1706-
if any(
1707-
time_type in arr_dtype for time_type in ["date", "time", "duration"]
1708-
):
1706+
if any(time_type in arr_dtype for time_type in ["duration"]):
17091707
values = arr.astype(object, copy=False)
17101708
else:
1711-
values = arr._pa_array.to_numpy()
1709+
values = np.asarray(arr)
17121710
storage = "pyarrow"
17131711
else:
17141712
values = np.asarray(arr)

pandas/tests/extension/test_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def test_map(self, data_missing, na_action):
279279
if data_missing.dtype.kind in "mM":
280280
result = data_missing.map(lambda x: x, na_action=na_action)
281281
expected = data_missing
282-
tm.assert_extension_array_equal(result, expected, check_dtype=False)
282+
tm.assert_extension_array_equal(result, expected)
283283
else:
284284
result = data_missing.map(lambda x: x, na_action=na_action)
285285
expected = data_missing

0 commit comments

Comments (0)