@@ -1300,6 +1300,7 @@ cdef class Seen:
1300
1300
bint object_ # seen_object
1301
1301
bint complex_ # seen_complex
1302
1302
bint datetime_ # seen_datetime
1303
+ bint date_ # seen_date
1303
1304
bint coerce_numeric # coerce data to numeric
1304
1305
bint timedelta_ # seen_timedelta
1305
1306
bint datetimetz_ # seen_datetimetz
@@ -1328,6 +1329,7 @@ cdef class Seen:
1328
1329
self .object_ = False
1329
1330
self .complex_ = False
1330
1331
self .datetime_ = False
1332
+ self .date_ = False
1331
1333
self .timedelta_ = False
1332
1334
self .datetimetz_ = False
1333
1335
self .period_ = False
@@ -2613,6 +2615,13 @@ def maybe_convert_objects(ndarray[object] objects,
2613
2615
else :
2614
2616
seen.object_ = True
2615
2617
break
2618
+ elif PyDate_Check(val):
2619
+ if convert_non_numeric:
2620
+ seen.date_ = True
2621
+ break
2622
+ else :
2623
+ seen.object_ = True
2624
+ break
2616
2625
elif is_period_object(val):
2617
2626
if convert_non_numeric:
2618
2627
seen.period_ = True
@@ -2656,21 +2665,46 @@ def maybe_convert_objects(ndarray[object] objects,
2656
2665
2657
2666
# we try to coerce datetime w/tz but must all have the same tz
2658
2667
if seen.datetimetz_:
2659
- if is_datetime_with_singletz_array(objects) :
2660
- from pandas import DatetimeIndex
2668
+ if storage == " pyarrow " :
2669
+ from pandas.core.dtypes.dtypes import ArrowDtype
2661
2670
2662
- try :
2663
- dti = DatetimeIndex(objects)
2664
- except OutOfBoundsDatetime:
2665
- # e.g. test_to_datetime_cache_coerce_50_lines_outofbounds
2666
- pass
2671
+ if isinstance (val, datetime):
2672
+ objects[mask] = None
2667
2673
else :
2668
- # unbox to DatetimeArray
2669
- return dti._data
2670
- seen.object_ = True
2674
+ objects[mask] = np.datetime64(" NaT" )
2675
+ datetime64_array = objects.astype(val.dtype)
2676
+ pa_array = pa.array(datetime64_array)
2677
+ dtype = ArrowDtype(pa_array.type)
2678
+ return dtype.construct_array_type()._from_sequence(pa_array, dtype = dtype)
2679
+
2680
+ else :
2681
+ if is_datetime_with_singletz_array(objects):
2682
+ from pandas import DatetimeIndex
2683
+
2684
+ try :
2685
+ dti = DatetimeIndex(objects)
2686
+ except OutOfBoundsDatetime:
2687
+ # e.g. test_to_datetime_cache_coerce_50_lines_outofbounds
2688
+ pass
2689
+ else :
2690
+ # unbox to DatetimeArray
2691
+ return dti._data
2692
+ seen.object_ = True
2671
2693
2672
2694
elif seen.datetime_:
2673
- if is_datetime_or_datetime64_array(objects):
2695
+ if storage == " pyarrow" :
2696
+ from pandas.core.dtypes.dtypes import ArrowDtype
2697
+
2698
+ if isinstance (val, datetime):
2699
+ objects[mask] = None
2700
+ else :
2701
+ objects[mask] = np.datetime64(" NaT" )
2702
+ datetime64_array = objects.astype(val.dtype)
2703
+ pa_array = pa.array(datetime64_array)
2704
+ dtype = ArrowDtype(pa_array.type)
2705
+ return dtype.construct_array_type()._from_sequence(pa_array, dtype = dtype)
2706
+
2707
+ elif is_datetime_or_datetime64_array(objects):
2674
2708
from pandas import DatetimeIndex
2675
2709
2676
2710
try :
@@ -2682,6 +2716,16 @@ def maybe_convert_objects(ndarray[object] objects,
2682
2716
return dti._data._ndarray
2683
2717
seen.object_ = True
2684
2718
2719
+ elif seen.date_:
2720
+ if storage == " pyarrow" :
2721
+
2722
+ from pandas.core.dtypes.dtypes import ArrowDtype
2723
+
2724
+ objects[mask] = None
2725
+ pa_array = pa.array(objects)
2726
+ dtype = ArrowDtype(pa_array.type)
2727
+ return dtype.construct_array_type()._from_sequence(pa_array, dtype = dtype)
2728
+
2685
2729
elif seen.timedelta_:
2686
2730
if is_timedelta_or_timedelta64_array(objects):
2687
2731
from pandas import TimedeltaIndex
@@ -2914,32 +2958,30 @@ def map_infer_mask(
2914
2958
2915
2959
ndarray result = np.empty(n, dtype = dtype)
2916
2960
2917
- flatiter arr_it = PyArray_IterNew(arr)
2918
2961
flatiter result_it = PyArray_IterNew(result)
2919
2962
2920
2963
for i in range(n ):
2921
2964
if mask[i]:
2922
2965
if na_value is no_default:
2923
- val = PyArray_GETITEM( arr, PyArray_ITER_DATA(arr_it))
2966
+ val = arr[i]
2924
2967
else :
2925
2968
val = na_value
2926
2969
else :
2927
- val = PyArray_GETITEM( arr, PyArray_ITER_DATA(arr_it))
2970
+ val = arr[i]
2928
2971
val = f(val)
2929
2972
2930
2973
if cnp.PyArray_IsZeroDim(val):
2931
2974
# unbox 0-dim arrays, GH#690
2932
2975
val = val.item()
2933
2976
2934
2977
PyArray_SETITEM(result, PyArray_ITER_DATA(result_it), val)
2935
-
2936
- PyArray_ITER_NEXT(arr_it)
2937
2978
PyArray_ITER_NEXT(result_it)
2938
2979
2939
2980
if convert:
2940
2981
return maybe_convert_objects(
2941
2982
result,
2942
2983
convert_to_nullable_dtype = convert_to_nullable_dtype,
2984
+ convert_non_numeric = True ,
2943
2985
storage = storage,
2944
2986
)
2945
2987
else :
0 commit comments