@@ -27,7 +27,8 @@ cdef extern from "<variant>" namespace "std":
2727 T get[T](...)
2828
2929cdef _sequence_to_array(object sequence, object mask, object size,
30- DataType type , CMemoryPool* pool, c_bool from_pandas):
30+ DataType type , CMemoryPool* pool, c_bool from_pandas,
31+ bint truncate_date64_time):
3132 cdef:
3233 int64_t c_size
3334 PyConversionOptions options
@@ -41,6 +42,7 @@ cdef _sequence_to_array(object sequence, object mask, object size,
4142
4243 options.from_pandas = from_pandas
4344 options.ignore_timezone = os.environ.get(' PYARROW_IGNORE_TIMEZONE' , False )
45+ options.truncate_date64_time = truncate_date64_time
4446
4547 with nogil:
4648 chunked = GetResultValue(
@@ -81,15 +83,16 @@ cdef shared_ptr[CDataType] _ndarray_to_type(object values,
8183
8284
8385cdef _ndarray_to_array(object values, object mask, DataType type ,
84- c_bool from_pandas, c_bool safe, CMemoryPool* pool):
86+ c_bool from_pandas, c_bool safe, CMemoryPool* pool,
87+ bint truncate_date64_time):
8588 cdef:
8689 shared_ptr[CChunkedArray] chunked_out
8790 shared_ptr[CDataType] c_type = _ndarray_to_type(values, type )
8891 CCastOptions cast_options = CCastOptions(safe)
8992
9093 with nogil:
9194 check_status(NdarrayToArrow(pool, values, mask, from_pandas,
92- c_type, cast_options, & chunked_out))
95+ c_type, cast_options, truncate_date64_time, & chunked_out))
9396
9497 if chunked_out.get().num_chunks() > 1 :
9598 return pyarrow_wrap_chunked_array(chunked_out)
@@ -127,7 +130,7 @@ def _handle_arrow_array_protocol(obj, type, mask, size):
127130
128131
129132def array (object obj , type = None , mask = None , size = None , from_pandas = None ,
130- bint safe = True , MemoryPool memory_pool = None ):
133+ bint safe = True , MemoryPool memory_pool = None , bint truncate_date64_time = True ):
131134 """
132135 Create pyarrow.Array instance from a Python object.
133136
@@ -162,6 +165,10 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
162165 memory_pool : pyarrow.MemoryPool, optional
163166 If not passed, will allocate memory from the currently-set default
164167 memory pool.
168+ truncate_date64_time : bool, default True
169+ If True (default), truncate intraday milliseconds when converting Python
170+ datetime objects to date64.
171+ If False, preserve the full datetime including time components.
165172
166173 Returns
167174 -------
@@ -313,7 +320,8 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
313320 elif (pandas_api.is_categorical(values) and
314321 type is not None and type .id != Type_DICTIONARY):
315322 result = _ndarray_to_array(
316- np.asarray(values), mask, type , c_from_pandas, safe, pool
323+ np.asarray(
324+ values), mask, type , c_from_pandas, safe, pool, truncate_date64_time
317325 )
318326 elif pandas_api.is_categorical(values):
319327 if type is not None :
@@ -358,21 +366,22 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
358366 values, obj.dtype, type )
359367 if type and type .id == _Type_RUN_END_ENCODED:
360368 arr = _ndarray_to_array(
361- values, mask, type .value_type, c_from_pandas, safe, pool)
369+ values, mask, type .value_type, c_from_pandas, safe, pool, truncate_date64_time )
362370 result = _pc().run_end_encode(arr, run_end_type = type .run_end_type,
363371 memory_pool = memory_pool)
364372 else :
365373 result = _ndarray_to_array(values, mask, type , c_from_pandas, safe,
366- pool)
374+ pool, truncate_date64_time )
367375 else :
368376 if type and type .id == _Type_RUN_END_ENCODED:
369377 arr = _sequence_to_array(
370- obj, mask, size, type .value_type, pool, from_pandas)
378+ obj, mask, size, type .value_type, pool, from_pandas, truncate_date64_time )
371379 result = _pc().run_end_encode(arr, run_end_type = type .run_end_type,
372380 memory_pool = memory_pool)
373381 # ConvertPySequence does strict conversion if type is explicitly passed
374382 else :
375- result = _sequence_to_array(obj, mask, size, type , pool, c_from_pandas)
383+ result = _sequence_to_array(
384+ obj, mask, size, type , pool, c_from_pandas, truncate_date64_time)
376385
377386 if extension_type is not None :
378387 result = ExtensionArray.from_storage(extension_type, result)
@@ -880,7 +889,8 @@ cdef class _PandasConvertible(_Weakrefable):
880889 bint self_destruct = False ,
881890 str maps_as_pydicts = None ,
882891 types_mapper = None ,
883- bint coerce_temporal_nanoseconds = False
892+ bint coerce_temporal_nanoseconds = False ,
893+ bint truncate_date64_time = False
884894 ):
885895 """
886896 Convert to a pandas-compatible NumPy array or DataFrame, as appropriate
@@ -965,6 +975,10 @@ cdef class _PandasConvertible(_Weakrefable):
965975 default behavior in pandas version 1.x. Set this option to True if
966976 you'd like to use this coercion when using pandas version >= 2.0
967977 for backwards compatibility (not recommended otherwise).
978+ truncate_date64_time : bool, default False
979+ If True, truncate intraday milliseconds when converting date64 to pandas
980+ datetime.
981+ If False (default), preserve the full datetime including time components.
968982
969983 Returns
970984 -------
@@ -1041,6 +1055,7 @@ cdef class _PandasConvertible(_Weakrefable):
10411055 split_blocks = split_blocks,
10421056 self_destruct = self_destruct,
10431057 maps_as_pydicts = maps_as_pydicts,
1058+ truncate_date64_time = truncate_date64_time,
10441059 coerce_temporal_nanoseconds = coerce_temporal_nanoseconds
10451060 )
10461061 return self ._to_pandas(options, categories = categories,
@@ -1063,6 +1078,7 @@ cdef PandasOptions _convert_pandas_options(dict options):
10631078 result.self_destruct = options[' self_destruct' ]
10641079 result.coerce_temporal_nanoseconds = options[' coerce_temporal_nanoseconds' ]
10651080 result.ignore_timezone = os.environ.get(' PYARROW_IGNORE_TIMEZONE' , False )
1081+ result.truncate_date64_time = options[' truncate_date64_time' ]
10661082
10671083 maps_as_pydicts = options[' maps_as_pydicts' ]
10681084 if maps_as_pydicts is None :
0 commit comments