@@ -349,3 +349,116 @@ def array_to_datetime(array: Sequence[Any] | np.ndarray) -> np.ndarray:
349
349
'2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
350
350
"""
351
351
return np .asarray (array , dtype = np .datetime64 )
352
+
353
+
354
def _array_dtypes(array: Any) -> tuple[str, str]:
    """
    Get the dtypes of an array-like object before and after NumPy conversion.

    The input dtype is read from the object's "dtype" property (for NumPy and pandas
    objects) or, if that is missing, from its "type" property (for PyArrow objects).
    If neither property is found (e.g., for a list), the input dtype is set to an
    empty string. The output dtype is the dtype of the np.ndarray object obtained by
    applying the np.ascontiguousarray function to the input object.

    If the output dtype is "object", it means np.ascontiguousarray has failed to
    convert the input object to a NumPy dtype that can be recognized by the GMT C
    API, and we have to maintain a mapping from the input dtype to the expected
    output dtype (e.g., for a pandas.Series with ``dtype="string[python]"``, the
    output is "object", and we need to have a dtype mapping from ``string`` to
    ``np.str_``).

    This function is not used anywhere in the project. Instead, similar code is used
    in the ``vectors_to_arrays`` function. This function is kept for understanding
    the string representation of the dtypes of different array-like objects and what
    dtypes they are converted to by NumPy, and for testing purposes, since some of
    the dtypes may change in the future (e.g., pandas.StringDtype is still an
    experimental feature).

    Parameters
    ----------
    array
        The array-like object to be checked.

    Returns
    -------
    dtype_in
        The string representation of the dtype of the input object, or an empty
        string if the object has neither a "dtype" nor a "type" property.
    dtype_out
        The string representation of the dtype of the np.ndarray object returned by
        np.ascontiguousarray.

    Examples
    --------
    >>> import datetime
    >>> import numpy as np
    >>> import pandas as pd
    >>> datetimes = [datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)]

    For Python built-in types:

    >>> _array_dtypes([1, 2, 3])
    ('', 'int64')
    >>> _array_dtypes([1.0, 2.0, 3.0])
    ('', 'float64')
    >>> _array_dtypes(["a", "b", "c"])
    ('', '<U1')

    For NumPy arrays and scalars:

    >>> _array_dtypes(np.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(np.array([1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(np.datetime64("2021-01-01"))
    ('datetime64[D]', 'datetime64[D]')

    For Pandas objects:

    >>> _array_dtypes(pd.Series(data=[1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(pd.Series(data=[1, 2, 3], dtype=pd.Int32Dtype()))
    ('Int32', 'int32')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0], dtype=pd.Float32Dtype()))
    ('Float32', 'float32')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"]))
    ('object', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[python]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[pyarrow]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="datetime64[ns]"))
    ('datetime64[ns]', 'datetime64[ns]')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date32[day][pyarrow]"))
    ('date32[day][pyarrow]', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date64[ms][pyarrow]"))
    ('date64[ms][pyarrow]', 'object')

    For PyArrow objects:

    >>> import pytest
    >>> pa = pytest.importorskip("pyarrow")
    >>> _array_dtypes(pa.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0]))
    ('double', 'float64')
    >>> _array_dtypes(pa.array([1, 2, 3], type=pa.int32()))
    ('int32', 'int32')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0], type=pa.float32()))
    ('float', 'float32')
    >>> _array_dtypes(pa.array(["a", "b", "c"]))
    ('string', 'object')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date32()))
    ('date32[day]', 'datetime64[D]')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date64()))
    ('date64[ms]', 'datetime64[ms]')
    """
    # Prefer the "dtype" property; fall back to "type", then to an empty string when
    # the object exposes neither (e.g., a plain list).
    dtype_in = str(getattr(array, "dtype", getattr(array, "type", "")))
    # The dtype NumPy actually produces when converting the object.
    dtype_out = str(np.ascontiguousarray(array).dtype)
    return dtype_in, dtype_out
0 commit comments