@@ -383,3 +383,116 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray:
383
383
'2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
384
384
"""
385
385
return np .asarray (array , dtype = np .datetime64 )
386
+
387
+
388
def _array_dtypes(array: Any) -> tuple[str, str]:
    """
    Report the dtype of an array-like object before and after NumPy conversion.

    The "before" dtype is read from the object's ``dtype`` attribute (NumPy and
    pandas objects) or, failing that, from its ``type`` attribute (PyArrow objects).
    Objects that expose neither attribute (e.g., a plain list) yield an empty string.
    The "after" dtype is the dtype of the np.ndarray obtained by passing the object
    through np.ascontiguousarray.

    An "after" dtype of "object" means np.ascontiguousarray could not convert the
    input to a NumPy dtype that the GMT C API recognizes. For such inputs a mapping
    from the input dtype to the expected output dtype has to be maintained (e.g., a
    pandas.Series with ``dtype="string[python]"`` converts to "object", so a mapping
    from ``string`` to ``np.str_`` is needed).

    This function is not used anywhere in the project; similar code lives in the
    ``vectors_to_arrays`` function. It is kept to document the string representation
    of the dtypes of different array-like objects and the dtypes NumPy converts them
    to, and for testing purposes, since some of these dtypes may change in the future
    (e.g., pandas.StringDtype is still an experimental feature).

    Parameters
    ----------
    array
        The array-like object to be checked.

    Returns
    -------
    dtype_in
        String form of the ``dtype`` (or ``type``) attribute of the input object, or
        an empty string if the object has neither attribute.
    dtype_out
        String form of the dtype of the array returned by np.ascontiguousarray.

    Examples
    --------
    >>> import datetime
    >>> import numpy as np
    >>> import pandas as pd
    >>> datetimes = [datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)]

    For Python built-in types:

    >>> _array_dtypes([1, 2, 3])
    ('', 'int64')
    >>> _array_dtypes([1.0, 2.0, 3.0])
    ('', 'float64')
    >>> _array_dtypes(["a", "b", "c"])
    ('', '<U1')

    For NumPy arrays:

    >>> _array_dtypes(np.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(np.array([1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(np.datetime64("2021-01-01"))
    ('datetime64[D]', 'datetime64[D]')

    For Pandas objects:

    >>> _array_dtypes(pd.Series(data=[1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(pd.Series(data=[1, 2, 3], dtype=pd.Int32Dtype()))
    ('Int32', 'int32')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0], dtype=pd.Float32Dtype()))
    ('Float32', 'float32')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"]))
    ('object', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[python]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[pyarrow]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="datetime64[ns]"))
    ('datetime64[ns]', 'datetime64[ns]')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date32[day][pyarrow]"))
    ('date32[day][pyarrow]', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date64[ms][pyarrow]"))
    ('date64[ms][pyarrow]', 'object')

    For PyArrow objects:

    >>> import pytest
    >>> pa = pytest.importorskip("pyarrow")
    >>> _array_dtypes(pa.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0]))
    ('double', 'float64')
    >>> _array_dtypes(pa.array([1, 2, 3], type=pa.int32()))
    ('int32', 'int32')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0], type=pa.float32()))
    ('float', 'float32')
    >>> _array_dtypes(pa.array(["a", "b", "c"]))
    ('string', 'object')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date32()))
    ('date32[day]', 'datetime64[D]')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date64()))
    ('date64[ms]', 'datetime64[ms]')
    """
    # Look up the PyArrow-style "type" attribute first so that it can serve as the
    # fallback when the object has no "dtype" attribute; "" means neither exists.
    fallback = getattr(array, "type", "")
    source_dtype = str(getattr(array, "dtype", fallback))

    # Let NumPy perform the conversion and report which dtype it settled on.
    converted = np.ascontiguousarray(array)
    return source_dtype, str(converted.dtype)
0 commit comments