Skip to content

Commit ec7d79c

Browse files
committed
Add the private/unused _array_dtypes function to test dtype conversions
1 parent c2e429c commit ec7d79c

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

pygmt/clib/conversion.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,3 +383,116 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray:
383383
'2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
384384
"""
385385
return np.asarray(array, dtype=np.datetime64)
386+
387+
388+
def _array_dtypes(array: Any) -> tuple[str, str]:
389+
"""
390+
Get the dtypes of an array-like object and the numpy array after applying the
391+
np.ascontiguousarray function.
392+
393+
For the input array-like object, the function checks the "dtype" (for NumPy and
394+
Pandas objects) or "type" (for PyArrow objects) property to determine the dtype. If
395+
both of these properties are not found (e.g., a list), the dtype is set to an empty
396+
string. Then the function applies the np.ascontiguousarray function to the input
397+
object and determine the dtype of the converted np.ndarray object.
398+
399+
The function returns a tuple of the two dtypes. If the output dtype is "object", it
400+
means np.ascontiguousarray has failed to convert the input object to a NumPy dtype
401+
that can be recognized by the GMT C API, and we have to maintain a mapping from the
402+
input dtype to the expected output dtype (e.g., for a panda.Series with
403+
``dtype="string[python]"``, the output is "object", and we need to have a dtype
404+
mapping from ``string`` to ``np.str_``).
405+
406+
This function is not used anywhere in the project. Instead, similar codes are used
407+
in the ``vectors_to_arrays`` function. This function is kept for understand the
408+
dtype's string representation of different array-like objects and what dtype they
409+
are converted to by NumPy. This function is kept for understanding the dtype
410+
conversion process and for testing purposes, since some of the dtypes may change in
411+
the future (e.g., pandas.StringDtype is still an experimental feature).
412+
413+
Parameters
414+
----------
415+
array
416+
The array-like object to be checked.
417+
418+
Returns
419+
-------
420+
dtype
421+
The data type of the array-like object.
422+
423+
Examples
424+
--------
425+
>>> import datetime
426+
>>> import numpy as np
427+
>>> import pandas as pd
428+
>>> datetimes = [datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)]
429+
430+
For Python built-in types:
431+
>>> _array_dtypes([1, 2, 3])
432+
('', 'int64')
433+
>>> _array_dtypes([1.0, 2.0, 3.0])
434+
('', 'float64')
435+
>>> _array_dtypes(["a", "b", "c"])
436+
('', '<U1')
437+
438+
For NumPy arrays:
439+
440+
>>> _array_dtypes(np.array([1, 2, 3]))
441+
('int64', 'int64')
442+
>>> _array_dtypes(np.array([1.0, 2.0, 3.0]))
443+
('float64', 'float64')
444+
>>> _array_dtypes(np.datetime64("2021-01-01"))
445+
('datetime64[D]', 'datetime64[D]')
446+
447+
For Pandas objects:
448+
449+
>>> _array_dtypes(pd.Series(data=[1, 2, 3]))
450+
('int64', 'int64')
451+
>>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0]))
452+
('float64', 'float64')
453+
>>> _array_dtypes(pd.Series(data=[1, 2, 3], dtype=pd.Int32Dtype()))
454+
('Int32', 'int32')
455+
>>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0], dtype=pd.Float32Dtype()))
456+
('Float32', 'float32')
457+
>>> _array_dtypes(pd.Series(data=["a", "b", "c"]))
458+
('object', 'object')
459+
>>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[python]"))
460+
('string', 'object')
461+
>>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[pyarrow]"))
462+
('string', 'object')
463+
>>> _array_dtypes(pd.Series(data=datetimes, dtype="datetime64[ns]"))
464+
('datetime64[ns]', 'datetime64[ns]')
465+
>>> _array_dtypes(pd.Series(data=datetimes, dtype="date32[day][pyarrow]"))
466+
('date32[day][pyarrow]', 'object')
467+
>>> _array_dtypes(pd.Series(data=datetimes, dtype="date64[ms][pyarrow]"))
468+
('date64[ms][pyarrow]', 'object')
469+
470+
For PyArrow objects:
471+
472+
>>> import pytest
473+
>>> pa = pytest.importorskip("pyarrow")
474+
>>> _array_dtypes(pa.array([1, 2, 3]))
475+
('int64', 'int64')
476+
>>> _array_dtypes(pa.array([1.0, 2.0, 3.0]))
477+
('double', 'float64')
478+
>>> _array_dtypes(pa.array([1, 2, 3], type=pa.int32()))
479+
('int32', 'int32')
480+
>>> _array_dtypes(pa.array([1.0, 2.0, 3.0], type=pa.float32()))
481+
('float', 'float32')
482+
>>> _array_dtypes(pa.array(["a", "b", "c"]))
483+
('string', 'object')
484+
>>> _array_dtypes(pa.array(datetimes, type=pa.date32()))
485+
('date32[day]', 'datetime64[D]')
486+
>>> _array_dtypes(pa.array(datetimes, type=pa.date64()))
487+
('date64[ms]', 'datetime64[ms]')
488+
"""
489+
490+
def _get_dtype(array):
491+
"""
492+
Get the data type of the array-like object.
493+
"""
494+
return str(getattr(array, "dtype", getattr(array, "type", "")))
495+
496+
dtype_in = _get_dtype(array)
497+
dtype_out = str(np.ascontiguousarray(array).dtype)
498+
return dtype_in, dtype_out

0 commit comments

Comments
 (0)