Skip to content

Commit f18d17d

Browse files
committed
Add the private/unused _array_dtypes function to test dtype conversions
1 parent 82b0c73 commit f18d17d

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

pygmt/clib/conversion.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,3 +349,116 @@ def array_to_datetime(array: Sequence[Any] | np.ndarray) -> np.ndarray:
349349
'2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
350350
"""
351351
return np.asarray(array, dtype=np.datetime64)
352+
353+
354+
def _array_dtypes(array: Any) -> tuple[str, str]:
    """
    Report the dtype of an array-like object before and after NumPy conversion.

    The input dtype is read from the object's ``dtype`` attribute (NumPy and pandas
    objects) or, when that attribute is missing, from its ``type`` attribute (PyArrow
    objects). If neither attribute exists (e.g., a plain list), the input dtype is
    reported as an empty string. The output dtype is the dtype of the array returned
    by ``np.ascontiguousarray`` for the same object.

    An output dtype of "object" means ``np.ascontiguousarray`` could not convert the
    input to a NumPy dtype that the GMT C API recognizes. In that case a mapping from
    the input dtype to the expected output dtype has to be maintained (e.g., a
    pandas.Series with ``dtype="string[python]"`` is converted to "object", so a
    mapping from ``string`` to ``np.str_`` is needed).

    This function is not used anywhere in the project; similar code is used in the
    ``vectors_to_arrays`` function instead. It is kept to document the string
    representation of the dtypes of different array-like objects and the dtypes that
    NumPy converts them to, and for testing purposes, since some of these dtypes may
    change in the future (e.g., pandas.StringDtype is still an experimental feature).

    Parameters
    ----------
    array
        The array-like object to be checked.

    Returns
    -------
    dtype_in
        String form of the dtype of the input object, or an empty string if the
        object has neither a ``dtype`` nor a ``type`` attribute.
    dtype_out
        String form of the dtype of the array returned by ``np.ascontiguousarray``.

    Examples
    --------
    >>> import datetime
    >>> import numpy as np
    >>> import pandas as pd
    >>> datetimes = [datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)]

    For Python built-in types:

    >>> _array_dtypes([1, 2, 3])
    ('', 'int64')
    >>> _array_dtypes([1.0, 2.0, 3.0])
    ('', 'float64')
    >>> _array_dtypes(["a", "b", "c"])
    ('', '<U1')

    For NumPy arrays:

    >>> _array_dtypes(np.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(np.array([1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(np.datetime64("2021-01-01"))
    ('datetime64[D]', 'datetime64[D]')

    For Pandas objects:

    >>> _array_dtypes(pd.Series(data=[1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(pd.Series(data=[1, 2, 3], dtype=pd.Int32Dtype()))
    ('Int32', 'int32')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0], dtype=pd.Float32Dtype()))
    ('Float32', 'float32')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"]))
    ('object', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[python]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[pyarrow]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="datetime64[ns]"))
    ('datetime64[ns]', 'datetime64[ns]')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date32[day][pyarrow]"))
    ('date32[day][pyarrow]', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date64[ms][pyarrow]"))
    ('date64[ms][pyarrow]', 'object')

    For PyArrow objects:

    >>> import pytest
    >>> pa = pytest.importorskip("pyarrow")
    >>> _array_dtypes(pa.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0]))
    ('double', 'float64')
    >>> _array_dtypes(pa.array([1, 2, 3], type=pa.int32()))
    ('int32', 'int32')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0], type=pa.float32()))
    ('float', 'float32')
    >>> _array_dtypes(pa.array(["a", "b", "c"]))
    ('string', 'object')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date32()))
    ('date32[day]', 'datetime64[D]')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date64()))
    ('date64[ms]', 'datetime64[ms]')
    """
    # Resolve the "type" fallback first, then prefer "dtype" when it is present.
    fallback = getattr(array, "type", "")
    dtype_in = str(getattr(array, "dtype", fallback))

    # The output dtype is whatever NumPy chooses when converting the object.
    converted = np.ascontiguousarray(array)
    dtype_out = str(converted.dtype)
    return dtype_in, dtype_out

0 commit comments

Comments
 (0)