|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from typing import TYPE_CHECKING, Optional, TypeVar, Union |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | + |
| 7 | +from typeguard import typechecked |
| 8 | + |
| 9 | +from arkouda.numpy.dtypes import SeriesDTypes |
| 10 | +from arkouda.numpy.pdarrayclass import pdarray |
| 11 | + |
| 12 | + |
# ``Strings`` is referenced only in type annotations in this module. Static type
# checkers take the first branch and see the real class; at runtime the name is
# bound to a TypeVar placeholder instead, so it still exists without importing
# arkouda.numpy.strings here (presumably to avoid a circular import -- TODO confirm).
if TYPE_CHECKING:
    from arkouda.numpy.strings import Strings
else:
    Strings = TypeVar("Strings")

# Names exported by a star-import of this module.
__all__ = ["from_series"]
| 19 | + |
| 20 | + |
@typechecked
def from_series(
    series: pd.Series, dtype: Optional[Union[type, str]] = None
) -> Union[pdarray, "Strings"]:
    """
    Convert a Pandas Series to an Arkouda pdarray or Strings object.

    If dtype is None, the dtype is inferred from the Pandas Series. Otherwise,
    the dtype parameter is set if the dtype of the Pandas Series is to be
    overridden or is unknown (for example, in situations where the Series
    dtype is object).

    Parameters
    ----------
    series : pd.Series
        The Pandas Series with a dtype of bool, float64, int64, or string
    dtype : Optional[Union[type, str]]
        The valid dtype types are np.bool, np.float64, np.int64, and np.str

    Returns
    -------
    Union[pdarray, Strings]

    Raises
    ------
    ValueError
        Raised if the Series dtype is not bool, float64, int64, string, datetime, or timedelta

    Examples
    --------
    >>> import arkouda as ak
    >>> np.random.seed(1701)
    >>> ak.from_series(pd.Series(np.random.randint(0,10,5)))
    array([4 3 3 5 0])

    >>> ak.from_series(pd.Series(['1', '2', '3', '4', '5']),dtype=np.int64)
    array([1 2 3 4 5])

    >>> np.random.seed(1701)
    >>> ak.from_series(pd.Series(np.random.uniform(low=0.0,high=1.0,size=3)))
    array([0.089433234324597599 0.1153776854774361 0.51874393620990389])

    >>> ak.from_series(
    ...     pd.Series([
    ...         '0.57600036956445599',
    ...         '0.41619265571741659',
    ...         '0.6615356693784662',
    ...     ]),
    ...     dtype=np.float64,
    ... )
    array([0.57600036956445599 0.41619265571741659 0.6615356693784662])

    >>> np.random.seed(1864)
    >>> ak.from_series(pd.Series(np.random.choice([True, False],size=5)))
    array([True True True False False])

    >>> ak.from_series(pd.Series(['True', 'False', 'False', 'True', 'True']), dtype=bool)
    array([True True True True True])

    >>> ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))
    array(['a', 'b', 'c', 'd', 'e'])

    >>> ak.from_series(pd.Series(pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01')])))
    array([1514764800000000000 1514764800000000000])

    Notes
    -----
    The supported datatypes are bool, float64, int64, string, datetime64[ns], and
    timedelta64[ns]. The data type is either inferred from the Series or is set via
    the dtype parameter.

    Series of datetime or timedelta are converted to Arkouda arrays of dtype int64 (nanoseconds)

    A Pandas Series containing strings has a dtype of object. Arkouda assumes the Series
    contains strings and sets the dtype to str
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import -- TODO confirm).
    from arkouda.numpy.pdarraycreation import array

    # A falsy dtype (None, or an empty string) means "infer from the Series";
    # otherwise the string form of the requested dtype is the lookup key.
    dt = str(dtype) if dtype else series.dtype.name

    # If the Series has an object dtype, set dtype to string to comply with the method
    # signature that does not require a dtype; this is required because Pandas can infer
    # non-str dtypes from the input np or Python array.
    if dt == "object":
        dt = "string"

    # Only the table lookup is guarded, so a KeyError raised while converting the
    # data is not misreported as an unsupported dtype.
    try:
        np_dtype = SeriesDTypes[dt]
    except KeyError as err:
        raise ValueError(
            f"dtype {dt} is unsupported. Supported dtypes are bool, float64, int64, string, "
            f"datetime64[ns], and timedelta64[ns]"
        ) from err

    n_array = series.to_numpy(dtype=np_dtype)  # type: ignore
    return array(n_array)
0 commit comments