Skip to content

Commit 8a95e0c

Browse files
authored
Closes #5189: move from_series to pandas module (#5190)
Moves `from_series` out of the `numpy` module into the `pandas` module. Closes #5189: move from_series to pandas module --------- Co-authored-by: ajpotts <[email protected]>
1 parent f464e45 commit 8a95e0c

File tree

10 files changed

+175
-149
lines changed

10 files changed

+175
-149
lines changed

arkouda/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@
189189
format_float_positional,
190190
format_float_scientific,
191191
format_parser,
192-
from_series,
193192
full,
194193
full_like,
195194
get_byteorder,
@@ -364,6 +363,7 @@
364363
Row,
365364
Series,
366365
compute_join_size,
366+
from_series,
367367
gen_ranges,
368368
join,
369369
join_on_eq_with_dt,

arkouda/numpy/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,6 @@
274274
arange,
275275
array,
276276
bigint_from_uint_arrays,
277-
from_series,
278277
full,
279278
full_like,
280279
linspace,

arkouda/numpy/pdarraycreation.py

Lines changed: 0 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
NUMBER_FORMAT_STRINGS,
2626
DTypes,
2727
NumericDTypes,
28-
SeriesDTypes,
2928
bigint,
3029
bool_scalars,
3130
float64,
@@ -65,109 +64,12 @@
6564
"standard_normal",
6665
"random_strings_uniform",
6766
"random_strings_lognormal",
68-
"from_series",
6967
"bigint_from_uint_arrays",
7068
"promote_to_common_dtype",
7169
"scalar_array",
7270
]
7371

7472

75-
@typechecked
76-
def from_series(series: pd.Series, dtype: Optional[Union[type, str]] = None) -> Union[pdarray, Strings]:
77-
"""
78-
Converts a Pandas Series to an Arkouda pdarray or Strings object. If
79-
dtype is None, the dtype is inferred from the Pandas Series. Otherwise,
80-
the dtype parameter is set if the dtype of the Pandas Series is to be
81-
overridden or is unknown (for example, in situations where the Series
82-
dtype is object).
83-
84-
Parameters
85-
----------
86-
series : Pandas Series
87-
The Pandas Series with a dtype of bool, float64, int64, or string
88-
dtype : Optional[type]
89-
The valid dtype types are np.bool, np.float64, np.int64, and np.str
90-
91-
Returns
92-
-------
93-
Union[pdarray,Strings]
94-
95-
Raises
96-
------
97-
TypeError
98-
Raised if series is not a Pandas Series object
99-
ValueError
100-
Raised if the Series dtype is not bool, float64, int64, string, datetime, or timedelta
101-
102-
Examples
103-
--------
104-
>>> import arkouda as ak
105-
>>> np.random.seed(1701)
106-
>>> ak.from_series(pd.Series(np.random.randint(0,10,5)))
107-
array([4 3 3 5 0])
108-
109-
>>> ak.from_series(pd.Series(['1', '2', '3', '4', '5']),dtype=np.int64)
110-
array([1 2 3 4 5])
111-
112-
>>> np.random.seed(1701)
113-
>>> ak.from_series(pd.Series(np.random.uniform(low=0.0,high=1.0,size=3)))
114-
array([0.089433234324597599 0.1153776854774361 0.51874393620990389])
115-
116-
>>> ak.from_series(
117-
... pd.Series([
118-
... '0.57600036956445599',
119-
... '0.41619265571741659',
120-
... '0.6615356693784662',
121-
... ]),
122-
... dtype=np.float64,
123-
... )
124-
array([0.57600036956445599 0.41619265571741659 0.6615356693784662])
125-
126-
>>> np.random.seed(1864)
127-
>>> ak.from_series(pd.Series(np.random.choice([True, False],size=5)))
128-
array([True True True False False])
129-
130-
>>> ak.from_series(pd.Series(['True', 'False', 'False', 'True', 'True']), dtype=bool)
131-
array([True True True True True])
132-
133-
>>> ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))
134-
array(['a', 'b', 'c', 'd', 'e'])
135-
136-
>>> ak.from_series(pd.Series(pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01')])))
137-
array([1514764800000000000 1514764800000000000])
138-
139-
Notes
140-
-----
141-
The supported datatypes are bool, float64, int64, string, and datetime64[ns]. The
142-
data type is either inferred from the the Series or is set via the dtype parameter.
143-
144-
Series of datetime or timedelta are converted to Arkouda arrays of dtype int64 (nanoseconds)
145-
146-
A Pandas Series containing strings has a dtype of object. Arkouda assumes the Series
147-
contains strings and sets the dtype to str
148-
"""
149-
if not dtype:
150-
dt = series.dtype.name
151-
else:
152-
dt = str(dtype)
153-
try:
154-
"""
155-
If the Series has a object dtype, set dtype to string to comply with method
156-
signature that does not require a dtype; this is required because Pandas can infer
157-
non-str dtypes from the input np or Python array.
158-
"""
159-
if dt == "object":
160-
dt = "string"
161-
162-
n_array = series.to_numpy(dtype=SeriesDTypes[dt]) # type: ignore
163-
except KeyError:
164-
raise ValueError(
165-
f"dtype {dt} is unsupported. Supported dtypes are bool, float64, int64, string, "
166-
f"datetime64[ns], and timedelta64[ns]"
167-
)
168-
return array(n_array)
169-
170-
17173
def _deepcopy(a: pdarray) -> pdarray:
17274
from arkouda.client import generic_msg
17375
from arkouda.numpy.pdarrayclass import create_pdarray

arkouda/numpy/timeclass.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class _AbstractBaseTime(pdarray):
9595

9696
def __init__(self, pda, unit: str = _BASE_UNIT):
9797
from arkouda.numpy import cast as akcast
98-
from arkouda.numpy.pdarraycreation import from_series
98+
from arkouda.pandas.conversion import from_series
9999

100100
if isinstance(pda, Datetime) or isinstance(pda, Timedelta):
101101
self.unit: str = pda.unit

arkouda/pandas/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@
1717
from arkouda.pandas.row import Row
1818
from arkouda.pandas.series import Series
1919
from arkouda.pandas.typing import ArkoudaArrayLike
20+
from arkouda.pandas.conversion import from_series

arkouda/pandas/conversion.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Optional, TypeVar, Union
4+
5+
import pandas as pd
6+
7+
from typeguard import typechecked
8+
9+
from arkouda.numpy.dtypes import SeriesDTypes
10+
from arkouda.numpy.pdarrayclass import pdarray
11+
12+
13+
if TYPE_CHECKING:
14+
from arkouda.numpy.strings import Strings
15+
else:
16+
Strings = TypeVar("Strings")
17+
18+
__all__ = ["from_series"]
19+
20+
21+
@typechecked
22+
def from_series(
23+
series: pd.Series, dtype: Optional[Union[type, str]] = None
24+
) -> Union[pdarray, "Strings"]:
25+
"""
26+
Converts a Pandas Series to an Arkouda pdarray or Strings object. If
27+
dtype is None, the dtype is inferred from the Pandas Series. Otherwise,
28+
the dtype parameter is set if the dtype of the Pandas Series is to be
29+
overridden or is unknown (for example, in situations where the Series
30+
dtype is object).
31+
32+
Parameters
33+
----------
34+
series : pd.Series
35+
The Pandas Series with a dtype of bool, float64, int64, string, datetime64[ns], or timedelta64[ns]
36+
dtype : Optional[Union[type, str]]
37+
The valid dtype types are bool, np.float64, np.int64, and str
38+
39+
Returns
40+
-------
41+
Union[pdarray, Strings]
42+
43+
Raises
44+
------
45+
ValueError
46+
Raised if the Series dtype is not bool, float64, int64, string, datetime, or timedelta
47+
48+
Examples
49+
--------
50+
>>> import arkouda as ak
51+
>>> np.random.seed(1701)
52+
>>> ak.from_series(pd.Series(np.random.randint(0,10,5)))
53+
array([4 3 3 5 0])
54+
55+
>>> ak.from_series(pd.Series(['1', '2', '3', '4', '5']),dtype=np.int64)
56+
array([1 2 3 4 5])
57+
58+
>>> np.random.seed(1701)
59+
>>> ak.from_series(pd.Series(np.random.uniform(low=0.0,high=1.0,size=3)))
60+
array([0.089433234324597599 0.1153776854774361 0.51874393620990389])
61+
62+
>>> ak.from_series(
63+
... pd.Series([
64+
... '0.57600036956445599',
65+
... '0.41619265571741659',
66+
... '0.6615356693784662',
67+
... ]),
68+
... dtype=np.float64,
69+
... )
70+
array([0.57600036956445599 0.41619265571741659 0.6615356693784662])
71+
72+
>>> np.random.seed(1864)
73+
>>> ak.from_series(pd.Series(np.random.choice([True, False],size=5)))
74+
array([True True True False False])
75+
76+
>>> ak.from_series(pd.Series(['True', 'False', 'False', 'True', 'True']), dtype=bool)
77+
array([True True True True True])
78+
79+
>>> ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))
80+
array(['a', 'b', 'c', 'd', 'e'])
81+
82+
>>> ak.from_series(pd.Series(pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01')])))
83+
array([1514764800000000000 1514764800000000000])
84+
85+
Notes
86+
-----
87+
The supported datatypes are bool, float64, int64, string, datetime64[ns], and timedelta64[ns]. The
88+
data type is either inferred from the Series or is set via the dtype parameter.
89+
90+
Series of datetime or timedelta are converted to Arkouda arrays of dtype int64 (nanoseconds)
91+
92+
A Pandas Series containing strings has a dtype of object. Arkouda assumes the Series
93+
contains strings and sets the dtype to str
94+
"""
95+
from arkouda.numpy.pdarraycreation import array
96+
97+
if not dtype:
98+
dt = series.dtype.name
99+
else:
100+
dt = str(dtype)
101+
try:
102+
"""
103+
If the Series has an object dtype, set dtype to string to comply with method
104+
signature that does not require a dtype; this is required because Pandas can infer
105+
non-str dtypes from the input np or Python array.
106+
"""
107+
if dt == "object":
108+
dt = "string"
109+
110+
n_array = series.to_numpy(dtype=SeriesDTypes[dt]) # type: ignore
111+
except KeyError:
112+
raise ValueError(
113+
f"dtype {dt} is unsupported. Supported dtypes are bool, float64, int64, string, "
114+
f"datetime64[ns], and timedelta64[ns]"
115+
)
116+
return array(n_array)

arkouda/pdarraycreation/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
arange,
55
array,
66
bigint_from_uint_arrays,
7-
from_series,
87
full,
98
full_like,
109
linspace,

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ testpaths =
6666
tests/numpy/utils_test.py
6767
tests/operator_test.py
6868
tests/pandas/categorical_test.py
69+
tests/pandas/conversion_test.py
6970
tests/pandas/dataframe_test.py
7071
tests/pandas/extension/arkouda_array_extension.py
7172
tests/pandas/extension/arkouda_categorical_extension.py

tests/numpy/pdarray_creation_test.py

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import datetime as dt
21
import math
32
import statistics
43

@@ -1128,52 +1127,6 @@ def test_random_strings_lognormal_with_seed(self):
11281127
)
11291128
assert printable_randoms == pda.tolist()
11301129

1131-
@pytest.mark.parametrize("size", pytest.prob_size)
1132-
@pytest.mark.parametrize("dtype", [bool, np.float64, np.int64, str])
1133-
def test_from_series_dtypes(self, size, dtype):
1134-
p_array = ak.from_series(pd.Series(np.random.randint(0, 10, size)), dtype)
1135-
assert isinstance(p_array, ak.pdarray if dtype is not str else ak.Strings)
1136-
assert dtype == p_array.dtype
1137-
1138-
p_objects_array = ak.from_series(
1139-
pd.Series(np.random.randint(0, 10, size), dtype="object"), dtype=dtype
1140-
)
1141-
assert isinstance(p_objects_array, ak.pdarray if dtype is not str else ak.Strings)
1142-
assert dtype == p_objects_array.dtype
1143-
1144-
def test_from_series_misc(self):
1145-
p_array = ak.from_series(pd.Series(["a", "b", "c", "d", "e"]))
1146-
assert isinstance(p_array, ak.Strings)
1147-
assert str == p_array.dtype
1148-
1149-
p_array = ak.from_series(pd.Series(np.random.choice([True, False], size=10)))
1150-
1151-
assert isinstance(p_array, ak.pdarray)
1152-
assert bool == p_array.dtype
1153-
1154-
p_array = ak.from_series(pd.Series([dt.datetime(2016, 1, 1, 0, 0, 1)]))
1155-
1156-
assert isinstance(p_array, ak.pdarray)
1157-
assert np.int64 == p_array.dtype
1158-
1159-
p_array = ak.from_series(pd.Series([np.datetime64("2018-01-01")]))
1160-
1161-
assert isinstance(p_array, ak.pdarray)
1162-
assert np.int64 == p_array.dtype
1163-
1164-
p_array = ak.from_series(
1165-
pd.Series(pd.to_datetime(["1/1/2018", np.datetime64("2018-01-01"), dt.datetime(2018, 1, 1)]))
1166-
)
1167-
1168-
assert isinstance(p_array, ak.pdarray)
1169-
assert np.int64 == p_array.dtype
1170-
1171-
with pytest.raises(TypeError):
1172-
ak.from_series(np.ones(10))
1173-
1174-
with pytest.raises(ValueError):
1175-
ak.from_series(pd.Series(np.random.randint(0, 10, 10), dtype=np.int8))
1176-
11771130
@pytest.mark.parametrize("dtype", NUMERIC_SCALARS)
11781131
@pytest.mark.parametrize("size", pytest.prob_size)
11791132
def test_fill(self, size, dtype):

0 commit comments

Comments
 (0)