Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arkouda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@
format_float_positional,
format_float_scientific,
format_parser,
from_series,
full,
full_like,
get_byteorder,
Expand Down Expand Up @@ -364,6 +363,7 @@
Row,
Series,
compute_join_size,
from_series,
gen_ranges,
join,
join_on_eq_with_dt,
Expand Down
1 change: 0 additions & 1 deletion arkouda/numpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,6 @@
arange,
array,
bigint_from_uint_arrays,
from_series,
full,
full_like,
linspace,
Expand Down
98 changes: 0 additions & 98 deletions arkouda/numpy/pdarraycreation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
NUMBER_FORMAT_STRINGS,
DTypes,
NumericDTypes,
SeriesDTypes,
bigint,
bool_scalars,
float64,
Expand Down Expand Up @@ -65,109 +64,12 @@
"standard_normal",
"random_strings_uniform",
"random_strings_lognormal",
"from_series",
"bigint_from_uint_arrays",
"promote_to_common_dtype",
"scalar_array",
]


@typechecked
def from_series(series: pd.Series, dtype: Optional[Union[type, str]] = None) -> Union[pdarray, Strings]:
    """
    Convert a Pandas Series to an Arkouda pdarray or Strings object.

    If dtype is None, the dtype is inferred from the Pandas Series. Otherwise,
    the dtype parameter is used to override the dtype of the Pandas Series,
    or to supply it when it is unknown (for example, in situations where the
    Series dtype is object).

    Parameters
    ----------
    series : Pandas Series
        The Pandas Series with a dtype of bool, float64, int64, string,
        datetime64[ns], or timedelta64[ns]
    dtype : Optional[Union[type, str]]
        The dtype to convert to, e.g. bool, np.float64, np.int64, or str.
        Its string form (``str(dtype)``) is looked up in ``SeriesDTypes``.

    Returns
    -------
    Union[pdarray,Strings]

    Raises
    ------
    TypeError
        Raised if series is not a Pandas Series object
    ValueError
        Raised if the Series dtype is not bool, float64, int64, string, datetime, or timedelta

    Examples
    --------
    >>> import arkouda as ak
    >>> np.random.seed(1701)
    >>> ak.from_series(pd.Series(np.random.randint(0,10,5)))
    array([4 3 3 5 0])

    >>> ak.from_series(pd.Series(['1', '2', '3', '4', '5']),dtype=np.int64)
    array([1 2 3 4 5])

    >>> np.random.seed(1701)
    >>> ak.from_series(pd.Series(np.random.uniform(low=0.0,high=1.0,size=3)))
    array([0.089433234324597599 0.1153776854774361 0.51874393620990389])

    >>> ak.from_series(
    ...     pd.Series([
    ...         '0.57600036956445599',
    ...         '0.41619265571741659',
    ...         '0.6615356693784662',
    ...     ]),
    ...     dtype=np.float64,
    ... )
    array([0.57600036956445599 0.41619265571741659 0.6615356693784662])

    >>> np.random.seed(1864)
    >>> ak.from_series(pd.Series(np.random.choice([True, False],size=5)))
    array([True True True False False])

    >>> ak.from_series(pd.Series(['True', 'False', 'False', 'True', 'True']), dtype=bool)
    array([True True True True True])

    >>> ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))
    array(['a', 'b', 'c', 'd', 'e'])

    >>> ak.from_series(pd.Series(pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01')])))
    array([1514764800000000000 1514764800000000000])

    Notes
    -----
    The supported datatypes are bool, float64, int64, string, datetime64[ns], and
    timedelta64[ns]. The data type is either inferred from the Series or is set via
    the dtype parameter.

    Series of datetime or timedelta are converted to Arkouda arrays of dtype int64 (nanoseconds)

    A Pandas Series containing strings has a dtype of object. Arkouda assumes the Series
    contains strings and sets the dtype to str
    """
    # A falsy dtype (None, or an empty string) means "infer from the Series".
    dt = str(dtype) if dtype else series.dtype.name

    # If the Series has an object dtype, treat it as string to comply with the
    # method signature that does not require a dtype; this is required because
    # Pandas can infer non-str dtypes from the input np or Python array.
    if dt == "object":
        dt = "string"

    # Only the lookup is guarded, so that a KeyError raised while converting
    # the data is not misreported as an unsupported dtype.
    try:
        target_dtype = SeriesDTypes[dt]
    except KeyError:
        raise ValueError(
            f"dtype {dt} is unsupported. Supported dtypes are bool, float64, int64, string, "
            f"datetime64[ns], and timedelta64[ns]"
        ) from None

    n_array = series.to_numpy(dtype=target_dtype)  # type: ignore
    return array(n_array)


def _deepcopy(a: pdarray) -> pdarray:
from arkouda.client import generic_msg
from arkouda.numpy.pdarrayclass import create_pdarray
Expand Down
2 changes: 1 addition & 1 deletion arkouda/numpy/timeclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class _AbstractBaseTime(pdarray):

def __init__(self, pda, unit: str = _BASE_UNIT):
from arkouda.numpy import cast as akcast
from arkouda.numpy.pdarraycreation import from_series
from arkouda.pandas.conversion import from_series

if isinstance(pda, Datetime) or isinstance(pda, Timedelta):
self.unit: str = pda.unit
Expand Down
1 change: 1 addition & 0 deletions arkouda/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
from arkouda.pandas.row import Row
from arkouda.pandas.series import Series
from arkouda.pandas.typing import ArkoudaArrayLike
from arkouda.pandas.conversion import from_series
116 changes: 116 additions & 0 deletions arkouda/pandas/conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Optional, TypeVar, Union

import pandas as pd

from typeguard import typechecked

from arkouda.numpy.dtypes import SeriesDTypes
from arkouda.numpy.pdarrayclass import pdarray


if TYPE_CHECKING:
from arkouda.numpy.strings import Strings
else:
Strings = TypeVar("Strings")

__all__ = ["from_series"]


@typechecked
def from_series(
    series: pd.Series, dtype: Optional[Union[type, str]] = None
) -> Union[pdarray, "Strings"]:
    """
    Convert a Pandas Series to an Arkouda pdarray or Strings object.

    If dtype is None, the dtype is inferred from the Pandas Series. Otherwise,
    the dtype parameter is used to override the dtype of the Pandas Series,
    or to supply it when it is unknown (for example, in situations where the
    Series dtype is object).

    Parameters
    ----------
    series : pd.Series
        The Pandas Series with a dtype of bool, float64, int64, string,
        datetime64[ns], or timedelta64[ns]
    dtype : Optional[Union[type, str]]
        The dtype to convert to, e.g. bool, np.float64, np.int64, or str.
        Its string form (``str(dtype)``) is looked up in ``SeriesDTypes``.

    Returns
    -------
    Union[pdarray, Strings]

    Raises
    ------
    ValueError
        Raised if the Series dtype is not bool, float64, int64, string, datetime, or timedelta

    Examples
    --------
    >>> import arkouda as ak
    >>> np.random.seed(1701)
    >>> ak.from_series(pd.Series(np.random.randint(0,10,5)))
    array([4 3 3 5 0])

    >>> ak.from_series(pd.Series(['1', '2', '3', '4', '5']),dtype=np.int64)
    array([1 2 3 4 5])

    >>> np.random.seed(1701)
    >>> ak.from_series(pd.Series(np.random.uniform(low=0.0,high=1.0,size=3)))
    array([0.089433234324597599 0.1153776854774361 0.51874393620990389])

    >>> ak.from_series(
    ...     pd.Series([
    ...         '0.57600036956445599',
    ...         '0.41619265571741659',
    ...         '0.6615356693784662',
    ...     ]),
    ...     dtype=np.float64,
    ... )
    array([0.57600036956445599 0.41619265571741659 0.6615356693784662])

    >>> np.random.seed(1864)
    >>> ak.from_series(pd.Series(np.random.choice([True, False],size=5)))
    array([True True True False False])

    >>> ak.from_series(pd.Series(['True', 'False', 'False', 'True', 'True']), dtype=bool)
    array([True True True True True])

    >>> ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))
    array(['a', 'b', 'c', 'd', 'e'])

    >>> ak.from_series(pd.Series(pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01')])))
    array([1514764800000000000 1514764800000000000])

    Notes
    -----
    The supported datatypes are bool, float64, int64, string, datetime64[ns], and
    timedelta64[ns]. The data type is either inferred from the Series or is set via
    the dtype parameter.

    Series of datetime or timedelta are converted to Arkouda arrays of dtype int64 (nanoseconds)

    A Pandas Series containing strings has a dtype of object. Arkouda assumes the Series
    contains strings and sets the dtype to str
    """
    # Imported inside the function rather than at module level, as in the
    # original (presumably to avoid a circular import -- TODO confirm).
    from arkouda.numpy.pdarraycreation import array

    # A falsy dtype (None, or an empty string) means "infer from the Series".
    dt = str(dtype) if dtype else series.dtype.name

    # If the Series has an object dtype, treat it as string to comply with the
    # method signature that does not require a dtype; this is required because
    # Pandas can infer non-str dtypes from the input np or Python array.
    if dt == "object":
        dt = "string"

    # Only the lookup is guarded, so that a KeyError raised while converting
    # the data is not misreported as an unsupported dtype.
    try:
        target_dtype = SeriesDTypes[dt]
    except KeyError:
        raise ValueError(
            f"dtype {dt} is unsupported. Supported dtypes are bool, float64, int64, string, "
            f"datetime64[ns], and timedelta64[ns]"
        ) from None

    n_array = series.to_numpy(dtype=target_dtype)  # type: ignore
    return array(n_array)
1 change: 0 additions & 1 deletion arkouda/pdarraycreation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
arange,
array,
bigint_from_uint_arrays,
from_series,
full,
full_like,
linspace,
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ testpaths =
tests/numpy/utils_test.py
tests/operator_test.py
tests/pandas/categorical_test.py
tests/pandas/conversion_test.py
tests/pandas/dataframe_test.py
tests/pandas/extension/arkouda_array_extension.py
tests/pandas/extension/arkouda_categorical_extension.py
Expand Down
47 changes: 0 additions & 47 deletions tests/numpy/pdarray_creation_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import datetime as dt
import math
import statistics

Expand Down Expand Up @@ -1128,52 +1127,6 @@ def test_random_strings_lognormal_with_seed(self):
)
assert printable_randoms == pda.tolist()

@pytest.mark.parametrize("size", pytest.prob_size)
@pytest.mark.parametrize("dtype", [bool, np.float64, np.int64, str])
def test_from_series_dtypes(self, size, dtype):
    """Check that from_series honours an explicit dtype for int and object Series."""
    # str converts to an ak.Strings; every other dtype yields an ak.pdarray.
    expected_type = ak.Strings if dtype is str else ak.pdarray

    # Series whose dtype pandas inferred from the random integers.
    inferred_series = pd.Series(np.random.randint(0, 10, size))
    converted = ak.from_series(inferred_series, dtype)
    assert isinstance(converted, expected_type)
    assert dtype == converted.dtype

    # Same kind of data, but stored with the generic "object" dtype.
    object_series = pd.Series(np.random.randint(0, 10, size), dtype="object")
    converted_objects = ak.from_series(object_series, dtype=dtype)
    assert isinstance(converted_objects, expected_type)
    assert dtype == converted_objects.dtype

def test_from_series_misc(self):
    """Check dtype inference for string, bool and datetime Series, plus error cases."""
    # Strings: a Series of Python strings converts to ak.Strings.
    letters = pd.Series(["a", "b", "c", "d", "e"])
    result = ak.from_series(letters)
    assert isinstance(result, ak.Strings)
    assert str == result.dtype

    # Booleans: the bool dtype is inferred from the Series.
    flags = pd.Series(np.random.choice([True, False], size=10))
    result = ak.from_series(flags)
    assert isinstance(result, ak.pdarray)
    assert bool == result.dtype

    # Datetimes built from a Python datetime convert to int64.
    py_datetime_series = pd.Series([dt.datetime(2016, 1, 1, 0, 0, 1)])
    result = ak.from_series(py_datetime_series)
    assert isinstance(result, ak.pdarray)
    assert np.int64 == result.dtype

    # Datetimes built from a numpy datetime64 convert to int64.
    np_datetime_series = pd.Series([np.datetime64("2018-01-01")])
    result = ak.from_series(np_datetime_series)
    assert isinstance(result, ak.pdarray)
    assert np.int64 == result.dtype

    # Mixed inputs normalised through pd.to_datetime convert to int64.
    mixed_inputs = ["1/1/2018", np.datetime64("2018-01-01"), dt.datetime(2018, 1, 1)]
    mixed_datetime_series = pd.Series(pd.to_datetime(mixed_inputs))
    result = ak.from_series(mixed_datetime_series)
    assert isinstance(result, ak.pdarray)
    assert np.int64 == result.dtype

    # A non-Series argument is rejected.
    with pytest.raises(TypeError):
        ak.from_series(np.ones(10))

    # An int8 Series is rejected as an unsupported dtype.
    with pytest.raises(ValueError):
        ak.from_series(pd.Series(np.random.randint(0, 10, 10), dtype=np.int8))

@pytest.mark.parametrize("dtype", NUMERIC_SCALARS)
@pytest.mark.parametrize("size", pytest.prob_size)
def test_fill(self, size, dtype):
Expand Down
Loading
Loading