Skip to content

Commit ae2c010

Browse files
authored
Add Series.index_of (#1118)
* wip all tests passing * better handle casting to scalar? * add index_of example for value not present * implementation based on feedback * remove unused code * cast durations, add more examples, better error message * reword docs * add comment explaining casting
1 parent e3c3cdf commit ae2c010

File tree

8 files changed

+187
-2
lines changed

8 files changed

+187
-2
lines changed

lib/explorer/backend/lazy_series.ex

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1279,7 +1279,8 @@ defmodule Explorer.Backend.LazySeries do
12791279
owner_import: 1,
12801280
owner_export: 1,
12811281
to_iovec: 1,
1282-
to_list: 1
1282+
to_list: 1,
1283+
index_of: 2
12831284
]
12841285

12851286
for {fun, arity} <- @remaining_non_lazy_operations do

lib/explorer/backend/series.ex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@ defmodule Explorer.Backend.Series do
344344
# Struct
345345
@callback field(s, String.t()) :: s
346346

347+
@callback index_of(s, valid_types()) :: integer()
348+
347349
# Functions
348350

349351
@doc """

lib/explorer/polars_backend/native.ex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,7 @@ defmodule Explorer.PolarsBackend.Native do
519519
def s_field(_s, _name), do: err()
520520
def s_json_decode(_s, _dtype), do: err()
521521
def s_json_path_match(_s, _json_path), do: err()
522+
def s_index_of(_s, _v), do: err()
522523

523524
def message_on_gc(_pid, _payload), do: err()
524525
def is_message_on_gc(_term), do: err()

lib/explorer/polars_backend/series.ex

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,6 +850,32 @@ defmodule Explorer.PolarsBackend.Series do
850850
Shared.apply_series(series, :s_re_named_captures, [pattern])
851851
end
852852

853+
@impl true
854+
def index_of(series, value) do
855+
value_series =
856+
try do
857+
case {series.dtype, value} do
858+
# cast value to duration of same type as series to ensure durations are correctly
859+
# compared at the same precision
860+
{{:duration, precision}, %Explorer.Duration{}} ->
861+
Series.from_list([value]) |> cast({:duration, precision})
862+
863+
{{:duration, _}, _} ->
864+
raise ArgumentError,
865+
"unable to get index of value: #{inspect(value)} in series of type: #{inspect(series.dtype)}"
866+
867+
{dtype, _} ->
868+
Series.from_list([value], dtype: dtype)
869+
end
870+
rescue
871+
_ ->
872+
raise ArgumentError,
873+
"unable to get index of value: #{inspect(value)} in series of type: #{inspect(series.dtype)}"
874+
end
875+
876+
Shared.apply_series(series, :s_index_of, [value_series.data])
877+
end
878+
853879
# Polars specific functions
854880

855881
def name(series), do: Shared.apply_series(series, :s_name)

lib/explorer/series.ex

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6849,6 +6849,41 @@ defmodule Explorer.Series do
68496849
apply_series(series, :json_path_match, [json_path])
68506850
end
68516851

6852+
@doc """
6853+
Finds the index of the first occurrence of `value` in a series, returning `nil` when it is not present.
6854+
6855+
## Examples
6856+
6857+
iex> s = Explorer.Series.from_list([1, 2, 3])
6858+
iex> Explorer.Series.index_of(s, 2)
6859+
1
6860+
6861+
iex> s = Explorer.Series.from_list([1, 2, 3])
6862+
iex> Explorer.Series.index_of(s, 4)
6863+
nil
6864+
6865+
This operation raises an `ArgumentError` when `value` is not compatible with the
6866+
series.
6867+
6868+
iex> s = Explorer.Series.from_list([1, 2, 3])
6869+
iex> Explorer.Series.index_of(s, "a")
6870+
** (ArgumentError) unable to get index of value: "a" in series of type: {:s, 64}
6871+
6872+
It will cast `value` when it is an `Explorer.Duration` struct to the same precision as `series`
6873+
when its dtype is a duration.
6874+
6875+
iex> s = Explorer.Series.from_list([1, 2, 3], dtype: {:duration, :millisecond})
6876+
iex> Explorer.Series.index_of(s, %Explorer.Duration{value: 1000, precision: :microsecond})
6877+
0
6878+
6879+
"""
6880+
6881+
@doc type: :shape
6882+
@spec index_of(series :: Series.t(), value :: Explorer.Backend.Series.valid_types()) :: any()
6883+
def index_of(series, value) do
6884+
apply_series(series, :index_of, [value])
6885+
end
6886+
68526887
# Helpers
68536888

68546889
defp backend_from_options!(opts) do

native/explorer/Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,11 +80,14 @@ features = [
8080
"timezones",
8181
"to_dummies",
8282
"trigonometry",
83+
"object",
84+
"index_of",
85+
"search_sorted",
8386
]
8487

8588
[dependencies.polars-ops]
8689
version = "0.49"
87-
features = ["abs", "ewma", "cum_agg", "cov"]
90+
features = ["abs", "ewma", "cum_agg", "cov", "index_of"]
8891

8992
[features]
9093
default = ["ndjson", "cloud", "nif_version_2_15"]

native/explorer/src/series.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1962,3 +1962,18 @@ pub fn s_re_named_captures(s1: ExSeries, pattern: &str) -> Result<ExSeries, Expl
19621962

19631963
Ok(ExSeries::new(s2))
19641964
}
1965+
1966+
#[rustler::nif(schedule = "DirtyCpu")]
1967+
pub fn s_index_of(
1968+
env: Env,
1969+
series: ExSeries,
1970+
value_series: ExSeries,
1971+
) -> Result<Term, ExplorerError> {
1972+
let needle = value_series.first();
1973+
let idx_value = match index_of(&series, needle)? {
1974+
None => AnyValue::Null,
1975+
Some(idx) => AnyValue::UInt64(idx as u64),
1976+
};
1977+
1978+
encoding::resource_term_from_value(&series.resource, idx_value, env)
1979+
}

test/explorer/series_test.exs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6771,6 +6771,108 @@ defmodule Explorer.SeriesTest do
67716771
end
67726772
end
67736773

6774+
describe "index_of/2" do
6775+
test "gets index of element in series" do
6776+
series = Series.from_list([1, 2])
6777+
assert series |> Series.index_of(1) == 0
6778+
assert series |> Series.index_of(2) == 1
6779+
assert series |> Series.index_of(3) == nil
6780+
end
6781+
6782+
test "works with floats" do
6783+
series = Series.from_list([1.0, 2.0])
6784+
assert series |> Series.index_of(1.0) == 0
6785+
assert series |> Series.index_of(2.0) == 1
6786+
assert series |> Series.index_of(3.0) == nil
6787+
end
6788+
6789+
test "works with booleans" do
6790+
series = Series.from_list([false, true])
6791+
assert series |> Series.index_of(false) == 0
6792+
assert series |> Series.index_of(true) == 1
6793+
6794+
series = Series.from_list([false])
6795+
assert series |> Series.index_of(true) == nil
6796+
end
6797+
6798+
test "works with strings" do
6799+
series = Series.from_list(["a", "b"])
6800+
assert series |> Series.index_of("a") == 0
6801+
assert series |> Series.index_of("b") == 1
6802+
assert series |> Series.index_of("c") == nil
6803+
end
6804+
6805+
test "works with dates" do
6806+
series = Series.from_list([~D[2021-01-01], ~D[2021-01-02]])
6807+
assert series |> Series.index_of(~D[2021-01-01]) == 0
6808+
assert series |> Series.index_of(~D[2021-01-02]) == 1
6809+
assert series |> Series.index_of(~D[2021-01-03]) == nil
6810+
end
6811+
6812+
test "works with times" do
6813+
series = Series.from_list([~T[00:00:00.000001], ~T[00:00:01.000001]])
6814+
assert series |> Series.index_of(~T[00:00:00.000001]) == 0
6815+
assert series |> Series.index_of(~T[00:00:01.000001]) == 1
6816+
assert series |> Series.index_of(~T[00:00:02]) == nil
6817+
end
6818+
6819+
test "works with datetimes" do
6820+
series = Series.from_list([~N[2021-01-01 00:00:00.000001], ~N[2021-01-02 00:00:00.000001]])
6821+
assert series |> Series.index_of(~N[2021-01-01 00:00:00.000001]) == 0
6822+
assert series |> Series.index_of(~N[2021-01-02 00:00:00.000001]) == 1
6823+
assert series |> Series.index_of(~N[2021-01-03 00:00:00]) == nil
6824+
end
6825+
6826+
test "works with durations" do
6827+
series = Series.from_list([1, 2], dtype: {:duration, :millisecond})
6828+
one = %Explorer.Duration{value: 1000, precision: :microsecond}
6829+
two = %Explorer.Duration{value: 2, precision: :millisecond}
6830+
three = %Explorer.Duration{value: 3, precision: :millisecond}
6831+
6832+
assert series |> Series.index_of(one) == 0
6833+
assert series |> Series.index_of(two) == 1
6834+
assert series |> Series.index_of(three) == nil
6835+
end
6836+
6837+
test "works with decimal" do
6838+
series = Series.from_list([Decimal.new("1"), Decimal.new("2")])
6839+
6840+
assert series |> Series.index_of(Decimal.new("1")) == 0
6841+
assert series |> Series.index_of(Decimal.new("2")) == 1
6842+
assert series |> Series.index_of(Decimal.new("3")) == nil
6843+
end
6844+
6845+
test "duplicate values" do
6846+
series = Series.from_list([0, 0])
6847+
6848+
assert series |> Series.index_of(0) == 0
6849+
end
6850+
6851+
test "raises on type mismatch" do
6852+
assert_raise ArgumentError,
6853+
"unable to get index of value: \"a\" in series of type: {:s, 64}",
6854+
fn -> Series.index_of(Series.from_list([0]), "a") end
6855+
6856+
assert_raise ArgumentError,
6857+
"unable to get index of value: Decimal.new(\"0\") in series of type: {:s, 64}",
6858+
fn -> Series.index_of(Series.from_list([0]), Decimal.new("0")) end
6859+
6860+
assert_raise ArgumentError,
6861+
"unable to get index of value: ~N[2021-01-03 00:00:00] in series of type: {:s, 64}",
6862+
fn -> Series.index_of(Series.from_list([0]), ~N[2021-01-03 00:00:00]) end
6863+
6864+
one = %Explorer.Duration{value: 1, precision: :microsecond}
6865+
6866+
assert_raise ArgumentError,
6867+
"unable to get index of value: #Explorer.Duration[1us] in series of type: {:s, 64}",
6868+
fn -> Series.index_of(Series.from_list([1]), one) end
6869+
6870+
assert_raise ArgumentError,
6871+
"unable to get index of value: 1 in series of type: {:duration, :microsecond}",
6872+
fn -> Series.index_of(Series.from_list([one]), 1) end
6873+
end
6874+
end
6875+
67746876
defp all_close?(a, b, tol \\ 1.0e-8) do
67756877
Series.subtract(a, b)
67766878
|> Series.abs()

0 commit comments

Comments
 (0)