Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,8 @@ defmodule Explorer.Backend.LazySeries do
owner_import: 1,
owner_export: 1,
to_iovec: 1,
to_list: 1
to_list: 1,
index_of: 2
]

for {fun, arity} <- @remaining_non_lazy_operations do
Expand Down
2 changes: 2 additions & 0 deletions lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@ defmodule Explorer.Backend.Series do
# Struct
@callback field(s, String.t()) :: s

# Returns the zero-based position of the first element equal to the given value,
# or `nil` when the series does not contain it.
@callback index_of(s, valid_types()) :: non_neg_integer() | nil
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realised this should have a return type of integer() | nil not just integer(). I can make a new pr for it if you think it's necessary.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No that's ok, I'll just change it. Thanks for letting us know!


# Functions

@doc """
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ defmodule Explorer.PolarsBackend.Native do
# Each clause below ignores its arguments and delegates to err/0, which is
# defined outside this view.
# NOTE(review): presumably these bodies are placeholders that get replaced by
# the native functions of the same name once the NIF library loads — confirm.
def s_field(_s, _name), do: err()
def s_json_decode(_s, _dtype), do: err()
def s_json_path_match(_s, _json_path), do: err()
# Elixir-side entry for the native `s_index_of`; `_v` is presumably the
# one-element series holding the value to look up — verify against the caller.
def s_index_of(_s, _v), do: err()

# NOTE(review): GC-message helpers; their semantics are not visible here.
def message_on_gc(_pid, _payload), do: err()
def is_message_on_gc(_term), do: err()
Expand Down
26 changes: 26 additions & 0 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,32 @@ defmodule Explorer.PolarsBackend.Series do
Shared.apply_series(series, :s_re_named_captures, [pattern])
end

@impl true
def index_of(series, value) do
  # Single place that builds the ArgumentError reported for any incompatible value.
  reject = fn ->
    raise ArgumentError,
          "unable to get index of value: #{inspect(value)} in series of type: #{inspect(series.dtype)}"
  end

  # The value is wrapped in a one-element series of the same dtype as `series`
  # before being handed to the native lookup.
  needle =
    try do
      case series.dtype do
        {:duration, precision} ->
          case value do
            # Cast the duration to the precision of `series` so both sides are
            # compared at the same precision.
            %Explorer.Duration{} ->
              [value] |> Series.from_list() |> cast({:duration, precision})

            _ ->
              reject.()
          end

        dtype ->
          Series.from_list([value], dtype: dtype)
      end
    rescue
      # Any failure while building the needle is surfaced as the same ArgumentError.
      _ -> reject.()
    end

  Shared.apply_series(series, :s_index_of, [needle.data])
end

# Polars specific functions

# Forwards `series` to `Shared.apply_series/2` with the `:s_name` operation.
def name(series) do
  Shared.apply_series(series, :s_name)
end
Expand Down
35 changes: 35 additions & 0 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6849,6 +6849,41 @@ defmodule Explorer.Series do
apply_series(series, :json_path_match, [json_path])
end

@doc """
Finds the index of the first occurrence of `value` in `series`.

Returns the zero-based index of the first match, or `nil` when `value` is not
present in the series.

## Examples

    iex> s = Explorer.Series.from_list([1, 2, 3])
    iex> Explorer.Series.index_of(s, 2)
    1

    iex> s = Explorer.Series.from_list([1, 2, 3])
    iex> Explorer.Series.index_of(s, 4)
    nil

This operation raises an `ArgumentError` when `value` is not compatible with the
series.

    iex> s = Explorer.Series.from_list([1, 2, 3])
    iex> Explorer.Series.index_of(s, "a")
    ** (ArgumentError) unable to get index of value: "a" in series of type: {:s, 64}

When the dtype of `series` is a duration and `value` is an `Explorer.Duration`
struct, `value` is cast to the same precision as `series` before the lookup.

    iex> s = Explorer.Series.from_list([1, 2, 3], dtype: {:duration, :millisecond})
    iex> Explorer.Series.index_of(s, %Explorer.Duration{value: 1000, precision: :microsecond})
    0

"""
@doc type: :shape
@spec index_of(series :: Series.t(), value :: Explorer.Backend.Series.valid_types()) ::
        non_neg_integer() | nil
def index_of(series, value) do
  apply_series(series, :index_of, [value])
end

# Helpers

defp backend_from_options!(opts) do
Expand Down
5 changes: 4 additions & 1 deletion native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,14 @@ features = [
"timezones",
"to_dummies",
"trigonometry",
"object",
"index_of",
"search_sorted",
]

[dependencies.polars-ops]
version = "0.49"
features = ["abs", "ewma", "cum_agg", "cov"]
features = ["abs", "ewma", "cum_agg", "cov", "index_of"]

[features]
default = ["ndjson", "cloud", "nif_version_2_15"]
Expand Down
15 changes: 15 additions & 0 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1962,3 +1962,18 @@ pub fn s_re_named_captures(s1: ExSeries, pattern: &str) -> Result<ExSeries, Expl

Ok(ExSeries::new(s2))
}

/// Looks up the first element of `value_series` inside `series`.
///
/// A found position is passed on as `AnyValue::UInt64`, a miss as
/// `AnyValue::Null`; either way the value is encoded into a term by
/// `encoding::resource_term_from_value`. Errors from `index_of` are propagated.
#[rustler::nif(schedule = "DirtyCpu")]
pub fn s_index_of(
    env: Env,
    series: ExSeries,
    value_series: ExSeries,
) -> Result<Term, ExplorerError> {
    let position = index_of(&series, value_series.first())?;
    let encoded = position.map_or(AnyValue::Null, |idx| AnyValue::UInt64(idx as u64));

    encoding::resource_term_from_value(&series.resource, encoded, env)
}
102 changes: 102 additions & 0 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -6771,6 +6771,108 @@ defmodule Explorer.SeriesTest do
end
end

describe "index_of/2" do
  # Each dtype test checks the first element, the second element, and a miss (nil).
  test "gets index of element in series" do
    series = Series.from_list([1, 2])

    assert Series.index_of(series, 1) == 0
    assert Series.index_of(series, 2) == 1
    assert Series.index_of(series, 3) == nil
  end

  test "works with floats" do
    series = Series.from_list([1.0, 2.0])

    assert Series.index_of(series, 1.0) == 0
    assert Series.index_of(series, 2.0) == 1
    assert Series.index_of(series, 3.0) == nil
  end

  test "works with booleans" do
    series = Series.from_list([false, true])

    assert Series.index_of(series, false) == 0
    assert Series.index_of(series, true) == 1

    # A boolean series needs a separate fixture to exercise the "not found" case.
    series = Series.from_list([false])

    assert Series.index_of(series, true) == nil
  end

  test "works with strings" do
    series = Series.from_list(["a", "b"])

    assert Series.index_of(series, "a") == 0
    assert Series.index_of(series, "b") == 1
    assert Series.index_of(series, "c") == nil
  end

  test "works with dates" do
    series = Series.from_list([~D[2021-01-01], ~D[2021-01-02]])

    assert Series.index_of(series, ~D[2021-01-01]) == 0
    assert Series.index_of(series, ~D[2021-01-02]) == 1
    assert Series.index_of(series, ~D[2021-01-03]) == nil
  end

  test "works with times" do
    series = Series.from_list([~T[00:00:00.000001], ~T[00:00:01.000001]])

    assert Series.index_of(series, ~T[00:00:00.000001]) == 0
    assert Series.index_of(series, ~T[00:00:01.000001]) == 1
    assert Series.index_of(series, ~T[00:00:02]) == nil
  end

  test "works with datetimes" do
    series = Series.from_list([~N[2021-01-01 00:00:00.000001], ~N[2021-01-02 00:00:00.000001]])

    assert Series.index_of(series, ~N[2021-01-01 00:00:00.000001]) == 0
    assert Series.index_of(series, ~N[2021-01-02 00:00:00.000001]) == 1
    assert Series.index_of(series, ~N[2021-01-03 00:00:00]) == nil
  end

  test "works with durations" do
    series = Series.from_list([1, 2], dtype: {:duration, :millisecond})

    # `one` deliberately uses a different precision than the series.
    one = %Explorer.Duration{value: 1000, precision: :microsecond}
    two = %Explorer.Duration{value: 2, precision: :millisecond}
    three = %Explorer.Duration{value: 3, precision: :millisecond}

    assert Series.index_of(series, one) == 0
    assert Series.index_of(series, two) == 1
    assert Series.index_of(series, three) == nil
  end

  test "works with decimal" do
    series = Series.from_list([Decimal.new("1"), Decimal.new("2")])

    assert Series.index_of(series, Decimal.new("1")) == 0
    assert Series.index_of(series, Decimal.new("2")) == 1
    assert Series.index_of(series, Decimal.new("3")) == nil
  end

  test "duplicate values" do
    series = Series.from_list([0, 0])

    # The first occurrence wins.
    assert Series.index_of(series, 0) == 0
  end

  test "raises on type mismatch" do
    integers = Series.from_list([0])

    assert_raise ArgumentError,
                 "unable to get index of value: \"a\" in series of type: {:s, 64}",
                 fn -> Series.index_of(integers, "a") end

    assert_raise ArgumentError,
                 "unable to get index of value: Decimal.new(\"0\") in series of type: {:s, 64}",
                 fn -> Series.index_of(integers, Decimal.new("0")) end

    assert_raise ArgumentError,
                 "unable to get index of value: ~N[2021-01-03 00:00:00] in series of type: {:s, 64}",
                 fn -> Series.index_of(integers, ~N[2021-01-03 00:00:00]) end

    one = %Explorer.Duration{value: 1, precision: :microsecond}

    assert_raise ArgumentError,
                 "unable to get index of value: #Explorer.Duration[1us] in series of type: {:s, 64}",
                 fn -> Series.index_of(Series.from_list([1]), one) end

    assert_raise ArgumentError,
                 "unable to get index of value: 1 in series of type: {:duration, :microsecond}",
                 fn -> Series.index_of(Series.from_list([one]), 1) end
  end
end

defp all_close?(a, b, tol \\ 1.0e-8) do
Series.subtract(a, b)
|> Series.abs()
Expand Down
Loading