Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 70 additions & 0 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1827,6 +1827,76 @@ def capitalize(self) -> Expression:

return capitalize(self)

def to_camel_case(self) -> Expression:
    """Return this string expression converted to lower camel case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_camel_case`](https://docs.daft.ai/en/stable/api/functions/to_camel_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_camel_case as _to_camel_case

    return _to_camel_case(self)

def to_upper_camel_case(self) -> Expression:
    """Return this string expression converted to upper camel case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_upper_camel_case`](https://docs.daft.ai/en/stable/api/functions/to_upper_camel_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_upper_camel_case as _to_upper_camel_case

    return _to_upper_camel_case(self)

def to_snake_case(self) -> Expression:
    """Return this string expression converted to snake case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_snake_case`](https://docs.daft.ai/en/stable/api/functions/to_snake_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_snake_case as _to_snake_case

    return _to_snake_case(self)

def to_upper_snake_case(self) -> Expression:
    """Return this string expression converted to upper snake case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_upper_snake_case`](https://docs.daft.ai/en/stable/api/functions/to_upper_snake_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_upper_snake_case as _to_upper_snake_case

    return _to_upper_snake_case(self)

def to_kebab_case(self) -> Expression:
    """Return this string expression converted to kebab case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_kebab_case`](https://docs.daft.ai/en/stable/api/functions/to_kebab_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_kebab_case as _to_kebab_case

    return _to_kebab_case(self)

def to_upper_kebab_case(self) -> Expression:
    """Return this string expression converted to upper kebab case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_upper_kebab_case`](https://docs.daft.ai/en/stable/api/functions/to_upper_kebab_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_upper_kebab_case as _to_upper_kebab_case

    return _to_upper_kebab_case(self)

def to_title_case(self) -> Expression:
    """Return this string expression converted to title case.

    Thin method form of the function with the same name in `daft.functions`;
    the expression itself is passed as the only argument.

    Tip: See Also
        [`daft.functions.to_title_case`](https://docs.daft.ai/en/stable/api/functions/to_title_case/)
    """
    # Function-scope import, matching the sibling wrappers on this class.
    from daft.functions import to_title_case as _to_title_case

    return _to_title_case(self)

def left(self, nchars: int | Expression) -> Expression:
"""Gets the n (from nchars) left-most characters of each string.

Expand Down
14 changes: 14 additions & 0 deletions daft/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,13 @@
rstrip,
reverse,
capitalize,
to_camel_case,
to_upper_camel_case,
to_snake_case,
to_upper_snake_case,
to_kebab_case,
to_upper_kebab_case,
to_title_case,
left,
right,
rpad,
Expand Down Expand Up @@ -408,11 +415,18 @@
"tan",
"tanh",
"time",
"to_camel_case",
"to_date",
"to_datetime",
"to_kebab_case",
"to_list",
"to_snake_case",
"to_struct",
"to_title_case",
"to_unix_epoch",
"to_upper_camel_case",
"to_upper_kebab_case",
"to_upper_snake_case",
"tokenize_decode",
"tokenize_encode",
"total_days",
Expand Down
63 changes: 63 additions & 0 deletions daft/functions/str.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,69 @@ def capitalize(expr: Expression) -> Expression:
return Expression._call_builtin_scalar_fn("capitalize", expr)


def to_camel_case(expr: Expression) -> Expression:
    """Convert a string expression to lower camel case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_camel_case".

    Returns:
        Expression: a String expression converted to lower camel case
    """
    builtin_name = "to_camel_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def to_upper_camel_case(expr: Expression) -> Expression:
    """Convert a string expression to upper camel case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_upper_camel_case".

    Returns:
        Expression: a String expression converted to upper camel case
    """
    builtin_name = "to_upper_camel_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def to_snake_case(expr: Expression) -> Expression:
    """Convert a string expression to snake case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_snake_case".

    Returns:
        Expression: a String expression converted to snake case
    """
    builtin_name = "to_snake_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def to_upper_snake_case(expr: Expression) -> Expression:
    """Convert a string expression to upper snake case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_upper_snake_case".

    Returns:
        Expression: a String expression converted to upper snake case
    """
    builtin_name = "to_upper_snake_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def to_kebab_case(expr: Expression) -> Expression:
    """Convert a string expression to kebab case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_kebab_case".

    Returns:
        Expression: a String expression converted to kebab case
    """
    builtin_name = "to_kebab_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def to_upper_kebab_case(expr: Expression) -> Expression:
    """Convert a string expression to upper kebab case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_upper_kebab_case".

    Returns:
        Expression: a String expression converted to upper kebab case
    """
    builtin_name = "to_upper_kebab_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def to_title_case(expr: Expression) -> Expression:
    """Convert a string expression to title case.

    Args:
        expr: the expression to convert; forwarded unchanged to the builtin
            scalar function registered under the name "to_title_case".

    Returns:
        Expression: a String expression converted to title case
    """
    builtin_name = "to_title_case"
    return Expression._call_builtin_scalar_fn(builtin_name, expr)


def left(expr: Expression, nchars: int | Expression) -> Expression:
"""Gets the n (from nchars) left-most characters of each string.

Expand Down
21 changes: 21 additions & 0 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,27 @@ def reverse(self) -> Series:
def capitalize(self) -> Series:
    """Evaluate the "capitalize" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("capitalize")
    return result

def to_camel_case(self) -> Series:
    """Evaluate the "to_camel_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_camel_case")
    return result

def to_upper_camel_case(self) -> Series:
    """Evaluate the "to_upper_camel_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_upper_camel_case")
    return result

def to_snake_case(self) -> Series:
    """Evaluate the "to_snake_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_snake_case")
    return result

def to_upper_snake_case(self) -> Series:
    """Evaluate the "to_upper_snake_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_upper_snake_case")
    return result

def to_kebab_case(self) -> Series:
    """Evaluate the "to_kebab_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_kebab_case")
    return result

def to_upper_kebab_case(self) -> Series:
    """Evaluate the "to_upper_kebab_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_upper_kebab_case")
    return result

def to_title_case(self) -> Series:
    """Evaluate the "to_title_case" function on this series via `_eval_expressions`."""
    result = self._eval_expressions("to_title_case")
    return result

def left(self, nchars: Series) -> Series:
    """Evaluate the "left" function on this series via `_eval_expressions`.

    Args:
        nchars: series forwarded as the extra argument to "left".
    """
    result = self._eval_expressions("left", nchars)
    return result

Expand Down
1 change: 1 addition & 0 deletions src/daft-functions-utf8/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ chrono-tz = {workspace = true}
common-error = {path = "../common/error", default-features = false}
daft-core = {path = "../daft-core", default-features = false}
daft-dsl = {path = "../daft-dsl", default-features = false}
heck = "0.5.0"
itertools = {workspace = true}
num-traits = {workspace = true}
regex = {workspace = true}
Expand Down
100 changes: 100 additions & 0 deletions src/daft-functions-utf8/src/case.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
use common_error::DaftResult;
use daft_core::{
prelude::{DataType, Field, Schema},
series::{IntoSeries, Series},
};
use daft_dsl::{
ExprRef,
functions::{FunctionArgs, ScalarUDF, scalar::ScalarFn},
};
use heck::{
ToKebabCase, ToLowerCamelCase, ToShoutyKebabCase, ToShoutySnakeCase, ToSnakeCase, ToTitleCase,
ToUpperCamelCase,
};
use serde::{Deserialize, Serialize};

use crate::utils::{Utf8ArrayUtils, unary_utf8_evaluate, unary_utf8_to_field};

/// Generates one unary UTF-8 case-conversion scalar function.
///
/// Arguments:
/// * `$struct` - name of the public unit struct that implements `ScalarUDF`.
/// * `$fn_name` - name of the public expression-builder function; its
///   stringified form is also what `ScalarUDF::name` returns.
/// * `$method` - method invoked on each string value to perform the conversion
///   (the call sites pass methods from the `heck` case-conversion traits).
/// * `$docstring` - string literal returned by `ScalarUDF::docstring` and also
///   attached as rustdoc to both generated public items.
macro_rules! define_case_udf {
    ($struct:ident, $fn_name:ident, $method:ident, $docstring:literal) => {
        #[doc = $docstring]
        #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
        pub struct $struct;

        #[typetag::serde]
        impl ScalarUDF for $struct {
            fn name(&self) -> &'static str {
                stringify!($fn_name)
            }

            fn call(&self, inputs: FunctionArgs<Series>) -> DaftResult<Series> {
                unary_utf8_evaluate(inputs, |s| {
                    s.with_utf8_array(|arr| {
                        // `$method` yields an owned value; `.into()` adapts it to
                        // whatever `unary_broadcasted_op` expects from its closure.
                        Ok(arr
                            .unary_broadcasted_op(|val| val.$method().into())?
                            .into_series())
                    })
                })
            }

            fn get_return_field(
                &self,
                inputs: FunctionArgs<ExprRef>,
                schema: &Schema,
            ) -> DaftResult<Field> {
                unary_utf8_to_field(inputs, schema, self.name(), DataType::Utf8)
            }

            fn docstring(&self) -> &'static str {
                $docstring
            }
        }

        #[doc = $docstring]
        #[must_use]
        pub fn $fn_name(input: ExprRef) -> ExprRef {
            ScalarFn::builtin($struct, vec![input]).into()
        }
    };
}

// Each invocation below supplies: struct name, function name (also the
// registered UDF name), the `heck` trait method that does the conversion,
// and the docstring.

// `to_camel_case` maps to heck's `to_lower_camel_case`.
define_case_udf!(
CamelCase,
to_camel_case,
to_lower_camel_case,
"Converts a string to lower camel case."
);
// `to_upper_camel_case` maps to heck's method of the same name.
define_case_udf!(
UpperCamelCase,
to_upper_camel_case,
to_upper_camel_case,
"Converts a string to upper camel case."
);
// `to_snake_case` maps to heck's method of the same name.
define_case_udf!(
SnakeCase,
to_snake_case,
to_snake_case,
"Converts a string to snake case."
);
// "Upper snake case" is exposed through heck's `to_shouty_snake_case`.
define_case_udf!(
UpperSnakeCase,
to_upper_snake_case,
to_shouty_snake_case,
"Converts a string to upper snake case."
);
// `to_kebab_case` maps to heck's method of the same name.
define_case_udf!(
KebabCase,
to_kebab_case,
to_kebab_case,
"Converts a string to kebab case."
);
// "Upper kebab case" is exposed through heck's `to_shouty_kebab_case`.
define_case_udf!(
UpperKebabCase,
to_upper_kebab_case,
to_shouty_kebab_case,
"Converts a string to upper kebab case."
);
// `to_title_case` maps to heck's method of the same name.
define_case_udf!(
TitleCase,
to_title_case,
to_title_case,
"Converts a string to title case."
);
13 changes: 11 additions & 2 deletions src/daft-functions-utf8/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![allow(deprecated, reason = "arrow2 migration")]

mod capitalize;
mod case;
mod contains;
mod count_matches;
mod endswith;
Expand Down Expand Up @@ -33,6 +34,7 @@ mod upper;
pub(crate) mod utils;

pub use capitalize::*;
pub use case::*;
pub use contains::*;
pub use count_matches::*;
pub use endswith::*;
Expand Down Expand Up @@ -66,12 +68,14 @@ pub struct Utf8Functions;

impl daft_dsl::functions::FunctionModule for Utf8Functions {
fn register(parent: &mut daft_dsl::functions::FunctionRegistry) {
parent.add_fn(CamelCase);
parent.add_fn(Capitalize);
parent.add_fn(Contains);
parent.add_fn(CountMatches);
parent.add_fn(EndsWith);
parent.add_fn(Find);
parent.add_fn(ILike);
parent.add_fn(KebabCase);
parent.add_fn(Left);
parent.add_fn(LengthBytes);
parent.add_fn(Like);
Expand All @@ -83,19 +87,24 @@ impl daft_dsl::functions::FunctionModule for Utf8Functions {
parent.add_fn(RegexpExtract);
parent.add_fn(RegexpExtractAll);
parent.add_fn(RegexpMatch);
parent.add_fn(RegexpReplace);
parent.add_fn(RegexpSplit);
parent.add_fn(Repeat);
parent.add_fn(Replace);
parent.add_fn(RegexpReplace);
parent.add_fn(Reverse);
parent.add_fn(Right);
parent.add_fn(RPad);
parent.add_fn(RStrip);
parent.add_fn(SnakeCase);
parent.add_fn(Split);
parent.add_fn(RegexpSplit);
parent.add_fn(StartsWith);
parent.add_fn(Substr);
parent.add_fn(TitleCase);
parent.add_fn(ToDate);
parent.add_fn(ToDatetime);
parent.add_fn(Upper);
parent.add_fn(UpperCamelCase);
parent.add_fn(UpperKebabCase);
parent.add_fn(UpperSnakeCase);
}
}
Loading
Loading