Skip to content

Commit dfaceb7

Browse files
authored
feat: string casing functions (#6096)
## Changes Made

Use the [heck](https://crates.io/crates/heck) crate to convert columns and text between string casings (camel, upper camel, snake, upper snake, kebab, upper kebab, and title case).

## Related Issues

Closes #2550.
1 parent 1df253b commit dfaceb7

File tree

10 files changed

+326
-2
lines changed

10 files changed

+326
-2
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

daft/expressions/expressions.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,6 +1892,76 @@ def capitalize(self) -> Expression:
18921892

18931893
return capitalize(self)
18941894

1895+
def to_camel_case(self) -> Expression:
1896+
"""Convert a string to lower camel case.
1897+
1898+
Tip: See Also
1899+
[`daft.functions.to_camel_case`](https://docs.daft.ai/en/stable/api/functions/to_camel_case/)
1900+
"""
1901+
from daft.functions import to_camel_case
1902+
1903+
return to_camel_case(self)
1904+
1905+
def to_upper_camel_case(self) -> Expression:
1906+
"""Convert a string to upper camel case.
1907+
1908+
Tip: See Also
1909+
[`daft.functions.to_upper_camel_case`](https://docs.daft.ai/en/stable/api/functions/to_upper_camel_case/)
1910+
"""
1911+
from daft.functions import to_upper_camel_case
1912+
1913+
return to_upper_camel_case(self)
1914+
1915+
def to_snake_case(self) -> Expression:
1916+
"""Convert a string to snake case.
1917+
1918+
Tip: See Also
1919+
[`daft.functions.to_snake_case`](https://docs.daft.ai/en/stable/api/functions/to_snake_case/)
1920+
"""
1921+
from daft.functions import to_snake_case
1922+
1923+
return to_snake_case(self)
1924+
1925+
def to_upper_snake_case(self) -> Expression:
1926+
"""Convert a string to upper snake case.
1927+
1928+
Tip: See Also
1929+
[`daft.functions.to_upper_snake_case`](https://docs.daft.ai/en/stable/api/functions/to_upper_snake_case/)
1930+
"""
1931+
from daft.functions import to_upper_snake_case
1932+
1933+
return to_upper_snake_case(self)
1934+
1935+
def to_kebab_case(self) -> Expression:
1936+
"""Convert a string to kebab case.
1937+
1938+
Tip: See Also
1939+
[`daft.functions.to_kebab_case`](https://docs.daft.ai/en/stable/api/functions/to_kebab_case/)
1940+
"""
1941+
from daft.functions import to_kebab_case
1942+
1943+
return to_kebab_case(self)
1944+
1945+
def to_upper_kebab_case(self) -> Expression:
1946+
"""Convert a string to upper kebab case.
1947+
1948+
Tip: See Also
1949+
[`daft.functions.to_upper_kebab_case`](https://docs.daft.ai/en/stable/api/functions/to_upper_kebab_case/)
1950+
"""
1951+
from daft.functions import to_upper_kebab_case
1952+
1953+
return to_upper_kebab_case(self)
1954+
1955+
def to_title_case(self) -> Expression:
1956+
"""Convert a string to title case.
1957+
1958+
Tip: See Also
1959+
[`daft.functions.to_title_case`](https://docs.daft.ai/en/stable/api/functions/to_title_case/)
1960+
"""
1961+
from daft.functions import to_title_case
1962+
1963+
return to_title_case(self)
1964+
18951965
def left(self, nchars: int | Expression) -> Expression:
18961966
"""Gets the n (from nchars) left-most characters of each string.
18971967

daft/functions/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,13 @@
188188
rstrip,
189189
reverse,
190190
capitalize,
191+
to_camel_case,
192+
to_upper_camel_case,
193+
to_snake_case,
194+
to_upper_snake_case,
195+
to_kebab_case,
196+
to_upper_kebab_case,
197+
to_title_case,
191198
left,
192199
right,
193200
rpad,
@@ -421,11 +428,18 @@
421428
"tan",
422429
"tanh",
423430
"time",
431+
"to_camel_case",
424432
"to_date",
425433
"to_datetime",
434+
"to_kebab_case",
426435
"to_list",
436+
"to_snake_case",
427437
"to_struct",
438+
"to_title_case",
428439
"to_unix_epoch",
440+
"to_upper_camel_case",
441+
"to_upper_kebab_case",
442+
"to_upper_snake_case",
429443
"tokenize_decode",
430444
"tokenize_encode",
431445
"total_days",

daft/functions/str.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,69 @@ def capitalize(expr: Expression) -> Expression:
396396
return Expression._call_builtin_scalar_fn("capitalize", expr)
397397

398398

399+
def to_camel_case(expr: Expression) -> Expression:
400+
"""Convert a string to lower camel case.
401+
402+
Returns:
403+
Expression: a String expression converted to lower camel case
404+
"""
405+
return Expression._call_builtin_scalar_fn("to_camel_case", expr)
406+
407+
408+
def to_upper_camel_case(expr: Expression) -> Expression:
409+
"""Convert a string to upper camel case.
410+
411+
Returns:
412+
Expression: a String expression converted to upper camel case
413+
"""
414+
return Expression._call_builtin_scalar_fn("to_upper_camel_case", expr)
415+
416+
417+
def to_snake_case(expr: Expression) -> Expression:
418+
"""Convert a string to snake case.
419+
420+
Returns:
421+
Expression: a String expression converted to snake case
422+
"""
423+
return Expression._call_builtin_scalar_fn("to_snake_case", expr)
424+
425+
426+
def to_upper_snake_case(expr: Expression) -> Expression:
427+
"""Convert a string to upper snake case.
428+
429+
Returns:
430+
Expression: a String expression converted to upper snake case
431+
"""
432+
return Expression._call_builtin_scalar_fn("to_upper_snake_case", expr)
433+
434+
435+
def to_kebab_case(expr: Expression) -> Expression:
436+
"""Convert a string to kebab case.
437+
438+
Returns:
439+
Expression: a String expression converted to kebab case
440+
"""
441+
return Expression._call_builtin_scalar_fn("to_kebab_case", expr)
442+
443+
444+
def to_upper_kebab_case(expr: Expression) -> Expression:
445+
"""Convert a string to upper kebab case.
446+
447+
Returns:
448+
Expression: a String expression converted to upper kebab case
449+
"""
450+
return Expression._call_builtin_scalar_fn("to_upper_kebab_case", expr)
451+
452+
453+
def to_title_case(expr: Expression) -> Expression:
454+
"""Convert a string to title case.
455+
456+
Returns:
457+
Expression: a String expression converted to title case
458+
"""
459+
return Expression._call_builtin_scalar_fn("to_title_case", expr)
460+
461+
399462
def left(expr: Expression, nchars: int | Expression) -> Expression:
400463
"""Gets the n (from nchars) left-most characters of each string.
401464

daft/series.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,27 @@ def reverse(self) -> Series:
936936
def capitalize(self) -> Series:
937937
return self._eval_expressions("capitalize")
938938

939+
def to_camel_case(self) -> Series:
940+
return self._eval_expressions("to_camel_case")
941+
942+
def to_upper_camel_case(self) -> Series:
943+
return self._eval_expressions("to_upper_camel_case")
944+
945+
def to_snake_case(self) -> Series:
946+
return self._eval_expressions("to_snake_case")
947+
948+
def to_upper_snake_case(self) -> Series:
949+
return self._eval_expressions("to_upper_snake_case")
950+
951+
def to_kebab_case(self) -> Series:
952+
return self._eval_expressions("to_kebab_case")
953+
954+
def to_upper_kebab_case(self) -> Series:
955+
return self._eval_expressions("to_upper_kebab_case")
956+
957+
def to_title_case(self) -> Series:
958+
return self._eval_expressions("to_title_case")
959+
939960
def left(self, nchars: Series) -> Series:
940961
return self._eval_expressions("left", nchars)
941962

src/daft-functions-utf8/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ chrono-tz = {workspace = true}
77
common-error = {path = "../common/error", default-features = false}
88
daft-core = {path = "../daft-core", default-features = false}
99
daft-dsl = {path = "../daft-dsl", default-features = false}
10+
heck = "0.5.0"
1011
itertools = {workspace = true}
1112
num-traits = {workspace = true}
1213
regex = {workspace = true}
src/daft-functions-utf8/src/case.rs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
use common_error::DaftResult;
2+
use daft_core::{
3+
prelude::{DataType, Field, Schema},
4+
series::{IntoSeries, Series},
5+
};
6+
use daft_dsl::{
7+
ExprRef,
8+
functions::{FunctionArgs, ScalarUDF, scalar::ScalarFn},
9+
};
10+
use heck::{
11+
ToKebabCase, ToLowerCamelCase, ToShoutyKebabCase, ToShoutySnakeCase, ToSnakeCase, ToTitleCase,
12+
ToUpperCamelCase,
13+
};
14+
use serde::{Deserialize, Serialize};
15+
16+
use crate::utils::{Utf8ArrayUtils, unary_utf8_evaluate, unary_utf8_to_field};
17+
18+
macro_rules! define_case_udf {
19+
($struct:ident, $fn_name:ident, $method:ident, $docstring:literal) => {
20+
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
21+
pub struct $struct;
22+
23+
#[typetag::serde]
24+
impl ScalarUDF for $struct {
25+
fn name(&self) -> &'static str {
26+
stringify!($fn_name)
27+
}
28+
29+
fn call(&self, inputs: FunctionArgs<Series>) -> DaftResult<Series> {
30+
unary_utf8_evaluate(inputs, |s| {
31+
s.with_utf8_array(|arr| {
32+
Ok(arr
33+
.unary_broadcasted_op(|val| val.$method().into())?
34+
.into_series())
35+
})
36+
})
37+
}
38+
39+
fn get_return_field(
40+
&self,
41+
inputs: FunctionArgs<ExprRef>,
42+
schema: &Schema,
43+
) -> DaftResult<Field> {
44+
unary_utf8_to_field(inputs, schema, self.name(), DataType::Utf8)
45+
}
46+
47+
fn docstring(&self) -> &'static str {
48+
$docstring
49+
}
50+
}
51+
52+
#[must_use]
53+
pub fn $fn_name(input: ExprRef) -> ExprRef {
54+
ScalarFn::builtin($struct, vec![input]).into()
55+
}
56+
};
57+
}
58+
59+
define_case_udf!(
60+
CamelCase,
61+
to_camel_case,
62+
to_lower_camel_case,
63+
"Converts a string to lower camel case."
64+
);
65+
define_case_udf!(
66+
UpperCamelCase,
67+
to_upper_camel_case,
68+
to_upper_camel_case,
69+
"Converts a string to upper camel case."
70+
);
71+
define_case_udf!(
72+
SnakeCase,
73+
to_snake_case,
74+
to_snake_case,
75+
"Converts a string to snake case."
76+
);
77+
define_case_udf!(
78+
UpperSnakeCase,
79+
to_upper_snake_case,
80+
to_shouty_snake_case,
81+
"Converts a string to upper snake case."
82+
);
83+
define_case_udf!(
84+
KebabCase,
85+
to_kebab_case,
86+
to_kebab_case,
87+
"Converts a string to kebab case."
88+
);
89+
define_case_udf!(
90+
UpperKebabCase,
91+
to_upper_kebab_case,
92+
to_shouty_kebab_case,
93+
"Converts a string to upper kebab case."
94+
);
95+
define_case_udf!(
96+
TitleCase,
97+
to_title_case,
98+
to_title_case,
99+
"Converts a string to title case."
100+
);

src/daft-functions-utf8/src/lib.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#![allow(deprecated, reason = "arrow2 migration")]
22

33
mod capitalize;
4+
mod case;
45
mod contains;
56
mod count_matches;
67
mod endswith;
@@ -33,6 +34,7 @@ mod upper;
3334
pub(crate) mod utils;
3435

3536
pub use capitalize::*;
37+
pub use case::*;
3638
pub use contains::*;
3739
pub use count_matches::*;
3840
pub use endswith::*;
@@ -66,12 +68,14 @@ pub struct Utf8Functions;
6668

6769
impl daft_dsl::functions::FunctionModule for Utf8Functions {
6870
fn register(parent: &mut daft_dsl::functions::FunctionRegistry) {
71+
parent.add_fn(CamelCase);
6972
parent.add_fn(Capitalize);
7073
parent.add_fn(Contains);
7174
parent.add_fn(CountMatches);
7275
parent.add_fn(EndsWith);
7376
parent.add_fn(Find);
7477
parent.add_fn(ILike);
78+
parent.add_fn(KebabCase);
7579
parent.add_fn(Left);
7680
parent.add_fn(LengthBytes);
7781
parent.add_fn(Like);
@@ -83,19 +87,24 @@ impl daft_dsl::functions::FunctionModule for Utf8Functions {
8387
parent.add_fn(RegexpExtract);
8488
parent.add_fn(RegexpExtractAll);
8589
parent.add_fn(RegexpMatch);
90+
parent.add_fn(RegexpReplace);
91+
parent.add_fn(RegexpSplit);
8692
parent.add_fn(Repeat);
8793
parent.add_fn(Replace);
88-
parent.add_fn(RegexpReplace);
8994
parent.add_fn(Reverse);
9095
parent.add_fn(Right);
9196
parent.add_fn(RPad);
9297
parent.add_fn(RStrip);
98+
parent.add_fn(SnakeCase);
9399
parent.add_fn(Split);
94-
parent.add_fn(RegexpSplit);
95100
parent.add_fn(StartsWith);
96101
parent.add_fn(Substr);
102+
parent.add_fn(TitleCase);
97103
parent.add_fn(ToDate);
98104
parent.add_fn(ToDatetime);
99105
parent.add_fn(Upper);
106+
parent.add_fn(UpperCamelCase);
107+
parent.add_fn(UpperKebabCase);
108+
parent.add_fn(UpperSnakeCase);
100109
}
101110
}

0 commit comments

Comments
 (0)