Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 57 additions & 0 deletions e2e_test/batch/functions/casefold.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
query T
SELECT casefold('Hello World');
----
hello world

query T
SELECT casefold('HELLO RUST');
----
hello rust

# ß folds to ss (full case folding)
query T
SELECT casefold('Straße');
----
strasse

# Empty string
query T
SELECT casefold('');
----
(empty)

# NULL input
query T
SELECT casefold(NULL::varchar);
----
NULL

# ASCII-only input
query T
SELECT casefold('ABC123xyz');
----
abc123xyz

# Already lowercase
query T
SELECT casefold('already lowercase');
----
already lowercase

# Mixed with numbers and symbols
query T
SELECT casefold('Test@123#ABC');
----
test@123#abc

# Latin ligatures: ﬁ (U+FB01) folds to fi
query T
SELECT casefold('ﬁnd');
----
find

# Capital sharp S also folds to ss
query T
SELECT casefold('GROẞE');
----
grosse
1 change: 1 addition & 0 deletions proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ message ExprNode {
TRIM_SCALE = 278;
GAMMA = 288;
LGAMMA = 289;
CASEFOLD = 291;

// Boolean comparison
IS_TRUE = 301;
Expand Down
1 change: 1 addition & 0 deletions src/expr/impl/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = ["time"] }
# For arrow-udf-runtime/remote
tonic = { version = "0.12.3", optional = true }
tracing = "0.1"
unicode-casefold = "0.2"
zstd = { version = "0.13", default-features = false, optional = true }

[target.'cfg(not(madsim))'.dependencies]
Expand Down
69 changes: 69 additions & 0 deletions src/expr/impl/src/scalar/casefold.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2023 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use risingwave_expr::function;
use unicode_casefold::UnicodeCaseFold;

/// Applies Unicode case folding to `s` and writes the folded text to `writer`.
///
/// Case folding is intended for caseless (case-insensitive) matching and
/// differs from `lower()` in two ways: one character may fold to several
/// (e.g., 'ß' → "ss"), and every case variant of a letter collapses to a
/// single representative (e.g., 'Σ', 'σ', 'ς' all fold to 'σ').
///
/// # Panics
///
/// Panics if `writer` reports an error while a folded character is written.
///
/// ```slt
/// query T
/// SELECT casefold('Hello World');
/// ----
/// hello world
///
/// query T
/// SELECT casefold('Straße');
/// ----
/// strasse
///
/// query T
/// SELECT casefold(NULL::varchar);
/// ----
/// NULL
/// ```
#[function("casefold(varchar) -> varchar")]
fn casefold(s: &str, writer: &mut impl std::fmt::Write) {
    // Stream each folded character straight into the writer; the first write
    // error stops the iteration and is surfaced by the `unwrap` below.
    s.case_fold()
        .try_for_each(|folded| writer.write_char(folded))
        .unwrap();
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `casefold` against a table of `(input, expected)` pairs covering
    /// plain ASCII, the multi-character 'ß' expansion, both sigma variants,
    /// and the empty string.
    #[test]
    fn test_casefold() {
        let cases = [
            ("Hello World", "hello world"),
            ("HELLO RUST", "hello rust"),
            // ß folds to ss
            ("Straße", "strasse"),
            // Capital sigma (Σ) folds to σ, including in word-final position
            ("ΣΊΣΥΦΟΣ", "σίσυφοσ"),
            // Final sigma (ς) also folds to σ
            ("Σίσυφος", "σίσυφοσ"),
            ("", ""),
        ];

        for (input, expected) in cases {
            // Each case gets a fresh buffer so output never leaks between cases.
            let mut writer = String::new();
            casefold(input, &mut writer);
            assert_eq!(writer, expected, "casefold({:?})", input);
        }
    }
}
1 change: 1 addition & 0 deletions src/expr/impl/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ mod bitwise_op;
mod bytea_bits;
mod cardinality;
mod case;
mod casefold;
mod cast;
mod cmp;
mod coalesce;
Expand Down
1 change: 1 addition & 0 deletions src/frontend/src/binder/expr/function/builtin_scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ impl Binder {
("length", raw_call(ExprType::Length)),
("upper", raw_call(ExprType::Upper)),
("lower", raw_call(ExprType::Lower)),
("casefold", raw_call(ExprType::Casefold)),
("trim", raw_call(ExprType::Trim)),
("replace", raw_call(ExprType::Replace)),
("overlay", raw_call(ExprType::Overlay)),
Expand Down
1 change: 1 addition & 0 deletions src/frontend/src/expr/pure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ impl ExprVisitor for ImpureAnalyzer {
| Type::SimilarToEscape
| Type::Upper
| Type::Lower
| Type::Casefold
| Type::Trim
| Type::Replace
| Type::Position
Expand Down
1 change: 1 addition & 0 deletions src/frontend/src/optimizer/plan_expr_visitor/strong.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ impl Strong {
| ExprType::SimilarToEscape
| ExprType::Upper
| ExprType::Lower
| ExprType::Casefold
| ExprType::Replace
| ExprType::Position
| ExprType::Case
Expand Down