Skip to content

Commit daf517c

Browse files
authored
Unicode: Add methods to normalize and order unicode strings (#1)
- Unicode: Add methods for normalizing and ordering unicode strings.
- Unicode: Update docs.
- Bump version.
1 parent 3eac638 commit daf517c

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ description = "Utils for Quake strings and characters."
44
keywords = ["quake", "quakeworld", "strings"]
55
repository = "https://github.com/vikpe/quake_text"
66
authors = ["Viktor Persson <viktor.persson@arcsin.se>"]
7-
version = "0.2.0"
7+
version = "0.3.0"
88
edition = "2021"
99
license = "MIT"
1010
include = [

src/unicode.rs

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/// unicode
22
use crate::bytestr;
3+
use std::cmp::Ordering;
34

45
/// Convert unicode to ascii
56
///
@@ -51,7 +52,7 @@ pub fn to_bytestr(value: &str) -> Vec<u8> {
5152
value.chars().map(|c| c as u8).collect::<Vec<u8>>()
5253
}
5354

54-
/// Sort array of unicode strings (ascending, case insensitive)
55+
/// Sort an array of unicode strings (ascending, case insensitive)
5556
///
5657
/// # Example
5758
///
@@ -62,10 +63,35 @@ pub fn to_bytestr(value: &str) -> Vec<u8> {
6263
/// ```
6364
pub fn sort(values: &[String]) -> Vec<String> {
6465
let mut values = values.to_vec();
65-
values.sort_by_cached_key(|v| to_utf8(v).to_lowercase());
66+
values.sort_by_cached_key(|v| normalize(v));
6667
values
6768
}
6869

70+
/// Normalize a unicode string (convert with `to_utf8`, then lowercase the result)
71+
///
72+
/// # Example
73+
///
74+
/// ```
75+
/// use quake_text::unicode::normalize;
76+
/// assert_eq!(normalize("BÏÏM"), "boom");
77+
/// ```
78+
pub fn normalize(value: &str) -> String {
79+
to_utf8(value).to_lowercase()
80+
}
81+
82+
/// Order two unicode strings by comparing their normalized forms (`normalize(a)` vs `normalize(b)`)
83+
/// # Example
84+
///
85+
/// ```
86+
/// use std::cmp::Ordering;
87+
/// use quake_text::unicode::ord;
88+
/// assert_eq!(ord("BÏÏM", "boom"), Ordering::Equal);
89+
/// assert_eq!(ord("áøå1", "axe2"), Ordering::Less);
90+
/// ```
91+
pub fn ord(a: &str, b: &str) -> Ordering {
92+
normalize(a).cmp(&normalize(b))
93+
}
94+
6995
#[cfg(test)]
7096
mod tests {
7197
use pretty_assertions::assert_eq;
@@ -86,4 +112,23 @@ mod tests {
86112
let mixed_chars = (28..=40).map(char::from).collect::<String>();
87113
assert_eq!(to_utf8(&mixed_chars), "• !\"#$%&'(");
88114
}
115+
116+
#[test]
117+
fn test_ord() {
118+
let values = vec![
119+
"BÏÏM0".to_string(),
120+
"Axe2".to_string(),
121+
"bÏÏm1".to_string(),
122+
"áøå1".to_string(),
123+
];
124+
assert_eq!(
125+
sort(&values),
126+
vec![
127+
"áøå1".to_string(),
128+
"Axe2".to_string(),
129+
"BÏÏM0".to_string(),
130+
"bÏÏm1".to_string(),
131+
]
132+
);
133+
}
89134
}

0 commit comments

Comments
 (0)