11/// unicode
22use crate :: bytestr;
3+ use std:: cmp:: Ordering ;
34
45/// Convert unicode to ascii
56///
@@ -51,7 +52,7 @@ pub fn to_bytestr(value: &str) -> Vec<u8> {
5152 value. chars ( ) . map ( |c| c as u8 ) . collect :: < Vec < u8 > > ( )
5253}
5354
54- /// Sort array of unicode strings (ascending, case insensitive)
55+ /// Sort an array of unicode strings (ascending, case insensitive)
5556///
5657/// # Example
5758///
@@ -62,10 +63,35 @@ pub fn to_bytestr(value: &str) -> Vec<u8> {
6263/// ```
6364pub fn sort ( values : & [ String ] ) -> Vec < String > {
6465 let mut values = values. to_vec ( ) ;
65- values. sort_by_cached_key ( |v| to_utf8 ( v ) . to_lowercase ( ) ) ;
66+ values. sort_by_cached_key ( |v| normalize ( v ) ) ;
6667 values
6768}
6869
70+ /// Normalize a unicode string (convert to case-insensitive utf8)
71+ ///
72+ /// # Example
73+ ///
74+ /// ```
75+ /// use quake_text::unicode::normalize;
76+ /// assert_eq!(normalize("BÏÏM"), "boom");
77+ /// ```
78+ pub fn normalize ( value : & str ) -> String {
79+ to_utf8 ( value) . to_lowercase ( )
80+ }
81+
82+ /// Order unicode strings
83+ /// # Example
84+ ///
85+ /// ```
86+ /// use std::cmp::Ordering;
87+ /// use quake_text::unicode::ord;
88+ /// assert_eq!(ord("BÏÏM", "boom"), Ordering::Equal);
89+ /// assert_eq!(ord("áøå1", "axe2"), Ordering::Less);
90+ /// ```
91+ pub fn ord ( a : & str , b : & str ) -> Ordering {
92+ normalize ( a) . cmp ( & normalize ( b) )
93+ }
94+
6995#[ cfg( test) ]
7096mod tests {
7197 use pretty_assertions:: assert_eq;
@@ -86,4 +112,23 @@ mod tests {
86112 let mixed_chars = ( 28 ..=40 ) . map ( char:: from) . collect :: < String > ( ) ;
87113 assert_eq ! ( to_utf8( & mixed_chars) , "• !\" #$%&'(" ) ;
88114 }
115+
#[test]
fn test_ord() {
    // Unsorted input mixing upper/lower case and accented spellings.
    let input: Vec<String> = ["BÏÏM0", "Axe2", "bÏÏm1", "áøå1"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    // Expected order after sorting on the normalized keys.
    let expected: Vec<String> = ["áøå1", "Axe2", "BÏÏM0", "bÏÏm1"]
        .iter()
        .map(|s| s.to_string())
        .collect();

    assert_eq!(sort(&input), expected);
}
89134}
0 commit comments