Skip to content

Commit 85b2334

Browse files
authored
fix censor behavior for Japanese diacritics (#47)
1 parent db5c16a commit 85b2334

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

src/censor.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,12 @@ impl<I: Iterator<Item = char>> Censor<I> {
164164
fn filter_char(c: &char) -> bool {
165165
use finl_unicode::categories::{CharacterCategories, MinorCategory};
166166
let category = c.get_minor_category();
167+
// Preserve Japanese dakuten/handakuten so kana aren't turned into their unvoiced forms.
168+
let preserve_japanese = matches!(*c, '\u{3099}' | '\u{309A}');
167169
let nok = matches!(
168170
category,
169171
MinorCategory::Cn | MinorCategory::Co | MinorCategory::Mn
170-
);
172+
) && !preserve_japanese;
171173

172174
!(nok || BANNED.deref().deref().contains(*c))
173175
}
@@ -1275,6 +1277,15 @@ mod tests {
12751277
);
12761278
}
12771279

1280+
#[test]
1281+
#[serial]
1282+
fn japanese_diacritics_preserved() {
1283+
assert_eq!("パピプペポ", "パピプペポ".censor());
1284+
assert_eq!("バビブベボ", "バビブベボ".censor());
1285+
assert_eq!("ぱぴぷぺぽ", "ぱぴぷぺぽ".censor());
1286+
assert_eq!("ばびぶべぼ", "ばびぶべぼ".censor());
1287+
}
1288+
12781289
#[test]
12791290
#[serial]
12801291
fn bandwidth() {

0 commit comments

Comments
 (0)