diff --git a/yomitan-dict-builder/src/anilist_client.rs b/yomitan-dict-builder/src/anilist_client.rs index 7a60ab1..3f5d876 100644 --- a/yomitan-dict-builder/src/anilist_client.rs +++ b/yomitan-dict-builder/src/anilist_client.rs @@ -268,6 +268,7 @@ impl AnilistClient { full native alternative + alternativeSpoiler first last } @@ -412,6 +413,16 @@ impl AnilistClient { }) .unwrap_or_default(); + let spoiler_alternatives: Vec = name_data["alternativeSpoiler"] + .as_array() + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .filter(|s| !s.is_empty()) + .collect() + }) + .unwrap_or_default(); + // Gender: "Male" → "m", "Female" → "f" let sex = node.get("gender").and_then(|g| g.as_str()).and_then(|g| { match g.to_lowercase().chars().next() { @@ -482,6 +493,7 @@ impl AnilistClient { .and_then(|v| v.as_str()) .map(|s| s.to_string()), aliases: alternatives, + spoiler_aliases: spoiler_alternatives, personality: Vec::new(), // AniList has no trait categories roles: Vec::new(), engages_in: Vec::new(), @@ -1638,6 +1650,77 @@ mod tests { assert_eq!(ch.aliases, vec!["Valid", "Also Valid"]); } + #[test] + fn test_process_character_spoiler_alternatives() { + let client = make_client(); + let mut edge = make_edge( + ROLE_MAIN, + 1, + "Test Character", + "テスト", + None, + None, + None, + None, + None, + vec!["Normal Alt"], + None, + ); + edge["node"]["name"]["alternativeSpoiler"] = + serde_json::json!(["Spoiler Name", "Another Spoiler"]); + let ch = client.process_character(&edge).unwrap(); + assert_eq!(ch.aliases, vec!["Normal Alt"]); + assert_eq!( + ch.spoiler_aliases, + vec!["Spoiler Name", "Another Spoiler"] + ); + } + + #[test] + fn test_process_character_spoiler_alternatives_with_nulls() { + let client = make_client(); + let mut edge = make_edge( + ROLE_MAIN, + 1, + "A", + "あ", + None, + None, + None, + None, + None, + vec![], + None, + ); + edge["node"]["name"]["alternativeSpoiler"] = + serde_json::json!([null, "Valid Spoiler", null, ""]); + let ch = client.process_character(&edge).unwrap(); + assert_eq!(ch.spoiler_aliases, vec!["Valid Spoiler"]); + } + + #[test] + fn test_process_character_no_spoiler_alternatives() { + let client = make_client(); + let edge = make_edge( + ROLE_MAIN, + 1, + "A", + "あ", + None, + None, + None, + None, + None, + vec![], + None, + ); + let ch = client.process_character(&edge).unwrap(); + assert!( + ch.spoiler_aliases.is_empty(), + "Missing alternativeSpoiler should produce empty vec" + ); + } + // === Edge case: hasNextPage missing === #[test] diff --git a/yomitan-dict-builder/src/anilist_name_test_data.rs b/yomitan-dict-builder/src/anilist_name_test_data.rs index 1d5056e..c1639c3 100644 --- a/yomitan-dict-builder/src/anilist_name_test_data.rs +++ b/yomitan-dict-builder/src/anilist_name_test_data.rs @@ -384,9 +384,10 @@ mod tests { fn test_no_hints_no_space_kanji() { let readings = name_parser::generate_name_readings("幸平創真", "Souma Yukihira", None, None); - assert_eq!(readings.full, "そうま ゆきひら"); - assert_eq!(readings.family, "そうま ゆきひら"); - assert_eq!(readings.given, "そうま ゆきひら"); + // Whitespace is stripped from readings — romaji spaces should not appear in kana + assert_eq!(readings.full, "そうまゆきひら"); + assert_eq!(readings.family, "そうまゆきひら"); + assert_eq!(readings.given, "そうまゆきひら"); } // --- Split function with hints --- diff --git a/yomitan-dict-builder/src/dict_builder.rs b/yomitan-dict-builder/src/dict_builder.rs index 00de8cb..0576221 100644 --- a/yomitan-dict-builder/src/dict_builder.rs +++ b/yomitan-dict-builder/src/dict_builder.rs @@ -11,6 +11,35 @@ use crate::kana; use crate::models::*; use crate::name_parser::{self, HONORIFIC_SUFFIXES}; +/// Parse AniList alternative name format "Romanized (Japanese)". +/// +/// AniList alternative names often use the format "Romaji (日本語)" where +/// the romanized reading is outside the parentheses and the Japanese form +/// is inside. This function extracts both parts. +/// +/// Returns `(term, optional_reading)`: +/// - For "Aoki Umi (碧井海)": returns ("碧井海", Some("あおきうみ")) +/// - For "碧井海": returns ("碧井海", None) — standalone Japanese +/// - For "Ruri's Mother": returns ("Ruri's Mother", None) — no parenthesized part +fn parse_alt_name_format(alias: &str) -> (String, Option) { + // Look for pattern: "text (text)" where the parenthesized part contains Japanese + if let Some(open) = alias.rfind('(') { + if let Some(close) = alias[open..].find(')') { + let inside = alias[open + 1..open + close].trim(); + let outside = alias[..open].trim(); + + if kana::contains_japanese(inside) && !outside.is_empty() { + // The Japanese part is the term, the romanized part provides the reading + let reading = kana::alphabet_to_kana(outside).replace(' ', ""); + return (inside.to_string(), Some(reading)); + } + } + } + + // No "X (Y)" format or not Japanese inside — return as-is + (alias.to_string(), None) +} + /// Maximum number of entries to put into one term bank file. /// Yomitan recommends keeping each term bank small to reduce import issues. /// JMdict and Jitendex both use 2k entries as their limit. @@ -267,13 +296,23 @@ impl DictBuilder { // Aliases: union (deduplicated) if !other.aliases.is_empty() { - let existing: HashSet = base.aliases.iter().cloned().collect(); + let mut existing: HashSet = base.aliases.iter().cloned().collect(); for alias in &other.aliases { - if !alias.is_empty() && !existing.contains(alias) { + if !alias.is_empty() && existing.insert(alias.clone()) { base.aliases.push(alias.clone()); } } } + + // Spoiler aliases: union (deduplicated) + if !other.spoiler_aliases.is_empty() { + let mut existing: HashSet = base.spoiler_aliases.iter().cloned().collect(); + for alias in &other.spoiler_aliases { + if !alias.is_empty() && existing.insert(alias.clone()) { + base.spoiler_aliases.push(alias.clone()); + } + } + } } /// Generate all term entries from merged characters. @@ -646,11 +685,32 @@ impl DictBuilder { } // --- Alias entries --- - for alias in &char.aliases { - if !alias.is_empty() && added_terms.insert(alias.clone()) { + // Include spoiler aliases when the user has enabled spoilers + let spoiler_aliases: &[String] = if self.settings.show_spoilers { + &char.spoiler_aliases + } else { + &[] + }; + + for alias in char.aliases.iter().chain(spoiler_aliases.iter()) { + if alias.is_empty() { + continue; + } + + // Parse "Romanized (Japanese)" format used by AniList alternative names. + // E.g. "Aoki Umi (碧井海)" → term="碧井海", reading from "Aoki Umi" + let (alias_term, alias_reading) = parse_alt_name_format(alias); + + // Skip aliases that don't contain any Japanese characters + if !kana::contains_japanese(&alias_term) { + continue; + } + + if added_terms.insert(alias_term.clone()) { + let reading = alias_reading.unwrap_or_else(|| readings.full.clone()); self.entries.push(ContentBuilder::create_term_entry( - alias, - &readings.full, // Use full reading for aliases + &alias_term, + &reading, tag_role, score, &structured_content, @@ -658,7 +718,7 @@ impl DictBuilder { // Also collect alias name+reading for deferred honorific generation if self.settings.honorifics { - base_names_with_readings.push((alias.clone(), readings.full.clone())); + base_names_with_readings.push((alias_term, reading)); } } } @@ -945,7 +1005,7 @@ mod tests { blood_type: Some("A".to_string()), birthday: Some(vec![1, 1]), description: Some("Test description".to_string()), - aliases: vec!["TestAlias".to_string()], + aliases: vec!["テストエイリアス".to_string()], personality: vec![CharacterTrait { name: "Kind".to_string(), spoiler: 0, @@ -1095,7 +1155,7 @@ mod tests { .collect(); assert!( - terms.contains(&"TestAlias".to_string()), + terms.contains(&"テストエイリアス".to_string()), "Should have alias entry" ); } @@ -2236,7 +2296,7 @@ mod tests { char.aliases = vec![ "".to_string(), "".to_string(), - "Valid".to_string(), + "有効".to_string(), "".to_string(), ]; builder.add_character(&char, "Test"); @@ -2248,8 +2308,8 @@ mod tests { .collect(); assert!( - terms.contains(&"Valid".to_string()), - "Non-empty alias should be present" + terms.contains(&"有効".to_string()), + "Non-empty Japanese alias should be present" ); // Empty aliases should not produce entries let empty_count = terms.iter().filter(|t| t.is_empty()).count(); @@ -2802,7 +2862,7 @@ mod tests { fn test_character_with_aliases_generates_alias_entries() { let mut builder = DictBuilder::new(s_no_hon(), None, "Test".to_string()); let mut ch = make_test_character("c1", "Test Name", "テスト", "main"); - ch.aliases = vec!["Alias1".to_string(), "エイリアス".to_string()]; + ch.aliases = vec!["別名".to_string(), "エイリアス".to_string()]; builder.add_character(&ch, "Test"); let terms: Vec = builder @@ -2811,7 +2871,7 @@ mod tests { .filter_map(|e| e[0].as_str().map(|s| s.to_string())) .collect(); - assert!(terms.contains(&"Alias1".to_string())); + assert!(terms.contains(&"別名".to_string())); assert!(terms.contains(&"エイリアス".to_string())); } @@ -3710,25 +3770,25 @@ mod tests { let mut builder = DictBuilder::new(s_no_hon(), None, "Test".to_string()); let mut char1 = make_test_character("c1", "Name", "テスト", "main"); char1.source = "anilist".to_string(); - char1.aliases = vec!["Alias1".to_string()]; + char1.aliases = vec!["別名一".to_string()]; let mut char2 = make_test_character("c1", "Name", "テスト", "side"); char2.source = "anilist".to_string(); - char2.aliases = vec!["Alias2".to_string(), "Alias1".to_string()]; // Alias1 is duplicate + char2.aliases = vec!["別名二".to_string(), "別名一".to_string()]; // 別名一 is duplicate builder.add_character(&char1, "Game A"); builder.add_character(&char2, "Game B"); let entries = builder.base_entries(); let terms: Vec<&str> = entries.iter().filter_map(|e| e[0].as_str()).collect(); - assert!(terms.contains(&"Alias1"), "Should have Alias1"); + assert!(terms.contains(&"別名一"), "Should have 別名一"); assert!( - terms.contains(&"Alias2"), - "Should have Alias2 from second character" + terms.contains(&"別名二"), + "Should have 別名二 from second character" ); - // Alias1 should appear only once as a term - let alias1_count = terms.iter().filter(|&&t| t == "Alias1").count(); - assert_eq!(alias1_count, 1, "Alias1 should not be duplicated"); + // 別名一 should appear only once as a term + let alias1_count = terms.iter().filter(|&&t| t == "別名一").count(); + assert_eq!(alias1_count, 1, "別名一 should not be duplicated"); } #[test] @@ -3791,4 +3851,201 @@ mod tests { "Should have single media title" ); } + + // === parse_alt_name_format tests === + + #[test] + fn test_parse_alt_name_romanized_japanese_format() { + // Standard "Romanized (Japanese)" format + let (term, reading) = parse_alt_name_format("Aoki Umi (碧井海)"); + assert_eq!(term, "碧井海"); + assert!(reading.is_some()); + assert_eq!(reading.unwrap(), "あおきうみ"); + } + + #[test] + fn test_parse_alt_name_standalone_japanese() { + // Standalone Japanese alias — no parentheses + let (term, reading) = parse_alt_name_format("碧井海"); + assert_eq!(term, "碧井海"); + assert!(reading.is_none()); + } + + #[test] + fn test_parse_alt_name_standalone_english() { + // English alias — returned as-is (filtered downstream by contains_japanese) + let (term, reading) = parse_alt_name_format("Ruri's Mother"); + assert_eq!(term, "Ruri's Mother"); + assert!(reading.is_none()); + } + + #[test] + fn test_parse_alt_name_katakana_in_parens() { + let (term, reading) = parse_alt_name_format("Elaina (エレイナ)"); + assert_eq!(term, "エレイナ"); + assert!(reading.is_some()); + // "Elaina" → e=え, la=ら, i=い, na=な → "えらいな" + assert_eq!(reading.unwrap(), "えらいな"); + } + + #[test] + fn test_parse_alt_name_empty_outside() { + // Edge case: nothing outside parens — return as-is + let (term, reading) = parse_alt_name_format("(碧井海)"); + assert_eq!(term, "(碧井海)"); + assert!(reading.is_none()); + } + + // === Non-Japanese alias filtering === + + #[test] + fn test_english_alias_filtered_out() { + let mut builder = DictBuilder::new(s_no_hon(), None, "Test".to_string()); + let mut ch = make_test_character("c1", "Test Name", "テスト", "main"); + ch.aliases = vec!["Ruri's Mother".to_string(), "エイリアス".to_string()]; + builder.add_character(&ch, "Test"); + + let terms: Vec = builder + .base_entries() + .iter() + .filter_map(|e| e[0].as_str().map(|s| s.to_string())) + .collect(); + + assert!( + !terms.contains(&"Ruri's Mother".to_string()), + "English alias should be filtered out" + ); + assert!( + terms.contains(&"エイリアス".to_string()), + "Japanese alias should be present" + ); + } + + // === "Romanized (Japanese)" alias integration === + + #[test] + fn test_alias_romanized_japanese_format_creates_correct_entry() { + let mut builder = DictBuilder::new(s_no_hon(), None, "Test".to_string()); + let mut ch = make_test_character("c1", "Test Name", "テスト", "main"); + ch.aliases = vec!["Aoki Umi (碧井海)".to_string()]; + builder.add_character(&ch, "Test"); + + let terms: Vec = builder + .base_entries() + .iter() + .filter_map(|e| e[0].as_str().map(|s| s.to_string())) + .collect(); + + // The Japanese part should be the term, not the full "Aoki Umi (碧井海)" + assert!( + terms.contains(&"碧井海".to_string()), + "Should extract Japanese part as term" + ); + assert!( + !terms.contains(&"Aoki Umi (碧井海)".to_string()), + "Full format string should not be a term" + ); + + // Check that the reading is derived from the romanized part + let entry = builder + .base_entries() + .iter() + .find(|e| e[0].as_str() == Some("碧井海")) + .unwrap() + .clone(); + assert_eq!(entry[1].as_str().unwrap(), "あおきうみ"); + } + + // === Spoiler alias tests === + + #[test] + fn test_spoiler_aliases_included_when_enabled() { + let settings = DictSettings { + honorifics: false, + show_spoilers: true, + ..DictSettings::default() + }; + let mut builder = DictBuilder::new(settings, None, "Test".to_string()); + let mut ch = make_test_character("c1", "Test Name", "テスト", "main"); + ch.aliases = vec!["通常名".to_string()]; + ch.spoiler_aliases = vec!["秘密名".to_string()]; + builder.add_character(&ch, "Test"); + + let terms: Vec = builder + .base_entries() + .iter() + .filter_map(|e| e[0].as_str().map(|s| s.to_string())) + .collect(); + + assert!( + terms.contains(&"通常名".to_string()), + "Normal alias should be present" + ); + assert!( + terms.contains(&"秘密名".to_string()), + "Spoiler alias should be present when spoilers enabled" + ); + } + + #[test] + fn test_spoiler_aliases_excluded_when_disabled() { + let settings = DictSettings { + honorifics: false, + show_spoilers: false, + ..DictSettings::default() + }; + let mut builder = DictBuilder::new(settings, None, "Test".to_string()); + let mut ch = make_test_character("c1", "Test Name", "テスト", "main"); + ch.aliases = vec!["通常名".to_string()]; + ch.spoiler_aliases = vec!["秘密名".to_string()]; + builder.add_character(&ch, "Test"); + + let terms: Vec = builder + .base_entries() + .iter() + .filter_map(|e| e[0].as_str().map(|s| s.to_string())) + .collect(); + + assert!( + terms.contains(&"通常名".to_string()), + "Normal alias should be present" + ); + assert!( + !terms.contains(&"秘密名".to_string()), + "Spoiler alias should NOT be present when spoilers disabled" + ); + } + + #[test] + fn test_spoiler_aliases_merged() { + let settings = DictSettings { + honorifics: false, + show_spoilers: true, + ..DictSettings::default() + }; + let mut builder = DictBuilder::new(settings, None, "Test".to_string()); + + let mut char1 = make_test_character("c1", "Name", "テスト", "main"); + char1.source = "anilist".to_string(); + char1.spoiler_aliases = vec!["秘密一".to_string()]; + + let mut char2 = make_test_character("c1", "Name", "テスト", "side"); + char2.source = "anilist".to_string(); + char2.spoiler_aliases = vec!["秘密二".to_string(), "秘密一".to_string()]; + + builder.add_character(&char1, "Game A"); + builder.add_character(&char2, "Game B"); + + let terms: Vec = builder + .base_entries() + .iter() + .filter_map(|e| e[0].as_str().map(|s| s.to_string())) + .collect(); + + assert!(terms.contains(&"秘密一".to_string())); + assert!(terms.contains(&"秘密二".to_string())); + // Deduplicated + let count = terms.iter().filter(|t| t.as_str() == "秘密一").count(); + assert_eq!(count, 1, "Spoiler aliases should be deduplicated"); + } } diff --git a/yomitan-dict-builder/src/kana.rs b/yomitan-dict-builder/src/kana.rs index f6ca307..04460e5 100644 --- a/yomitan-dict-builder/src/kana.rs +++ b/yomitan-dict-builder/src/kana.rs @@ -43,6 +43,22 @@ fn is_kanji(c: char) -> bool { || code == 0x3005 } +/// Returns true if the character is hiragana (U+3041–U+3096). +fn is_hiragana(c: char) -> bool { + (0x3041..=0x3096).contains(&(c as u32)) +} + +/// Returns true if the character is katakana (U+30A1–U+30F6, plus ー U+30FC). +fn is_katakana(c: char) -> bool { + let code = c as u32; + (0x30A1..=0x30F6).contains(&code) || code == 0x30FC +} + +/// Check if text contains any Japanese characters (kanji, hiragana, or katakana). +pub fn contains_japanese(text: &str) -> bool { + text.chars().any(|c| is_kanji(c) || is_hiragana(c) || is_katakana(c)) +} + /// Convert katakana to hiragana. /// Katakana range: U+30A1 (ァ) to U+30F6 (ヶ). Subtract 0x60 to get hiragana equivalent. pub fn kata_to_hira(text: &str) -> String { diff --git a/yomitan-dict-builder/src/models.rs b/yomitan-dict-builder/src/models.rs index da0936a..beef99b 100644 --- a/yomitan-dict-builder/src/models.rs +++ b/yomitan-dict-builder/src/models.rs @@ -35,6 +35,8 @@ pub struct Character { pub birthday: Option>, // [month, day] pub description: Option, pub aliases: Vec, + #[serde(default)] + pub spoiler_aliases: Vec, // Alternative names marked as spoilers (AniList only) pub personality: Vec, pub roles: Vec, pub engages_in: Vec, diff --git a/yomitan-dict-builder/src/name_parser.rs b/yomitan-dict-builder/src/name_parser.rs index 78bf4e0..cef4a9a 100644 --- a/yomitan-dict-builder/src/name_parser.rs +++ b/yomitan-dict-builder/src/name_parser.rs @@ -816,6 +816,25 @@ pub fn generate_name_readings( romanized_name: &str, first_name_hint: Option<&str>, last_name_hint: Option<&str>, +) -> NameReadings { + let mut readings = + generate_name_readings_inner(name_original, romanized_name, first_name_hint, last_name_hint); + + // Strip internal whitespace from readings — romaji→kana conversion can + // introduce spaces from multi-word romanized names (e.g. "Elaina no Haha" + // would produce "えらいな の はは" instead of "えらいなのはは"). + readings.full = readings.full.split_whitespace().collect(); + readings.family = readings.family.split_whitespace().collect(); + readings.given = readings.given.split_whitespace().collect(); + + readings +} + +fn generate_name_readings_inner( + name_original: &str, + romanized_name: &str, + first_name_hint: Option<&str>, + last_name_hint: Option<&str>, ) -> NameReadings { // Handle empty native name if name_original.is_empty() { @@ -1722,4 +1741,40 @@ mod tests { assert_eq!(r1.given, r2.given); assert_eq!(r1.full, r2.full); } + + // === Reading whitespace stripping === + + #[test] + fn test_readings_have_no_internal_whitespace() { + // Multi-word romanized names should not produce readings with spaces + // e.g. "Elaina no Haha" should produce "えれいなのはは", not "えれいな の はは" + let readings = generate_name_readings("イレイナの母", "Elaina no Haha", None, None); + assert!( + !readings.full.contains(' '), + "Full reading should not contain spaces, got: '{}'", + readings.full + ); + assert!( + !readings.family.contains(' '), + "Family reading should not contain spaces, got: '{}'", + readings.family + ); + assert!( + !readings.given.contains(' '), + "Given reading should not contain spaces, got: '{}'", + readings.given + ); + } + + #[test] + fn test_readings_with_hints_no_whitespace() { + // With hints, multi-word hints should also not produce spaces + let readings = + generate_name_readings("須々木 心一", "Shinichi Suzuki", Some("Shinichi"), Some("Suzuki")); + assert!( + !readings.full.contains(' '), + "Full reading should not contain spaces, got: '{}'", + readings.full + ); + } } diff --git a/yomitan-dict-builder/src/vndb_client.rs b/yomitan-dict-builder/src/vndb_client.rs index b63f31c..89abe1a 100644 --- a/yomitan-dict-builder/src/vndb_client.rs +++ b/yomitan-dict-builder/src/vndb_client.rs @@ -451,6 +451,7 @@ impl VndbClient { birthday, description: data["description"].as_str().map(|s| s.to_string()), aliases, + spoiler_aliases: Vec::new(), // VNDB has no spoiler alternative names personality, roles, engages_in,