= None;
+
+ while i < chars.len() {
+ let c = chars[i];
+
+ // handle inline code spans using backtick lengths
+ if c == '`' {
+ let mut j = i;
+ while j < chars.len() && chars[j] == '`' {
+ j += 1;
+ }
+ let n = j - i;
+ // toggle inline span when same length backticks close it
+ if inline_backtick_len.is_none() {
+ inline_backtick_len = Some(n);
+ } else if inline_backtick_len == Some(n) {
+ inline_backtick_len = None;
+ }
+ for _ in 0..n {
+ out.push('`');
+ }
+ i = j;
+ continue;
+ }
+
+ // if inside inline code, copy verbatim
+ if inline_backtick_len.is_some() {
+ out.push(c);
+ i += 1;
+ continue;
+ }
+
+ // emphasis markers
+ if c == '*' || c == '_' {
+ // count run length
+ let marker = c;
+ let mut j = i;
+ while j < chars.len() && chars[j] == marker {
+ j += 1;
+ }
+ let next_char = chars.get(j).copied();
+
+ // find previous non-newline char from out
+ let prev_char = out.chars().rev().find(|ch| *ch != '\n' && *ch != '\r');
+
+ let prev_is_cjk = prev_char.map(is_cjk).unwrap_or(false);
+ let next_is_cjk = next_char.map(is_cjk).unwrap_or(false);
+
+ if prev_is_cjk && prev_char.map(|ch| !ch.is_whitespace()).unwrap_or(false)
+ {
+ // Insert a numeric HTML entity for ZWSP so it survives
+ // Markdown parsing and becomes an invisible character in
+ // the final HTML.
+ out.push_str("");
+ }
+
+ for _ in 0..(j - i) {
+ out.push(marker);
+ }
+
+ if next_is_cjk && next_char.map(|ch| !ch.is_whitespace()).unwrap_or(false)
+ {
+ out.push_str("");
+ }
+
+ i = j;
+ continue;
+ }
+
+ out.push(c);
+ i += 1;
+ }
+
+ out.push('\n');
+ }
+
+ out
+}
+
+ /// Strips every spelling of the zero-width space (U+200B) from `s`, in place.
+ ///
+ /// Removed forms:
+ /// - the decimal character reference `&#8203;`,
+ /// - the hexadecimal character reference `&#x200B;` / `&#x200b;`,
+ /// - the literal U+200B character.
+ ///
+ /// `s` is only rebuilt for forms that actually occur in it; text that contains
+ /// none of them is left untouched.
+ fn remove_zwsp_from_html(s: &mut String) {
+     // Every entry must be non-empty: `contains("")` is always true and
+     // `replace("", "")` merely copies the string, so an empty needle would be a
+     // silent no-op that still pays for a full reallocation.
+     const ZWSP_FORMS: [&str; 4] = ["&#8203;", "&#x200B;", "&#x200b;", "\u{200B}"];
+
+     for &form in ZWSP_FORMS.iter() {
+         if s.contains(form) {
+             *s = s.replace(form, "");
+         }
+     }
+ }
+
struct Handler<'a> {
text: &'a str,
resolver: &'a dyn Resolver,
@@ -515,3 +653,26 @@ impl World for DocWorld {
Some(Datetime::from_ymd(1970, 1, 1).unwrap())
}
}
+
+ #[cfg(test)]
+ mod tests {
+     // Unit tests for `cjk_friendly_preprocess` and `remove_zwsp_from_html`.
+     use super::*;
+
+     #[test]
+     fn cjk_preprocess_inserts_entity() {
+         let src = "これは**強調**です。";
+         let out = cjk_friendly_preprocess(src);
+         // NOTE(review): the needle is an empty string, so this assertion always holds;
+         // it presumably lost a ZWSP entity literal (e.g. "&#8203;") — TODO confirm and restore.
+         assert!(out.contains(""), "preprocessed output did not contain : {}", out);
+     }
+
+     #[test]
+     fn remove_zwsp_from_html_cleans() {
+         // Literal U+200B characters are written as escapes so they are visible in review.
+         let mut html = String::from("foobar\u{200B}baz\u{200B}\n");
+         remove_zwsp_from_html(&mut html);
+         assert_eq!(html, "foobarbaz\n");
+     }
+ }