Skip to content

Commit c40d5eb

Browse files
committed
Refactor gettext binary to make it testable in fuzzers
1 parent 86d47df commit c40d5eb

File tree

3 files changed

+354
-307
lines changed

3 files changed

+354
-307
lines changed

i18n-helpers/src/bin/mdbook-gettext.rs

Lines changed: 39 additions & 307 deletions
Original file line numberDiff line numberDiff line change
@@ -25,111 +25,62 @@
2525
//! book.
2626
2727
use anyhow::{anyhow, Context};
28-
use mdbook::book::Book;
2928
use mdbook::preprocess::{CmdPreprocessor, PreprocessorContext};
30-
use mdbook::BookItem;
31-
use mdbook_i18n_helpers::{extract_events, reconstruct_markdown, translate_events};
29+
use mdbook_i18n_helpers::gettext::{add_stripped_summary_translations, translate_book};
3230
use polib::catalog::Catalog;
33-
use polib::message::Message;
3431
use polib::po_file;
35-
use pulldown_cmark::Event;
3632
use semver::{Version, VersionReq};
33+
use std::path::PathBuf;
3734
use std::{io, process};
3835

39-
/// Strip formatting from a Markdown string.
36+
/// Check whether the book should be translated.
4037
///
41-
/// The string can only contain inline text. Formatting such as
42-
/// emphasis and strong emphasis is removed.
43-
///
44-
/// Modelled after `mdbook::summary::stringify_events`.
45-
fn strip_formatting(text: &str) -> String {
46-
extract_events(text, None)
47-
.iter()
48-
.filter_map(|(_, event)| match event {
49-
Event::Text(text) | Event::Code(text) => Some(text.as_ref()),
50-
Event::SoftBreak => Some(" "),
51-
_ => None,
52-
})
53-
.collect()
54-
}
55-
56-
fn translate(text: &str, catalog: &Catalog) -> String {
57-
let events = extract_events(text, None);
58-
let translated_events = translate_events(&events, catalog);
59-
let (translated, _) = reconstruct_markdown(&translated_events, None);
60-
translated
61-
}
62-
63-
/// Update `catalog` with stripped messages from `SUMMARY.md`.
64-
///
65-
/// While it is permissible to include formatting in the `SUMMARY.md`
66-
/// file, `mdbook` will strip it out when rendering the book. It will
67-
/// also strip formatting when sending the book to preprocessors.
68-
///
69-
/// To be able to find the translations for the `SUMMARY.md` file, we
70-
/// append versions of these messages stripped of formatting.
71-
fn add_stripped_summary_translations(catalog: &mut Catalog) {
72-
let mut stripped_messages = Vec::new();
73-
for msg in catalog.messages() {
74-
// The `SUMMARY.md` filename is fixed, but we cannot assume
75-
// that the file is at `src/SUMMARY.md` since the `src/`
76-
// directory can be configured.
77-
if !msg.source().contains("SUMMARY.md") {
78-
continue;
79-
}
80-
81-
let message = Message::build_singular()
82-
.with_msgid(strip_formatting(msg.msgid()))
83-
.with_msgstr(strip_formatting(msg.msgstr().unwrap()))
84-
.done();
85-
stripped_messages.push(message);
38+
/// The book should be translated if:
39+
/// * `book.language` is defined in mdbook config
40+
/// * The corresponding `{language}.po` file exists
41+
fn should_translate(ctx: &PreprocessorContext) -> bool {
42+
// Translation is a no-op when the target language is not set
43+
if ctx.config.book.language.is_none() {
44+
return false;
8645
}
8746

88-
for msg in stripped_messages {
89-
catalog.append_or_update(msg);
90-
}
47+
// Nothing to do if PO file is missing.
48+
get_catalog_path(ctx)
49+
.map(|path| path.try_exists().unwrap_or(false))
50+
.unwrap_or(false)
9151
}
9252

93-
/// Translate an entire book.
94-
fn translate_book(ctx: &PreprocessorContext, mut book: Book) -> anyhow::Result<Book> {
95-
// Translation is a no-op when the target language is not set
96-
let language = match &ctx.config.book.language {
97-
Some(language) => language,
98-
None => return Ok(book),
99-
};
53+
/// Compute the path of the Catalog file.
54+
fn get_catalog_path(ctx: &PreprocessorContext) -> anyhow::Result<PathBuf> {
55+
let language = ctx
56+
.config
57+
.book
58+
.language
59+
.as_ref()
60+
.ok_or_else(|| anyhow!("Language is not provided"))?;
10061

101-
// Find PO file for the target language.
10262
let cfg = ctx
10363
.config
10464
.get_preprocessor("gettext")
10565
.ok_or_else(|| anyhow!("Could not read preprocessor.gettext configuration"))?;
10666
let po_dir = cfg.get("po-dir").and_then(|v| v.as_str()).unwrap_or("po");
107-
let path = ctx.root.join(po_dir).join(format!("{language}.po"));
108-
// Nothing to do if PO file is missing.
109-
if !path.exists() {
110-
return Ok(book);
111-
}
67+
Ok(ctx.root.join(po_dir).join(format!("{language}.po")))
68+
}
11269

113-
let mut catalog = po_file::parse(&path)
70+
/// Load the catalog with translation strings.
71+
fn load_catalog(ctx: &PreprocessorContext) -> anyhow::Result<Catalog> {
72+
let path = get_catalog_path(ctx)?;
73+
74+
let catalog = po_file::parse(&path)
11475
.map_err(|err| anyhow!("{err}"))
11576
.with_context(|| format!("Could not parse {:?} as PO file", path))?;
116-
add_stripped_summary_translations(&mut catalog);
117-
book.for_each_mut(|item| match item {
118-
BookItem::Chapter(ch) => {
119-
ch.content = translate(&ch.content, &catalog);
120-
ch.name = translate(&ch.name, &catalog);
121-
}
122-
BookItem::Separator => {}
123-
BookItem::PartTitle(title) => {
124-
*title = translate(title, &catalog);
125-
}
126-
});
12777

128-
Ok(book)
78+
Ok(catalog)
12979
}
13080

81+
/// Execute the main logic of this mdbook preprocessor.
13182
fn preprocess() -> anyhow::Result<()> {
132-
let (ctx, book) = CmdPreprocessor::parse_input(io::stdin())?;
83+
let (ctx, mut book) = CmdPreprocessor::parse_input(io::stdin())?;
13384
let book_version = Version::parse(&ctx.mdbook_version)?;
13485
let version_req = VersionReq::parse(mdbook::MDBOOK_VERSION)?;
13586
#[allow(clippy::print_stderr)]
@@ -142,8 +93,13 @@ fn preprocess() -> anyhow::Result<()> {
14293
);
14394
}
14495

145-
let translated_book = translate_book(&ctx, book)?;
146-
serde_json::to_writer(io::stdout(), &translated_book)?;
96+
if should_translate(&ctx) {
97+
let mut catalog = load_catalog(&ctx)?;
98+
add_stripped_summary_translations(&mut catalog);
99+
translate_book(&catalog, &mut book);
100+
}
101+
102+
serde_json::to_writer(io::stdout(), &book)?;
147103

148104
Ok(())
149105
}
@@ -161,227 +117,3 @@ fn main() -> anyhow::Result<()> {
161117

162118
preprocess()
163119
}
164-
165-
#[cfg(test)]
166-
mod tests {
167-
use super::*;
168-
use polib::message::{Message, MessageMutView};
169-
use polib::metadata::CatalogMetadata;
170-
use pretty_assertions::assert_eq;
171-
172-
fn create_catalog(translations: &[(&str, &str)]) -> Catalog {
173-
let mut catalog = Catalog::new(CatalogMetadata::new());
174-
for (msgid, msgstr) in translations {
175-
let message = Message::build_singular()
176-
.with_msgid(String::from(*msgid))
177-
.with_msgstr(String::from(*msgstr))
178-
.done();
179-
catalog.append_or_update(message);
180-
}
181-
catalog
182-
}
183-
184-
#[test]
185-
fn test_add_stripped_summary_translations() {
186-
// Add two messages which map to the same stripped message.
187-
let mut catalog = create_catalog(&[
188-
("foo `bar`", "FOO `BAR`"),
189-
("**foo** _bar_", "**FOO** _BAR_"),
190-
]);
191-
for (idx, mut msg) in catalog.messages_mut().enumerate() {
192-
// Set the source to SUMMARY.md to ensure
193-
// add_stripped_summary_translations will add a stripped
194-
// version.
195-
*msg.source_mut() = format!("src/SUMMARY.md:{idx}");
196-
}
197-
add_stripped_summary_translations(&mut catalog);
198-
199-
// We now have two messages, one with and one without
200-
// formatting. This lets us handle both the TOC and any
201-
// occurrence on the page.
202-
assert_eq!(
203-
catalog
204-
.messages()
205-
.map(|msg| (msg.source(), msg.msgid(), msg.msgstr().unwrap()))
206-
.collect::<Vec<_>>(),
207-
&[
208-
("src/SUMMARY.md:0", "foo `bar`", "FOO `BAR`"),
209-
("src/SUMMARY.md:1", "**foo** _bar_", "**FOO** _BAR_"),
210-
("", "foo bar", "FOO BAR")
211-
]
212-
);
213-
}
214-
215-
#[test]
216-
fn test_translate_single_line() {
217-
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
218-
assert_eq!(translate("foo bar", &catalog), "FOO BAR");
219-
}
220-
221-
#[test]
222-
fn test_translate_single_paragraph() {
223-
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
224-
// The output is normalized so the newline disappears.
225-
assert_eq!(translate("foo bar\n", &catalog), "FOO BAR");
226-
}
227-
228-
#[test]
229-
fn test_translate_paragraph_with_leading_newlines() {
230-
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
231-
// The output is normalized so the newlines disappear.
232-
assert_eq!(translate("\n\n\nfoo bar\n", &catalog), "FOO BAR");
233-
}
234-
235-
#[test]
236-
fn test_translate_paragraph_with_trailing_newlines() {
237-
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
238-
// The output is normalized so the newlines disappear.
239-
assert_eq!(translate("foo bar\n\n\n", &catalog), "FOO BAR");
240-
}
241-
242-
#[test]
243-
fn test_translate_multiple_paragraphs() {
244-
let catalog = create_catalog(&[("foo bar", "FOO BAR")]);
245-
assert_eq!(
246-
translate(
247-
"first paragraph\n\
248-
\n\
249-
foo bar\n\
250-
\n\
251-
last paragraph\n",
252-
&catalog
253-
),
254-
"first paragraph\n\
255-
\n\
256-
FOO BAR\n\
257-
\n\
258-
last paragraph"
259-
);
260-
}
261-
262-
#[test]
263-
fn test_translate_multiple_paragraphs_extra_newlines() {
264-
// Notice how the translated paragraphs have more lines.
265-
let catalog = create_catalog(&[
266-
("first paragraph", "FIRST TRANSLATED PARAGRAPH"),
267-
("last paragraph", "LAST TRANSLATED PARAGRAPH"),
268-
]);
269-
// Paragraph separation is normalized when translating.
270-
assert_eq!(
271-
translate(
272-
"first\n\
273-
paragraph\n\
274-
\n\
275-
\n\
276-
last\n\
277-
paragraph\n",
278-
&catalog
279-
),
280-
"FIRST TRANSLATED PARAGRAPH\n\
281-
\n\
282-
LAST TRANSLATED PARAGRAPH"
283-
);
284-
}
285-
286-
#[test]
287-
fn test_translate_code_block() {
288-
let catalog = create_catalog(&[(
289-
"```rust,editable\n\
290-
fn foo() {\n\n let x = \"hello\";\n\n}\n\
291-
```",
292-
"```rust,editable\n\
293-
fn FOO() {\n\n let X = \"guten tag\";\n\n}\n\
294-
```",
295-
)]);
296-
assert_eq!(
297-
translate(
298-
"Text before.\n\
299-
\n\
300-
\n\
301-
```rust,editable\n\
302-
fn foo() {\n\n let x = \"hello\";\n\n}\n\
303-
```\n\
304-
\n\
305-
Text after.\n",
306-
&catalog
307-
),
308-
"Text before.\n\
309-
\n\
310-
```rust,editable\n\
311-
fn FOO() {\n\n let X = \"guten tag\";\n\n}\n\
312-
```\n\
313-
\n\
314-
Text after.",
315-
);
316-
}
317-
318-
#[test]
319-
fn test_translate_table() {
320-
let catalog = create_catalog(&[
321-
("Types", "TYPES"),
322-
("Literals", "LITERALS"),
323-
("Arrays", "ARRAYS"),
324-
("Tuples", "TUPLES"),
325-
]);
326-
// The alignment is lost when we generate new Markdown.
327-
assert_eq!(
328-
translate(
329-
"\
330-
| | Types | Literals |\n\
331-
|--------|-------------|-----------------|\n\
332-
| Arrays | `[T; N]` | `[20, 30, 40]` |\n\
333-
| Tuples | `()`, ... | `()`, `('x',)` |",
334-
&catalog
335-
),
336-
"\
337-
||TYPES|LITERALS|\n\
338-
|--|-----|--------|\n\
339-
|ARRAYS|`[T; N]`|`[20, 30, 40]`|\n\
340-
|TUPLES|`()`, ...|`()`, `('x',)`|",
341-
);
342-
}
343-
344-
#[test]
345-
fn test_footnote() {
346-
let catalog = create_catalog(&[
347-
("A footnote[^note].", "A FOOTNOTE[^note]."),
348-
("More details.", "MORE DETAILS."),
349-
]);
350-
assert_eq!(
351-
translate("A footnote[^note].\n\n[^note]: More details.", &catalog),
352-
"A FOOTNOTE[^note].\n\n[^note]: MORE DETAILS."
353-
);
354-
}
355-
356-
#[test]
357-
fn test_strikethrough() {
358-
let catalog = create_catalog(&[("~~foo~~", "~~FOO~~")]);
359-
assert_eq!(translate("~~foo~~", &catalog), "~~FOO~~");
360-
}
361-
362-
#[test]
363-
fn test_tasklists() {
364-
let catalog = create_catalog(&[("Foo", "FOO"), ("Bar", "BAR")]);
365-
assert_eq!(
366-
translate(
367-
"\
368-
- [x] Foo\n\
369-
- [ ] Bar\n\
370-
",
371-
&catalog
372-
),
373-
"\
374-
- [x] FOO\n\
375-
- [ ] BAR",
376-
);
377-
}
378-
379-
#[test]
380-
fn test_heading_attributes() {
381-
let catalog = create_catalog(&[("Foo", "FOO"), ("Bar", "BAR")]);
382-
assert_eq!(
383-
translate("# Foo { #id .foo }", &catalog),
384-
"# FOO {#id .foo}"
385-
);
386-
}
387-
}

0 commit comments

Comments
 (0)