Skip to content

Commit 1131de5

Browse files
committed
Added tests to rtf and pptx parsers, bringing the total to 282
1 parent d359fcd commit 1131de5

File tree

2 files changed

+151
-0
lines changed

2 files changed

+151
-0
lines changed

src/parser/pptx.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,79 @@ fn traverse_for_text(
191191
traverse_for_text(child, text, links, slide_start, rels);
192192
}
193193
}
194+
195+
#[cfg(test)]
mod tests {
    // Unit tests for the PPTX slide helpers imported from the parent module.
    // Multi-line XML fixtures are written flush-left so the literals carry no
    // extra indentation.

    use std::collections::HashMap;

    use roxmltree::Document as XmlDocument;
    use rstest::rstest;

    use super::{extract_slide_number, extract_slide_text, extract_slide_title, is_title_shape};

    /// Part names with one digit, several digits, leading zeros, and no digits
    /// at all (for which the expected number is 0).
    #[rstest]
    #[case("ppt/slides/slide1.xml", 1)]
    #[case("ppt/slides/slide12.xml", 12)]
    #[case("slide007.xml", 7)]
    #[case("ppt/slides/custom.xml", 0)]
    fn extract_slide_number_parses_digits(#[case] name: &str, #[case] expected: usize) {
        let parsed = extract_slide_number(name);
        assert_eq!(parsed, expected);
    }

    /// Shapes whose placeholder type is `title` or `ctrTitle` are both expected
    /// to be recognised as title shapes.
    #[test]
    fn is_title_shape_true_for_title_and_center_title() {
        let xml = r#"
<root>
<sp><nvSpPr><nvPr><ph type="title" /></nvPr></nvSpPr></sp>
<sp><nvSpPr><nvPr><ph type="ctrTitle" /></nvPr></nvSpPr></sp>
</root>
"#;
        let parsed = XmlDocument::parse(xml).expect("xml parse");
        let shapes = parsed
            .descendants()
            .filter(|node| node.tag_name().name() == "sp")
            .collect::<Vec<_>>();

        // Checked one at a time, in document order.
        let plain_title = shapes[0];
        assert!(is_title_shape(plain_title));
        let centered_title = shapes[1];
        assert!(is_title_shape(centered_title));
    }

    /// A shape carrying only a `body` placeholder is expected not to count as a title.
    #[test]
    fn is_title_shape_false_without_title_placeholder() {
        let xml = r#"<root><sp><nvSpPr><nvPr><ph type="body" /></nvPr></nvSpPr></sp></root>"#;
        let parsed = XmlDocument::parse(xml).expect("xml parse");
        let body_shape = parsed
            .descendants()
            .find(|node| node.tag_name().name() == "sp")
            .expect("shape");

        let is_title = is_title_shape(body_shape);
        assert!(!is_title);
    }

    /// The first title shape holds only a single space, so the expected title
    /// is the text of the second one.
    #[test]
    fn extract_slide_title_uses_first_non_empty_title_shape() {
        let xml = r#"
<root>
<sp><nvSpPr><nvPr><ph type="title" /></nvPr></nvSpPr><txBody><p><r><t> </t></r></p></txBody></sp>
<sp><nvSpPr><nvPr><ph type="title" /></nvPr></nvSpPr><txBody><p><r><t>Agenda</t></r></p></txBody></sp>
</root>
"#;
        let parsed = XmlDocument::parse(xml).expect("xml parse");

        let title = extract_slide_title(parsed.root());
        assert_eq!(title, "Agenda");
    }

    /// With no title placeholder anywhere in the document the expected title is empty.
    #[test]
    fn extract_slide_title_returns_empty_when_missing() {
        let xml = r#"<root><sp><txBody><p><r><t>Body text</t></r></p></txBody></sp></root>"#;
        let parsed = XmlDocument::parse(xml).expect("xml parse");

        let title = extract_slide_title(parsed.root());
        assert!(title.is_empty());
    }

    /// Two paragraphs, the first split by a `<br/>`: the expected text places
    /// `Hello`, `World` and `Next` on separate newline-terminated lines, and no
    /// links are expected to be collected.
    #[test]
    fn extract_slide_text_collects_paragraphs_and_breaks() {
        let xml = r#"
<root>
<p><r><t>Hello</t></r><br/><r><t>World</t></r></p>
<p><r><t>Next</t></r></p>
</root>
"#;
        let parsed = XmlDocument::parse(xml).expect("xml parse");
        let mut collected_links = Vec::new();
        let relationships = HashMap::new();

        let slide_text = extract_slide_text(parsed.root(), &mut collected_links, 0, &relationships);

        assert_eq!(slide_text, "Hello\nWorld\nNext\n");
        assert!(collected_links.is_empty());
    }
}

src/parser/rtf.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,78 @@ fn extract_content_from_tokens(tokens: &[Token]) -> DocumentBuffer {
221221
}
222222
result
223223
}
224+
225+
#[cfg(test)]
mod tests {
    // Unit tests for the RTF code-page and hex-escape helpers imported from the
    // parent module.

    use encoding_rs::Encoding;
    use rstest::rstest;

    use super::{
        encoding_for_codepage, extract_codepage, hex_digit, parse_hex_pair, resolve_hex_escapes,
    };

    /// Returns the name of `encoding` so test cases can compare against plain strings.
    fn enc_name(encoding: &'static Encoding) -> &'static str {
        Encoding::name(encoding)
    }

    /// Known code pages are expected to map to their Windows encodings; the
    /// unrecognised value 9999 is expected to yield `windows-1252`.
    #[rstest]
    #[case(1252, "windows-1252")]
    #[case(1251, "windows-1251")]
    #[case(1258, "windows-1258")]
    #[case(874, "windows-874")]
    #[case(9999, "windows-1252")]
    fn encoding_for_codepage_maps_supported_and_defaults(
        #[case] codepage: i32,
        #[case] expected: &str,
    ) {
        let selected = encoding_for_codepage(codepage);
        assert_eq!(enc_name(selected), expected);
    }

    /// Headers with a numeric `\ansicpg` value, a non-numeric one, and none at
    /// all; the last two cases expect `windows-1252`.
    #[rstest]
    #[case("{\\rtf1\\ansi\\ansicpg1251 hello}", "windows-1251")]
    #[case("{\\rtf1\\ansi\\ansicpg1258 hello}", "windows-1258")]
    #[case("{\\rtf1\\ansi\\ansicpgNOTNUM hello}", "windows-1252")]
    #[case("{\\rtf1\\ansi hello}", "windows-1252")]
    fn extract_codepage_reads_ansicpg_when_present(#[case] rtf: &str, #[case] expected: &str) {
        let detected = extract_codepage(rtf);
        assert_eq!(enc_name(detected), expected);
    }

    /// Boundary digits and letters in both cases, plus the bytes `g` and `/`,
    /// which are expected to be rejected.
    #[rstest]
    #[case(b'0', Some(0))]
    #[case(b'9', Some(9))]
    #[case(b'a', Some(10))]
    #[case(b'f', Some(15))]
    #[case(b'A', Some(10))]
    #[case(b'F', Some(15))]
    #[case(b'g', None)]
    #[case(b'/', None)]
    fn hex_digit_classifies_ascii_hex(#[case] input: u8, #[case] expected: Option<u8>) {
        let value = hex_digit(input);
        assert_eq!(value, expected);
    }

    /// Valid pairs in either letter case are expected to combine into one byte;
    /// an invalid digit in either position is expected to give `None`.
    #[rstest]
    #[case(b'4', b'1', Some(0x41))]
    #[case(b'e', b'9', Some(0xE9))]
    #[case(b'E', b'9', Some(0xE9))]
    #[case(b'Z', b'9', None)]
    #[case(b'1', b'X', None)]
    fn parse_hex_pair_parses_and_rejects_invalid(
        #[case] h1: u8,
        #[case] h2: u8,
        #[case] expected: Option<u8>,
    ) {
        let byte = parse_hex_pair(h1, h2);
        assert_eq!(byte, expected);
    }

    /// The `\'e9` escape is expected to become `é` under Windows-1252 while
    /// the surrounding text is left as it was.
    #[test]
    fn resolve_hex_escapes_decodes_high_bytes_only() {
        let raw = "Cafe\\'e9 and plain";

        let resolved = resolve_hex_escapes(raw, encoding_rs::WINDOWS_1252);

        assert_eq!(resolved, "Cafeé and plain");
    }

    /// The escapes `\'7b` and `\'5c` are expected to pass through unchanged.
    #[test]
    fn resolve_hex_escapes_keeps_ascii_escape_sequences() {
        let raw = "Escaped brace: \\'7b and slash: \\'5c";

        let resolved = resolve_hex_escapes(raw, encoding_rs::WINDOWS_1252);

        assert_eq!(resolved, raw);
    }

    /// Escapes containing non-hex characters are expected to be left untouched.
    #[test]
    fn resolve_hex_escapes_ignores_invalid_hex_sequences() {
        let raw = "Broken: \\'zz and mixed: \\'G1";

        let resolved = resolve_hex_escapes(raw, encoding_rs::WINDOWS_1252);

        assert_eq!(resolved, raw);
    }
}

0 commit comments

Comments
 (0)