@@ -221,3 +221,78 @@ fn extract_content_from_tokens(tokens: &[Token]) -> DocumentBuffer {
221221 }
222222 result
223223}
224+
#[cfg(test)]
mod tests {
    //! Unit tests for the code-page lookup and `\'hh` hex-escape helpers.
    //!
    //! RTF control words and hex escapes start immediately after the backslash
    //! (`\ansicpg1251`, `\'e9`), so the fixtures below must not contain any
    //! whitespace between the backslash and what follows it.

    use encoding_rs::Encoding;
    use rstest::rstest;

    use super::{
        encoding_for_codepage, extract_codepage, hex_digit, parse_hex_pair, resolve_hex_escapes,
    };

    /// Returns the canonical name of `enc` so assertions can compare plain strings.
    fn enc_name(enc: &'static Encoding) -> &'static str {
        enc.name()
    }

    /// Supported code pages map to their Windows encodings; the unsupported
    /// value `9999` is expected to resolve to `windows-1252`.
    #[rstest]
    #[case(1252, "windows-1252")]
    #[case(1251, "windows-1251")]
    #[case(1258, "windows-1258")]
    #[case(874, "windows-874")]
    #[case(9999, "windows-1252")]
    fn encoding_for_codepage_maps_supported_and_defaults(
        #[case] codepage: i32,
        #[case] expected: &str,
    ) {
        assert_eq!(enc_name(encoding_for_codepage(codepage)), expected);
    }

    /// A `\ansicpgN` control word selects the encoding; a non-numeric or
    /// missing `\ansicpg` is expected to resolve to `windows-1252`.
    #[rstest]
    #[case("{\\rtf1\\ansi\\ansicpg1251 hello}", "windows-1251")]
    #[case("{\\rtf1\\ansi\\ansicpg1258 hello}", "windows-1258")]
    #[case("{\\rtf1\\ansi\\ansicpgNOTNUM hello}", "windows-1252")]
    #[case("{\\rtf1\\ansi hello}", "windows-1252")]
    fn extract_codepage_reads_ansicpg_when_present(#[case] rtf: &str, #[case] expected: &str) {
        assert_eq!(enc_name(extract_codepage(rtf)), expected);
    }

    /// Both lower- and upper-case ASCII hex digits yield their value; the
    /// neighbouring bytes `g` and `/` are rejected.
    #[rstest]
    #[case(b'0', Some(0))]
    #[case(b'9', Some(9))]
    #[case(b'a', Some(10))]
    #[case(b'f', Some(15))]
    #[case(b'A', Some(10))]
    #[case(b'F', Some(15))]
    #[case(b'g', None)]
    #[case(b'/', None)]
    fn hex_digit_classifies_ascii_hex(#[case] input: u8, #[case] expected: Option<u8>) {
        assert_eq!(hex_digit(input), expected);
    }

    /// Two valid hex digits combine into one byte; an invalid digit in either
    /// position yields `None`.
    #[rstest]
    #[case(b'4', b'1', Some(0x41))]
    #[case(b'e', b'9', Some(0xE9))]
    #[case(b'E', b'9', Some(0xE9))]
    #[case(b'Z', b'9', None)]
    #[case(b'1', b'X', None)]
    fn parse_hex_pair_parses_and_rejects_invalid(
        #[case] h1: u8,
        #[case] h2: u8,
        #[case] expected: Option<u8>,
    ) {
        assert_eq!(parse_hex_pair(h1, h2), expected);
    }

    /// `\'e9` is a high byte and must be decoded through the given encoding.
    #[test]
    fn resolve_hex_escapes_decodes_high_bytes_only() {
        let input = "Cafe\\'e9 and plain";
        let output = resolve_hex_escapes(input, encoding_rs::WINDOWS_1252);
        assert_eq!(output, "Cafeé and plain");
    }

    /// Escapes for ASCII bytes (`\'7b`, `\'5c`) must be passed through untouched.
    #[test]
    fn resolve_hex_escapes_keeps_ascii_escape_sequences() {
        let input = "Escaped brace: \\'7b and slash: \\'5c";
        let output = resolve_hex_escapes(input, encoding_rs::WINDOWS_1252);
        assert_eq!(output, input);
    }

    /// Escapes whose digits are not valid hex must be left exactly as written.
    #[test]
    fn resolve_hex_escapes_ignores_invalid_hex_sequences() {
        let input = "Broken: \\'zz and mixed: \\'G1";
        let output = resolve_hex_escapes(input, encoding_rs::WINDOWS_1252);
        assert_eq!(output, input);
    }
}
0 commit comments