Skip to content

Commit 1dbd53e

Browse files
committed
Fix RTF parser splitting paragraphs at source line boundaries
The rtf_parser lexer only treats \n as a token delimiter, leaving \r from \r\n line endings embedded in PlainText tokens. These stray carriage returns caused visible line breaks mid-word in the output. Strip \r from the source before lexing.
1 parent d62aa3b commit 1dbd53e

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

src/parser/rtf.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@ impl Parser for RtfParser {
35 | 35
let content_str = content_str.trim_end_matches(|c: char| c == '\0' || c.is_whitespace());
36 | 36
let encoding = extract_codepage(content_str);
37 | 37
let content_str = resolve_hex_escapes(content_str, encoding);
38 | +
// Strip \r so that \r\n line endings don't leave stray carriage returns in text tokens
39 | +
let content_str = content_str.replace('\r', "");
38 | 40
let tokens = Lexer::scan(&content_str).map_err(|e| anyhow::anyhow!("Failed to parse RTF document: {e}"))?;
39 | 41
let buffer = extract_content_from_tokens(&tokens);
40 | 42
let title = extract_title_from_path(&context.file_path);

0 commit comments

Comments
 (0)