Skip to content

Commit bcc02e9

Browse files
authored
Merge pull request #7897 from aaron-ang/ptx-panic
ptx: use char count instead of byte index to handle utf-8 characters
2 parents de32281 + 1cfb19a commit bcc02e9

File tree

2 files changed

+63
-32
lines changed

2 files changed

+63
-32
lines changed

src/uu/ptx/src/ptx.rs

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55

66
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS
77

8-
use clap::{Arg, ArgAction, Command};
9-
use regex::Regex;
108
use std::cmp;
119
use std::collections::{BTreeSet, HashMap, HashSet};
1210
use std::fmt::Write as FmtWrite;
1311
use std::fs::File;
1412
use std::io::{BufRead, BufReader, BufWriter, Read, Write, stdin, stdout};
1513
use std::num::ParseIntError;
14+
15+
use clap::{Arg, ArgAction, Command};
16+
use regex::Regex;
1617
use thiserror::Error;
1718
use uucore::display::Quotable;
1819
use uucore::error::{FromIo, UError, UResult, UUsageError};
@@ -551,26 +552,14 @@ fn format_tex_line(
551552
) -> String {
552553
let mut output = String::new();
553554
write!(output, "\\{} ", config.macro_name).unwrap();
554-
let all_before = if config.input_ref {
555-
let before = &line[0..word_ref.position];
556-
let before_start_trim_offset =
557-
word_ref.position - before.trim_start_matches(reference).trim_start().len();
558-
let before_end_index = before.len();
559-
&chars_line[before_start_trim_offset..cmp::max(before_end_index, before_start_trim_offset)]
560-
} else {
561-
let before_chars_trim_idx = (0, word_ref.position);
562-
&chars_line[before_chars_trim_idx.0..before_chars_trim_idx.1]
563-
};
564-
let keyword = &line[word_ref.position..word_ref.position_end];
565-
let after_chars_trim_idx = (word_ref.position_end, chars_line.len());
566-
let all_after = &chars_line[after_chars_trim_idx.0..after_chars_trim_idx.1];
567-
let (tail, before, after, head) = get_output_chunks(all_before, keyword, all_after, config);
555+
let (tail, before, keyword, after, head) =
556+
prepare_line_chunks(config, word_ref, line, chars_line, reference);
568557
write!(
569558
output,
570559
"{{{0}}}{{{1}}}{{{2}}}{{{3}}}{{{4}}}",
571560
format_tex_field(&tail),
572561
format_tex_field(&before),
573-
format_tex_field(keyword),
562+
format_tex_field(&keyword),
574563
format_tex_field(&after),
575564
format_tex_field(&head),
576565
)
@@ -594,26 +583,14 @@ fn format_roff_line(
594583
) -> String {
595584
let mut output = String::new();
596585
write!(output, ".{}", config.macro_name).unwrap();
597-
let all_before = if config.input_ref {
598-
let before = &line[0..word_ref.position];
599-
let before_start_trim_offset =
600-
word_ref.position - before.trim_start_matches(reference).trim_start().len();
601-
let before_end_index = before.len();
602-
&chars_line[before_start_trim_offset..cmp::max(before_end_index, before_start_trim_offset)]
603-
} else {
604-
let before_chars_trim_idx = (0, word_ref.position);
605-
&chars_line[before_chars_trim_idx.0..before_chars_trim_idx.1]
606-
};
607-
let keyword = &line[word_ref.position..word_ref.position_end];
608-
let after_chars_trim_idx = (word_ref.position_end, chars_line.len());
609-
let all_after = &chars_line[after_chars_trim_idx.0..after_chars_trim_idx.1];
610-
let (tail, before, after, head) = get_output_chunks(all_before, keyword, all_after, config);
586+
let (tail, before, keyword, after, head) =
587+
prepare_line_chunks(config, word_ref, line, chars_line, reference);
611588
write!(
612589
output,
613590
" \"{}\" \"{}\" \"{}{}\" \"{}\"",
614591
format_roff_field(&tail),
615592
format_roff_field(&before),
616-
format_roff_field(keyword),
593+
format_roff_field(&keyword),
617594
format_roff_field(&after),
618595
format_roff_field(&head)
619596
)
@@ -624,6 +601,46 @@ fn format_roff_line(
624601
output
625602
}
626603

604+
/// Extract and prepare text chunks for formatting in both TeX and roff output
605+
fn prepare_line_chunks(
606+
config: &Config,
607+
word_ref: &WordRef,
608+
line: &str,
609+
chars_line: &[char],
610+
reference: &str,
611+
) -> (String, String, String, String, String) {
612+
// Convert byte positions to character positions
613+
let ref_char_position = line[..word_ref.position].chars().count();
614+
let char_position_end = ref_char_position
615+
+ line[word_ref.position..word_ref.position_end]
616+
.chars()
617+
.count();
618+
619+
// Extract the text before the keyword
620+
let all_before = if config.input_ref {
621+
let before = &line[..word_ref.position];
622+
let before_char_count = before.chars().count();
623+
let trimmed_char_count = before
624+
.trim_start_matches(reference)
625+
.trim_start()
626+
.chars()
627+
.count();
628+
let trim_offset = before_char_count - trimmed_char_count;
629+
&chars_line[trim_offset..before_char_count]
630+
} else {
631+
&chars_line[..ref_char_position]
632+
};
633+
634+
// Extract the keyword and text after it
635+
let keyword = line[word_ref.position..word_ref.position_end].to_string();
636+
let all_after = &chars_line[char_position_end..];
637+
638+
// Get formatted output chunks
639+
let (tail, before, after, head) = get_output_chunks(all_before, &keyword, all_after, config);
640+
641+
(tail, before, keyword, after, head)
642+
}
643+
627644
fn write_traditional_output(
628645
config: &Config,
629646
file_map: &FileMap,

tests/by-util/test_ptx.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,17 @@ fn test_failed_write_is_reported() {
174174
.fails()
175175
.stderr_is("ptx: write failed: No space left on device\n");
176176
}
177+
178+
/// Feeds a line containing a multi-byte UTF-8 character (`’`) to `ptx` and checks the
/// exact output, once with `-G` (`.xx` macro lines) and once with `-G -T`
/// (`\xx {..}` lines).
#[test]
fn test_utf8() {
    let input = "it’s disabled\n";
    let expected_with_g = ".xx \"\" \"it’s\" \"disabled\" \"\"\n.xx \"\" \"\" \"it’s disabled\" \"\"\n";
    let expected_with_g_t = "\\xx {}{it’s}{disabled}{}{}\n\\xx {}{}{it’s}{ disabled}{}\n";

    new_ucmd!()
        .args(&["-G"])
        .pipe_in(input)
        .succeeds()
        .stdout_only(expected_with_g);

    new_ucmd!()
        .args(&["-G", "-T"])
        .pipe_in(input)
        .succeeds()
        .stdout_only(expected_with_g_t);
}

0 commit comments

Comments
 (0)