@@ -185,6 +185,7 @@ fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
185185 if s. is_empty ( ) {
186186 return String :: new ( ) ;
187187 }
188+
188189 let total_chars = s. chars ( ) . count ( ) ;
189190 let max_bytes = policy. byte_budget ( ) ;
190191
@@ -204,24 +205,55 @@ fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
204205 let total_bytes = s. len ( ) ;
205206
206207 let ( left_budget, right_budget) = split_budget ( max_bytes) ;
207- let prefix_end = pick_prefix_end ( s, left_budget) ;
208- let mut suffix_start = pick_suffix_start ( s, right_budget) ;
209- if suffix_start < prefix_end {
210- suffix_start = prefix_end;
211- }
212208
213- let left_chars = s[ ..prefix_end] . chars ( ) . count ( ) ;
214- let right_chars = s[ suffix_start..] . chars ( ) . count ( ) ;
215- let removed_chars = total_chars
216- . saturating_sub ( left_chars)
217- . saturating_sub ( right_chars) ;
209+ let ( removed_chars, left, right) = split_string ( s, left_budget, right_budget) ;
218210
219211 let marker = format_truncation_marker (
220212 policy,
221213 removed_units_for_source ( policy, total_bytes. saturating_sub ( max_bytes) , removed_chars) ,
222214 ) ;
223215
224- assemble_truncated_output ( & s[ ..prefix_end] , & s[ suffix_start..] , & marker)
216+ assemble_truncated_output ( left, right, & marker)
217+ }
218+
/// Carves `s` into a head slice and a tail slice that respect the given byte budgets.
///
/// Returns `(removed_chars, before, after)` where:
/// * `before` is the longest run of whole characters from the start of `s` that fits in
///   `beginning_bytes` bytes;
/// * `after` begins at the first character boundary at or past byte offset
///   `s.len() - end_bytes` (saturating), and never overlaps `before`;
/// * `removed_chars` counts the characters lying between the two slices.
///
/// A character is never split: one that straddles the head budget is left out of `before`,
/// and one that straddles the tail start is left out of `after`.
fn split_string(s: &str, beginning_bytes: usize, end_bytes: usize) -> (usize, &str, &str) {
    let total = s.len();

    // Back the head cut up to the nearest character boundary (offset 0 always qualifies).
    let mut head_cut = beginning_bytes.min(total);
    while !s.is_char_boundary(head_cut) {
        head_cut -= 1;
    }

    // Push the tail cut forward to the nearest boundary; the head wins any overlap, so the
    // tail may not start before the head ends (offset `total` always qualifies).
    let mut tail_cut = total.saturating_sub(end_bytes).max(head_cut);
    while !s.is_char_boundary(tail_cut) {
        tail_cut += 1;
    }

    let (before, rest) = s.split_at(head_cut);
    let (dropped, after) = rest.split_at(tail_cut - head_cut);

    (dropped.chars().count(), before, after)
}
226258
227259fn format_truncation_marker ( policy : TruncationPolicy , removed_count : u64 ) -> String {
@@ -270,42 +302,54 @@ fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
270302 / ( APPROX_BYTES_PER_TOKEN as u64 )
271303}
272304
/// Returns the longest prefix of `input` that is at most `max_len` bytes long and ends on a
/// UTF-8 character boundary. The whole string is returned when it already fits.
fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
    // Scan downward from the byte limit; offset 0 is always a boundary, so a cut is found.
    let cut = (0..=max_len.min(input.len()))
        .rev()
        .find(|&at| input.is_char_boundary(at))
        .unwrap_or(0);
    &input[..cut]
}
283-
284- fn pick_prefix_end ( s : & str , left_budget : usize ) -> usize {
285- truncate_on_boundary ( s, left_budget) . len ( )
286- }
287-
/// Byte offset at which the retained suffix of `s` starts: the first character boundary at or
/// after `s.len() - right_budget` (saturating at 0), or `s.len()` when no boundary precedes
/// the end of the string.
fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
    let len = s.len();
    (len.saturating_sub(right_budget)..len)
        .find(|&at| s.is_char_boundary(at))
        .unwrap_or(len)
}
296-
297305#[ cfg( test) ]
298306mod tests {
299307
300308 use super :: TruncationPolicy ;
301309 use super :: approx_token_count;
302310 use super :: formatted_truncate_text;
311+ use super :: split_string;
303312 use super :: truncate_function_output_items_with_policy;
304313 use super :: truncate_text;
305314 use super :: truncate_with_token_budget;
306315 use codex_protocol:: models:: FunctionCallOutputContentItem ;
307316 use pretty_assertions:: assert_eq;
308317
#[test]
fn split_string_works() {
    // Two five-byte halves around a single space: only the space is dropped.
    let halves = split_string("hello world", 5, 5);
    assert_eq!(halves, (1, "hello", "world"));

    // Zero budgets on both sides keep nothing and drop every character.
    let nothing_kept = split_string("abc", 0, 0);
    assert_eq!(nothing_kept, (3, "", ""));
}
323+
#[test]
fn split_string_handles_empty_string() {
    // Empty input yields empty slices and no removed characters, whatever the budgets.
    let outcome = split_string("", 4, 4);
    assert_eq!(outcome, (0, "", ""));
}
328+
#[test]
fn split_string_only_keeps_prefix_when_tail_budget_is_zero() {
    // With no tail budget, everything after the three-byte prefix counts as removed.
    let outcome = split_string("abcdef", 3, 0);
    assert_eq!(outcome, (3, "abc", ""));
}
333+
#[test]
fn split_string_only_keeps_suffix_when_prefix_budget_is_zero() {
    // With no head budget, everything before the three-byte suffix counts as removed.
    let outcome = split_string("abcdef", 0, 3);
    assert_eq!(outcome, (3, "", "def"));
}
338+
#[test]
fn split_string_handles_overlapping_budgets_without_removal() {
    // Budgets that together exceed the input: the prefix takes its full share and the
    // suffix gets whatever is left, so nothing is removed.
    let outcome = split_string("abcdef", 4, 4);
    assert_eq!(outcome, (0, "abcd", "ef"));
}
343+
#[test]
fn split_string_respects_utf8_boundaries() {
    // Mixed-width input: the four-byte emoji plus one ASCII byte fit each five-byte budget.
    let mixed = split_string("😀abc😀", 5, 5);
    assert_eq!(mixed, (1, "😀a", "c😀"));

    // Five four-byte emoji (20 bytes): budgets that fall inside a character drop it whole.
    let emoji = "😀😀😀😀😀";
    let too_small = split_string(emoji, 1, 1);
    assert_eq!(too_small, (5, "", ""));
    let one_each = split_string(emoji, 7, 7);
    assert_eq!(one_each, (3, "😀", "😀"));
    let two_each = split_string(emoji, 8, 8);
    assert_eq!(two_each, (1, "😀😀", "😀😀"));
}
352+
309353 #[ test]
310354 fn truncate_bytes_less_than_placeholder_returns_placeholder ( ) {
311355 let content = "example output" ;
0 commit comments