@@ -1882,9 +1882,7 @@ impl Screen<'_> {
18821882 post_processing : true ,
18831883 persist : false ,
18841884 action : rio_backend:: config:: hints:: HintAction :: Command {
1885- command : rio_backend:: config:: hints:: HintCommand :: Simple (
1886- "xdg-open" . to_string ( ) ,
1887- ) ,
1885+ command : rio_backend:: config:: hints:: default_url_command ( ) ,
18881886 } ,
18891887 mouse : rio_backend:: config:: hints:: HintMouse :: default ( ) ,
18901888 binding : None ,
@@ -1921,19 +1919,23 @@ impl Screen<'_> {
19211919 return None ;
19221920 }
19231921
1924- // Extract text from the line
1922+ // Extract line text and map byte offsets to column indices
1923+ // (regex returns byte offsets which diverge from columns for non-ASCII)
19251924 let mut line_text = String :: new ( ) ;
19261925 for col in 0 ..grid. columns ( ) {
19271926 let cell = & grid[ point. row ] [ rio_backend:: crosswords:: pos:: Column ( col) ] ;
19281927 line_text. push ( cell. c ) ;
19291928 }
1929+ let byte_to_col = build_byte_to_col ( line_text. chars ( ) ) ;
19301930 let line_text = line_text. trim_end ( ) ;
19311931
19321932 // Find all matches in this line and check if point is within any of them
19331933 for mat in regex. find_iter ( line_text) {
1934- let start_col = rio_backend:: crosswords:: pos:: Column ( mat. start ( ) ) ;
1935- let end_col =
1936- rio_backend:: crosswords:: pos:: Column ( mat. end ( ) . saturating_sub ( 1 ) ) ;
1934+ let start_col =
1935+ rio_backend:: crosswords:: pos:: Column ( byte_to_col[ mat. start ( ) ] ) ;
1936+ let end_col = rio_backend:: crosswords:: pos:: Column (
1937+ byte_to_col[ mat. end ( ) . saturating_sub ( 1 ) ] ,
1938+ ) ;
19371939
19381940 // Check if the point is within this match
19391941 if point. col >= start_col && point. col <= end_col {
@@ -2030,6 +2032,15 @@ impl Screen<'_> {
20302032 }
20312033 }
20322034
2035+ /// Clear the highlighted hint to prevent double-fire on click
2036+ #[ inline]
2037+ pub fn clear_highlighted_hint ( & mut self ) {
2038+ self . context_manager
2039+ . current_mut ( )
2040+ . renderable_content
2041+ . highlighted_hint = None ;
2042+ }
2043+
20332044 fn open_hyperlink ( & self , hyperlink : Hyperlink ) {
20342045 // Apply post-processing to remove trailing delimiters and handle uneven brackets
20352046 let processed_uri = post_process_hyperlink_uri ( hyperlink. uri ( ) ) ;
@@ -3711,6 +3722,18 @@ fn post_process_hyperlink_uri(uri: &str) -> String {
37113722 chars. into_iter ( ) . take ( end_idx + 1 ) . collect ( )
37123723}
37133724
/// Builds a lookup table from UTF-8 byte offset to char index.
///
/// Regex matches report byte offsets, whereas a line assembled one char per
/// grid cell is addressed by char position (its column). Entry `b` of the
/// returned vector is the zero-based position in `chars` of the char whose
/// UTF-8 encoding contains byte `b`, so a char that encodes to `k` bytes
/// (1-4) contributes `k` consecutive copies of its index.
///
/// The table holds exactly one entry per encoded byte; the one-past-the-end
/// offset (the total byte length) is therefore not a valid index.
fn build_byte_to_col(chars: impl Iterator<Item = char>) -> Vec<usize> {
    // Every char encodes to at least one byte, so the iterator's lower bound
    // on remaining chars is also a lower bound on the final table length.
    let mut byte_to_col = Vec::with_capacity(chars.size_hint().0);
    for (col, ch) in chars.enumerate() {
        // Append one copy of `col` for each byte of this char's encoding.
        let new_len = byte_to_col.len() + ch.len_utf8();
        byte_to_col.resize(new_len, col);
    }
    byte_to_col
}
3736+
37143737#[ cfg( test) ]
37153738mod tests {
37163739 use super :: * ;
@@ -3771,4 +3794,44 @@ mod tests {
37713794 "https://example.com/path[with]brackets"
37723795 ) ;
37733796 }
3797+
#[test]
fn test_byte_to_col_with_regex_match() {
    // Regression coverage for #1457: regex byte offsets used directly as
    // column indices truncate the URL when non-ASCII chars precede it.
    let url_re =
        regex::Regex::new(rio_backend::config::hints::DEFAULT_URL_REGEX).unwrap();

    // Each case pairs a line with the column at which its URL starts.
    let cases = [
        // ASCII-only: byte offsets happen to equal column indices.
        ("see https://example.com ok", 4),
        // 2-byte char (é) before the URL: the URL starts at byte 6, column 5.
        ("café https://example.com ok", 5),
        // 3-byte CJK char: the URL starts at byte 4, column 2.
        ("中 https://example.com ok", 2),
        // 4-byte emoji: the URL starts at byte 5, column 2.
        ("😀 https://example.com ok", 2),
    ];

    for &(line, url_col) in cases.iter() {
        let table = build_byte_to_col(line.chars());
        let found = url_re.find(line).unwrap();
        assert_eq!(found.as_str(), "https://example.com");

        // The lookup table always yields the correct column.
        assert_eq!(table[found.start()], url_col);

        if line.is_ascii() {
            // For ASCII the raw byte offset already equals the column.
            assert_eq!(found.start(), url_col);
        } else {
            // For non-ASCII the raw byte offset is NOT the column (the bug).
            assert_ne!(found.start(), url_col);
        }
    }
}
37743837}
0 commit comments