@@ -1885,9 +1885,7 @@ impl Screen<'_> {
18851885 post_processing : true ,
18861886 persist : false ,
18871887 action : rio_backend:: config:: hints:: HintAction :: Command {
1888- command : rio_backend:: config:: hints:: HintCommand :: Simple (
1889- "xdg-open" . to_string ( ) ,
1890- ) ,
1888+ command : rio_backend:: config:: hints:: default_url_command ( ) ,
18911889 } ,
18921890 mouse : rio_backend:: config:: hints:: HintMouse :: default ( ) ,
18931891 binding : None ,
@@ -1924,19 +1922,23 @@ impl Screen<'_> {
19241922 return None ;
19251923 }
19261924
1927- // Extract text from the line
1925+ // Extract line text and map byte offsets to column indices
1926+ // (regex returns byte offsets which diverge from columns for non-ASCII)
19281927 let mut line_text = String :: new ( ) ;
19291928 for col in 0 ..grid. columns ( ) {
19301929 let cell = & grid[ point. row ] [ rio_backend:: crosswords:: pos:: Column ( col) ] ;
19311930 line_text. push ( cell. c ) ;
19321931 }
1932+ let byte_to_col = build_byte_to_col ( line_text. chars ( ) ) ;
19331933 let line_text = line_text. trim_end ( ) ;
19341934
19351935 // Find all matches in this line and check if point is within any of them
19361936 for mat in regex. find_iter ( line_text) {
1937- let start_col = rio_backend:: crosswords:: pos:: Column ( mat. start ( ) ) ;
1938- let end_col =
1939- rio_backend:: crosswords:: pos:: Column ( mat. end ( ) . saturating_sub ( 1 ) ) ;
1937+ let start_col =
1938+ rio_backend:: crosswords:: pos:: Column ( byte_to_col[ mat. start ( ) ] ) ;
1939+ let end_col = rio_backend:: crosswords:: pos:: Column (
1940+ byte_to_col[ mat. end ( ) . saturating_sub ( 1 ) ] ,
1941+ ) ;
19401942
19411943 // Check if the point is within this match
19421944 if point. col >= start_col && point. col <= end_col {
@@ -2033,6 +2035,15 @@ impl Screen<'_> {
20332035 }
20342036 }
20352037
2038+ /// Clear the highlighted hint to prevent double-fire on click
2039+ #[ inline]
2040+ pub fn clear_highlighted_hint ( & mut self ) {
2041+ self . context_manager
2042+ . current_mut ( )
2043+ . renderable_content
2044+ . highlighted_hint = None ;
2045+ }
2046+
20362047 fn open_hyperlink ( & self , hyperlink : Hyperlink ) {
20372048 // Apply post-processing to remove trailing delimiters and handle uneven brackets
20382049 let processed_uri = post_process_hyperlink_uri ( hyperlink. uri ( ) ) ;
@@ -3717,6 +3728,18 @@ fn post_process_hyperlink_uri(uri: &str) -> String {
37173728 chars. into_iter ( ) . take ( end_idx + 1 ) . collect ( )
37183729}
37193730
/// Build a lookup table from UTF-8 byte offsets to column indices.
///
/// Every `char` yielded by `chars` is treated as occupying exactly one
/// column, while taking 1-4 bytes in UTF-8. For each byte of a char the
/// table stores the index of that char, so `table[byte_offset]` is the
/// column containing that byte.
///
/// The table carries one extra trailing entry, at index == total byte
/// length, holding the total number of columns. Regex matches report
/// exclusive end offsets, and empty matches at end of input report a start
/// offset equal to the text length; the sentinel keeps such lookups in
/// bounds instead of panicking.
///
/// Returns a vector of length `total_bytes + 1`.
fn build_byte_to_col(chars: impl Iterator<Item = char>) -> Vec<usize> {
    // At least one byte per char, plus the end-of-text sentinel.
    let mut byte_to_col = Vec::with_capacity(chars.size_hint().0.saturating_add(1));
    let mut columns = 0;
    for ch in chars {
        // Map every byte of this char to the same column.
        let filled = byte_to_col.len() + ch.len_utf8();
        byte_to_col.resize(filled, columns);
        columns += 1;
    }
    // Sentinel: the one-past-the-end byte offset maps to the column count.
    byte_to_col.push(columns);
    byte_to_col
}
3742+
37203743#[ cfg( test) ]
37213744mod tests {
37223745 use super :: * ;
@@ -3777,4 +3800,44 @@ mod tests {
37773800 "https://example.com/path[with]brackets"
37783801 ) ;
37793802 }
3803+
#[test]
fn test_byte_to_col_with_regex_match() {
    // Reproduces the bug from #1457: regex byte offsets used as column
    // indices cause URL truncation when non-ASCII chars precede the URL.
    let url_re =
        regex::Regex::new(rio_backend::config::hints::DEFAULT_URL_REGEX).unwrap();

    // (line, raw byte offset of the URL, first column, last column)
    let cases = [
        // ASCII-only: byte offsets happen to equal column indices.
        ("see https://example.com ok", 4, 4, 22),
        // 2-byte char (é) before the URL: byte offset diverges by one.
        ("café https://example.com ok", 6, 5, 23),
        // 3-byte CJK char: offset diverges by two.
        ("中 https://example.com ok", 4, 2, 20),
        // 4-byte emoji: offset diverges by three.
        ("😀 https://example.com ok", 5, 2, 20),
    ];

    for (line, start_byte, start_col, end_col) in cases {
        let byte_to_col = build_byte_to_col(line.chars());
        let mat = url_re.find(line).expect("URL regex should match the line");

        assert_eq!(mat.as_str(), "https://example.com", "line: {:?}", line);
        // The raw regex offset is in bytes, not columns.
        assert_eq!(mat.start(), start_byte, "line: {:?}", line);
        // Both ends of the match must map back to the right columns; the
        // call site uses the inclusive last byte (`end - 1`) for the end.
        assert_eq!(byte_to_col[mat.start()], start_col, "line: {:?}", line);
        assert_eq!(byte_to_col[mat.end() - 1], end_col, "line: {:?}", line);
    }
}
37803843}
0 commit comments