@@ -944,13 +944,37 @@ impl VerifiedInvoiceRequest {
944944 InvoiceRequestFields {
945945 payer_signing_pubkey : * payer_signing_pubkey,
946946 quantity : * quantity,
947- payer_note_truncated : payer_note. clone ( )
948- . map ( |mut s| { s. truncate ( PAYER_NOTE_LIMIT ) ; UntrustedString ( s) } ) ,
947+ payer_note_truncated : payer_note
948+ . clone ( )
949+ // Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding
950+ // down to the nearest valid UTF-8 code point boundary.
951+ . map ( |s| UntrustedString ( string_truncate_safe ( s, PAYER_NOTE_LIMIT ) ) ) ,
949952 human_readable_name : self . offer_from_hrn ( ) . clone ( ) ,
950953 }
951954 }
952955}
953956
957+ /// `String::truncate(new_len)` panics if you split inside a UTF-8 code point,
958+ /// which would leave the `String` containing invalid UTF-8. This function will
959+ /// instead truncate the string to the next smaller code point boundary so the
960+ /// truncated string always remains valid UTF-8.
961+ ///
962+ /// This can still split a grapheme cluster, but that's probably fine.
963+ /// We'd otherwise have to pull in the `unicode-segmentation` crate and its big
964+ /// unicode tables to find the next smaller grapheme cluster boundary.
965+ fn string_truncate_safe ( mut s : String , new_len : usize ) -> String {
966+ // Finds the largest byte index `x` not exceeding byte index `index` where
967+ // `s.is_char_boundary(x)` is true.
968+ // TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes.
969+ let truncated_len = if new_len >= s. len ( ) {
970+ s. len ( )
971+ } else {
972+ ( 0 ..=new_len) . rev ( ) . find ( |idx| s. is_char_boundary ( * idx) ) . unwrap_or ( 0 )
973+ } ;
974+ s. truncate ( truncated_len) ;
975+ s
976+ }
977+
954978impl InvoiceRequestContents {
955979 pub ( super ) fn metadata ( & self ) -> & [ u8 ] {
956980 self . inner . metadata ( )
@@ -1339,7 +1363,8 @@ mod tests {
13391363 use crate :: ln:: inbound_payment:: ExpandedKey ;
13401364 use crate :: ln:: msgs:: { DecodeError , MAX_VALUE_MSAT } ;
13411365 use crate :: offers:: invoice:: { Bolt12Invoice , SIGNATURE_TAG as INVOICE_SIGNATURE_TAG } ;
1342- use crate :: offers:: merkle:: { SignatureTlvStreamRef , TaggedHash , TlvStream , self } ;
1366+ use crate :: offers:: invoice_request:: string_truncate_safe;
1367+ use crate :: offers:: merkle:: { self , SignatureTlvStreamRef , TaggedHash , TlvStream } ;
13431368 use crate :: offers:: nonce:: Nonce ;
13441369 use crate :: offers:: offer:: { Amount , ExperimentalOfferTlvStreamRef , OfferTlvStreamRef , Quantity } ;
13451370 #[ cfg( not( c_bindings) ) ]
@@ -2611,12 +2636,22 @@ mod tests {
26112636 . build ( ) . unwrap ( ) ;
26122637 assert_eq ! ( offer. issuer_signing_pubkey( ) , Some ( node_id) ) ;
26132638
2639+ // UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)`
2640+ // because it would split a multi-byte UTF-8 code point.
2641+ let payer_note = "❤️" . repeat ( 86 ) ;
2642+ assert_eq ! ( payer_note. len( ) , PAYER_NOTE_LIMIT + 4 ) ;
2643+ let expected_payer_note = "❤️" . repeat ( 85 ) ;
2644+
26142645 let invoice_request = offer
2615- . request_invoice ( & expanded_key, nonce, & secp_ctx, payment_id) . unwrap ( )
2616- . chain ( Network :: Testnet ) . unwrap ( )
2617- . quantity ( 1 ) . unwrap ( )
2618- . payer_note ( "0" . repeat ( PAYER_NOTE_LIMIT * 2 ) )
2619- . build_and_sign ( ) . unwrap ( ) ;
2646+ . request_invoice ( & expanded_key, nonce, & secp_ctx, payment_id)
2647+ . unwrap ( )
2648+ . chain ( Network :: Testnet )
2649+ . unwrap ( )
2650+ . quantity ( 1 )
2651+ . unwrap ( )
2652+ . payer_note ( payer_note)
2653+ . build_and_sign ( )
2654+ . unwrap ( ) ;
26202655 match invoice_request. verify_using_metadata ( & expanded_key, & secp_ctx) {
26212656 Ok ( invoice_request) => {
26222657 let fields = invoice_request. fields ( ) ;
@@ -2626,7 +2661,7 @@ mod tests {
26262661 InvoiceRequestFields {
26272662 payer_signing_pubkey: invoice_request. payer_signing_pubkey( ) ,
26282663 quantity: Some ( 1 ) ,
2629- payer_note_truncated: Some ( UntrustedString ( "0" . repeat ( PAYER_NOTE_LIMIT ) ) ) ,
2664+ payer_note_truncated: Some ( UntrustedString ( expected_payer_note ) ) ,
26302665 human_readable_name: None ,
26312666 }
26322667 ) ;
@@ -2641,4 +2676,31 @@ mod tests {
26412676 Err ( _) => panic ! ( "unexpected error" ) ,
26422677 }
26432678 }
2679+
2680+ #[ test]
2681+ fn test_string_truncate_safe ( ) {
2682+ // We'll correctly truncate to the nearest UTF-8 code point boundary:
2683+ // ❤ variation-selector
2684+ // e29da4 efb88f
2685+ let s = String :: from ( "❤️" ) ;
2686+ assert_eq ! ( s. len( ) , 6 ) ;
2687+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , 7 ) ) ;
2688+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , 6 ) ) ;
2689+ assert_eq ! ( "❤" , string_truncate_safe( s. clone( ) , 5 ) ) ;
2690+ assert_eq ! ( "❤" , string_truncate_safe( s. clone( ) , 4 ) ) ;
2691+ assert_eq ! ( "❤" , string_truncate_safe( s. clone( ) , 3 ) ) ;
2692+ assert_eq ! ( "" , string_truncate_safe( s. clone( ) , 2 ) ) ;
2693+ assert_eq ! ( "" , string_truncate_safe( s. clone( ) , 1 ) ) ;
2694+ assert_eq ! ( "" , string_truncate_safe( s. clone( ) , 0 ) ) ;
2695+
2696+ // Every byte in an ASCII string is also a full UTF-8 code point.
2697+ let s = String :: from ( "my ASCII string!" ) ;
2698+ for new_len in 0 ..( s. len ( ) + 5 ) {
2699+ if new_len >= s. len ( ) {
2700+ assert_eq ! ( s, string_truncate_safe( s. clone( ) , new_len) ) ;
2701+ } else {
2702+ assert_eq ! ( s[ ..new_len] , string_truncate_safe( s. clone( ) , new_len) ) ;
2703+ }
2704+ }
2705+ }
26442706}
0 commit comments