@@ -162,7 +162,14 @@ impl ToUnicodeCMap {
162162 ret_vec
163163 }
164164 UTF16CodePoint { offset } => vec ! [ u32 :: wrapping_add( code, * offset) as u16 ] ,
165- ArrayOfHexStrings ( vec_of_strings) => vec_of_strings[ ( code - range. start ( ) ) as usize ] . clone ( ) ,
165+ ArrayOfHexStrings ( vec_of_strings) => {
166+ let idx = ( code - range. start ( ) ) as usize ;
167+ if idx < vec_of_strings. len ( ) {
168+ vec_of_strings[ idx] . clone ( )
169+ } else {
170+ vec ! [ ToUnicodeCMap :: REPLACEMENT_CHAR ]
171+ }
172+ }
166173 } )
167174 }
168175
@@ -250,4 +257,26 @@ mod tests {
250257 cmap. put_char ( char_code, 5 , char_value. clone ( ) ) ;
251258 cmap. put_char ( char_code, 0 , char_value. clone ( ) ) ;
252259 }
260+
#[test]
fn array_of_hex_strings_out_of_bounds_returns_replacement() {
    // Malformed bfrange: the source range 0x10..=0x14 spans five codes,
    // yet the target array supplies only two hex strings.
    let targets = BfRangeTarget::ArrayOfHexStrings(vec![
        vec![0x0041], // 'A', target for code 0x10
        vec![0x0042], // 'B', target for code 0x11
    ]);
    let mut cmap = ToUnicodeCMap::new();
    cmap.put(0x10, 0x14, 2, targets);

    // Codes that fall inside the array resolve to their own entries.
    for (code, expected) in [(0x10, 0x0041), (0x11, 0x0042)] {
        assert_eq!(cmap.get(code, 2), Some(vec![expected]));
    }

    // Codes past the end of the array must yield the replacement character
    // instead of panicking on an out-of-bounds index.
    for code in 0x12..=0x14 {
        assert_eq!(
            cmap.get(code, 2),
            Some(vec![ToUnicodeCMap::REPLACEMENT_CHAR])
        );
    }
}
253282}
0 commit comments