@@ -57,6 +57,8 @@ public class CMap
5757 private final Map <Integer , String > charToUnicodeOneByte = new HashMap <>();
5858 // two byte input values
5959 private final Map <Integer , String > charToUnicodeTwoBytes = new HashMap <>();
60+ // 3 / 4 byte input values
61+ private final Map <Integer , String > charToUnicodeMoreBytes = new HashMap <>();
6062
6163 // CID mappings
6264 // map with all code to cid mappings organized by the origin byte length of the input value
@@ -93,7 +95,7 @@ public boolean hasCIDMappings()
9395 */
9496 public boolean hasUnicodeMappings ()
9597 {
96- return !charToUnicodeOneByte .isEmpty () || !charToUnicodeTwoBytes .isEmpty ();
98+ return !charToUnicodeOneByte .isEmpty () || !charToUnicodeTwoBytes .isEmpty () || ! charToUnicodeMoreBytes . isEmpty () ;
9799 }
98100
99101 /**
@@ -113,7 +115,15 @@ public String toUnicode(int code)
113115 String unicode = code < 256 ? toUnicode (code , 1 ) : null ;
114116 if (unicode == null )
115117 {
116- unicode = toUnicode (code , 2 );
118+ if (code <= 0xFFFF )
119+ {
120+ return toUnicode (code , 2 );
121+ }
122+ if (code <= 0xFFFFFF )
123+ {
124+ return toUnicode (code , 3 );
125+ }
126+ return toUnicode (code , 4 );
117127 }
118128 return unicode ;
119129 }
@@ -135,8 +145,7 @@ public String toUnicode(int code, int length)
135145 {
136146 return charToUnicodeTwoBytes .get (code );
137147 }
138- LOG .warn ("Mappings with more than 2 bytes aren't supported" );
139- return null ;
148+ return charToUnicodeMoreBytes .get (code );
140149 }
141150
142151 /**
@@ -350,9 +359,14 @@ else if (codes.length == 2)
350359 charToUnicodeTwoBytes .put (CMapStrings .getIndexValue (codes ), unicode );
351360 unicodeToByteCodes .put (unicode , CMapStrings .getByteValue (codes ));
352361 }
362+ else if (codes .length == 3 || codes .length == 4 )
363+ {
364+ charToUnicodeMoreBytes .put (toInt (codes ), unicode );
365+ unicodeToByteCodes .put (unicode , codes .clone ());
366+ }
353367 else
354368 {
355- LOG .warn ("Mappings with more than 2 bytes aren't supported yet" );
369+ LOG .warn ("Mappings with more than 4 bytes (here: {}) aren't supported yet" , codes . length );
356370 }
357371 // fixme: ugly little hack
358372 if (SPACE .equals (unicode ))
@@ -442,10 +456,28 @@ void useCmap(CMap cmap)
442456 cmap .codespaceRanges .forEach (this ::addCodespaceRange );
443457 charToUnicodeOneByte .putAll (cmap .charToUnicodeOneByte );
444458 charToUnicodeTwoBytes .putAll (cmap .charToUnicodeTwoBytes );
459+ charToUnicodeMoreBytes .putAll (cmap .charToUnicodeMoreBytes );
445460 cmap .charToUnicodeOneByte .forEach ((k , v ) -> unicodeToByteCodes .put (v , new byte []{(byte ) (k % 0xFF )}));
446461 cmap .charToUnicodeTwoBytes .forEach ((k , v ) -> unicodeToByteCodes .put (v ,
447462 new byte []{(byte ) ((k >>> 8 ) & 0xFF ), (byte ) (k & 0xFF )})
448463 );
464+ cmap .charToUnicodeMoreBytes .forEach ((k , v ) ->
465+ {
466+ byte [] bar ;
467+ if (k <= 0xFFFFFF )
468+ {
469+ // 3 bytes
470+ bar = new byte []{(byte ) ((k >>> 16 ) & 0xFF ), (byte ) ((k >>> 8 ) & 0xFF ),
471+ (byte ) (k & 0xFF )};
472+ }
473+ else
474+ {
475+ // 4 bytes
476+ bar = new byte []{(byte ) ((k >>> 24 ) & 0xFF ), (byte ) ((k >>> 16 ) & 0xFF ),
477+ (byte ) ((k >>> 8 ) & 0xFF ), (byte ) (k & 0xFF )};
478+ }
479+ unicodeToByteCodes .put (v , bar );
480+ });
449481 cmap .codeToCid .forEach ((key , value ) ->
450482 {
451483 Map <Integer , Integer > existingMapping = codeToCid .putIfAbsent (key , value );
0 commit comments