@@ -91,6 +91,8 @@ public class PdfType0Font extends PdfFont {
91
91
protected int cidFontType ;
92
92
protected char [] specificUnicodeDifferences ;
93
93
94
+ private final CMapToUnicode embeddedToUnicode ;
95
+
94
96
PdfType0Font (TrueTypeFont ttf , String cmap ) {
95
97
super ();
96
98
if (!PdfEncodings .IDENTITY_H .equals (cmap ) && !PdfEncodings .IDENTITY_V .equals (cmap )) {
@@ -107,6 +109,7 @@ public class PdfType0Font extends PdfFont {
107
109
cmapEncoding = new CMapEncoding (cmap );
108
110
usedGlyphs = new TreeSet <>();
109
111
cidFontType = CID_FONT_TYPE_2 ;
112
+ embeddedToUnicode = null ;
110
113
if (ttf .isFontSpecific ()) {
111
114
specificUnicodeDifferences = new char [256 ];
112
115
byte [] bytes = new byte [1 ];
@@ -135,6 +138,7 @@ public class PdfType0Font extends PdfFont {
135
138
cmapEncoding = new CMapEncoding (cmap , uniMap );
136
139
usedGlyphs = new TreeSet <>();
137
140
cidFontType = CID_FONT_TYPE_0 ;
141
+ embeddedToUnicode = null ;
138
142
}
139
143
140
144
PdfType0Font (PdfDictionary fontDictionary ) {
@@ -151,8 +155,10 @@ public class PdfType0Font extends PdfFont {
151
155
PdfObject toUnicode = fontDictionary .get (PdfName .ToUnicode );
152
156
if (toUnicode == null ) {
153
157
toUnicodeCMap = FontUtil .parseUniversalToUnicodeCMap (ordering );
158
+ embeddedToUnicode = null ;
154
159
} else {
155
160
toUnicodeCMap = FontUtil .processToUnicode (toUnicode );
161
+ embeddedToUnicode = toUnicodeCMap ;
156
162
}
157
163
158
164
if (cmap .isName () && (PdfEncodings .IDENTITY_H .equals (((PdfName ) cmap ).getValue ()) ||
@@ -555,6 +561,11 @@ public GlyphLine decodeIntoGlyphLine(PdfString characterCodes) {
555
561
public boolean appendDecodedCodesToGlyphsList (List <Glyph > list , PdfString characterCodes ) {
556
562
boolean allCodesDecoded = true ;
557
563
564
+ final boolean isToUnicodeEmbedded = embeddedToUnicode != null ;
565
+ final CMapEncoding cmap = getCmap ();
566
+ final FontProgram fontProgram = getFontProgram ();
567
+ final List <byte []> codeSpaceRanges = isToUnicodeEmbedded ? embeddedToUnicode .getCodeSpaceRanges () : cmap .getCodeSpaceRanges ();
568
+
558
569
String charCodesSequence = characterCodes .getValue ();
559
570
// A sequence of one or more bytes shall be extracted from the string and matched against the codespace
560
571
// ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
@@ -568,13 +579,18 @@ public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString charac
568
579
for (int codeLength = 1 ; codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence .length ();
569
580
codeLength ++) {
570
581
code = (code << 8 ) + charCodesSequence .charAt (i + codeLength - 1 );
571
- if (!getCmap ().containsCodeInCodeSpaceRange (code , codeLength )) {
572
- continue ;
573
- } else {
582
+
583
+ if (PdfType0Font .containsCodeInCodeSpaceRange (codeSpaceRanges , code , codeLength )) {
574
584
codeSpaceMatchedLength = codeLength ;
585
+ } else {
586
+ continue ;
575
587
}
576
- int glyphCode = getCmap ().getCidCode (code );
577
- glyph = getFontProgram ().getGlyphByCode (glyphCode );
588
+
589
+ // According to paragraph 9.10.2 of PDF Specification ISO 32000-2, if toUnicode is embedded, it is
590
+ // necessary to use it to map directly code points to unicode. If not embedded, use CMap to map code
591
+ // points to CIDs and then CIDFont to map CIDs to unicode.
592
+ int glyphCode = isToUnicodeEmbedded ? code : cmap .getCidCode (code );
593
+ glyph = fontProgram .getGlyphByCode (glyphCode );
578
594
if (glyph != null ) {
579
595
i += codeLength - 1 ;
580
596
break ;
@@ -594,11 +610,11 @@ public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString charac
594
610
}
595
611
i += codeSpaceMatchedLength - 1 ;
596
612
}
597
- if (glyph != null && glyph .getChars () != null ) {
598
- list .add (glyph );
599
- } else {
600
- list .add (new Glyph (0 , getFontProgram ().getGlyphByCode (0 ).getWidth (), -1 ));
613
+ if (glyph == null || glyph .getChars () == null ) {
614
+ list .add (new Glyph (0 , fontProgram .getGlyphByCode (0 ).getWidth (), -1 ));
601
615
allCodesDecoded = false ;
616
+ } else {
617
+ list .add (glyph );
602
618
}
603
619
}
604
620
return allCodesDecoded ;
@@ -674,6 +690,28 @@ private static String getOrdering(PdfDictionary cidFont) {
674
690
return cidinfo .containsKey (PdfName .Ordering ) ? cidinfo .get (PdfName .Ordering ).toString () : null ;
675
691
}
676
692
693
+ private static boolean containsCodeInCodeSpaceRange (List <byte []> codeSpaceRanges , int code , int length ) {
694
+ for (int i = 0 ; i < codeSpaceRanges .size (); i += 2 ) {
695
+ if (length == codeSpaceRanges .get (i ).length ) {
696
+ int mask = 0xff ;
697
+ int totalShift = 0 ;
698
+ byte [] low = codeSpaceRanges .get (i );
699
+ byte [] high = codeSpaceRanges .get (i + 1 );
700
+ boolean fitsIntoRange = true ;
701
+ for (int ind = length - 1 ; ind >= 0 ; ind --, totalShift += 8 , mask <<= 8 ) {
702
+ int actualByteValue = (code & mask ) >> totalShift ;
703
+ if (!(actualByteValue >= (0xff & low [ind ]) && actualByteValue <= (0xff & high [ind ]))) {
704
+ fitsIntoRange = false ;
705
+ }
706
+ }
707
+ if (fitsIntoRange ) {
708
+ return true ;
709
+ }
710
+ }
711
+ }
712
+ return false ;
713
+ }
714
+
677
715
private void flushFontData () {
678
716
if (cidFontType == CID_FONT_TYPE_0 ) {
679
717
getPdfObject ().put (PdfName .Type , PdfName .Font );
0 commit comments