Skip to content

Commit 20c9c74

Browse files
author
glenn.volckaert
committed
Add correct unicode mapping for IDENTITY_V encoding
DEVSIX-7046
1 parent 1040385 commit 20c9c74

File tree

2 files changed

+62
-22
lines changed

2 files changed

+62
-22
lines changed

kernel/src/main/java/com/itextpdf/kernel/font/PdfType0Font.java

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ This file is part of the iText (R) project.
4343
*/
4444
package com.itextpdf.kernel.font;
4545

46-
import com.itextpdf.io.logs.IoLogMessageConstant;
46+
import com.itextpdf.commons.utils.MessageFormatUtil;
4747
import com.itextpdf.io.font.CFFFontSubset;
4848
import com.itextpdf.io.font.CMapEncoding;
4949
import com.itextpdf.io.font.CidFont;
@@ -56,14 +56,14 @@ This file is part of the iText (R) project.
5656
import com.itextpdf.io.font.cmap.CMapToUnicode;
5757
import com.itextpdf.io.font.otf.Glyph;
5858
import com.itextpdf.io.font.otf.GlyphLine;
59+
import com.itextpdf.io.logs.IoLogMessageConstant;
5960
import com.itextpdf.io.source.ByteArrayOutputStream;
6061
import com.itextpdf.io.source.ByteBuffer;
6162
import com.itextpdf.io.source.OutputStream;
62-
import com.itextpdf.commons.utils.MessageFormatUtil;
6363
import com.itextpdf.io.util.StreamUtil;
6464
import com.itextpdf.io.util.TextUtil;
65-
import com.itextpdf.kernel.exceptions.PdfException;
6665
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
66+
import com.itextpdf.kernel.exceptions.PdfException;
6767
import com.itextpdf.kernel.pdf.PdfArray;
6868
import com.itextpdf.kernel.pdf.PdfDictionary;
6969
import com.itextpdf.kernel.pdf.PdfLiteral;
@@ -74,6 +74,7 @@ This file is part of the iText (R) project.
7474
import com.itextpdf.kernel.pdf.PdfStream;
7575
import com.itextpdf.kernel.pdf.PdfString;
7676
import com.itextpdf.kernel.pdf.PdfVersion;
77+
7778
import java.io.IOException;
7879
import java.util.ArrayList;
7980
import java.util.List;
@@ -164,9 +165,11 @@ public class PdfType0Font extends PdfFont {
164165
PdfObject cmap = fontDictionary.get(PdfName.Encoding);
165166
PdfObject toUnicode = fontDictionary.get(PdfName.ToUnicode);
166167
CMapToUnicode toUnicodeCMap = FontUtil.processToUnicode(toUnicode);
167-
if (cmap.isName() && (PdfEncodings.IDENTITY_H.equals(((PdfName) cmap).getValue()) || PdfEncodings.IDENTITY_V.equals(((PdfName) cmap).getValue()))) {
168+
if (cmap.isName() && (PdfEncodings.IDENTITY_H.equals(((PdfName) cmap).getValue()) ||
169+
PdfEncodings.IDENTITY_V.equals(((PdfName) cmap).getValue()))) {
168170
if (toUnicodeCMap == null) {
169-
String uniMap = getUniMapFromOrdering(getOrdering(cidFont));
171+
String uniMap = getUniMapFromOrdering(getOrdering(cidFont),
172+
PdfEncodings.IDENTITY_H.equals(((PdfName) cmap).getValue()));
170173
toUnicodeCMap = FontUtil.getToUnicodeFromUniMap(uniMap);
171174
if (toUnicodeCMap == null) {
172175
toUnicodeCMap = FontUtil.getToUnicodeFromUniMap(PdfEncodings.IDENTITY_H);
@@ -180,7 +183,7 @@ public class PdfType0Font extends PdfFont {
180183
embedded = ((IDocFontProgram) fontProgram).getFontFile() != null;
181184
} else {
182185
String cidFontName = cidFont.getAsName(PdfName.BaseFont).getValue();
183-
String uniMap = getUniMapFromOrdering(getOrdering(cidFont));
186+
String uniMap = getUniMapFromOrdering(getOrdering(cidFont), true);
184187
if (uniMap != null && uniMap.startsWith("Uni") && CidFontProperties.isCidFont(cidFontName, uniMap)) {
185188
try {
186189
fontProgram = FontProgramFactory.createFont(cidFontName);
@@ -201,10 +204,12 @@ public class PdfType0Font extends PdfFont {
201204
}
202205
if (fontProgram == null) {
203206
throw new PdfException(MessageFormatUtil.format(
204-
KernelExceptionMessageConstant.CANNOT_RECOGNISE_DOCUMENT_FONT_WITH_ENCODING, cidFontName, cmap));
207+
KernelExceptionMessageConstant.CANNOT_RECOGNISE_DOCUMENT_FONT_WITH_ENCODING,
208+
cidFontName, cmap));
205209
}
206210
}
207-
// DescendantFonts is a one-element array specifying the CIDFont dictionary that is the descendant of this Type 0 font.
211+
// DescendantFonts is a one-element array specifying the CIDFont dictionary
212+
// that is the descendant of this Type 0 font.
208213
PdfDictionary cidFontDictionary = fontDictionary.getAsArray(PdfName.DescendantFonts).getAsDictionary(0);
209214
// Required according to the spec
210215
PdfName subtype = cidFontDictionary.getAsName(PdfName.Subtype);
@@ -219,6 +224,41 @@ public class PdfType0Font extends PdfFont {
219224
subset = false;
220225
}
221226

227+
public static String getUniMapFromOrdering(String ordering, boolean horizontal) {
228+
String result = null;
229+
switch (ordering) {
230+
case "CNS1":
231+
result = "UniCNS-UTF16-";
232+
break;
233+
case "Japan1":
234+
result = "UniJIS-UTF16-";
235+
break;
236+
case "Korea1":
237+
result = "UniKS-UTF16-";
238+
break;
239+
case "GB1":
240+
result = "UniGB-UTF16-";
241+
break;
242+
case "Identity":
243+
result = "Identity-";
244+
break;
245+
default:
246+
return null;
247+
}
248+
if (horizontal) {
249+
return result + 'H';
250+
}
251+
return result + 'V';
252+
}
253+
254+
/**
255+
* Get Unicode mapping name from ordering.
256+
* @param ordering the text ordering on which to base the Unicode mapping
257+
* @return Unicode mapping name
258+
* @deprecated Replaced by {@link #getUniMapFromOrdering(String, boolean)}
259+
* for proper handling of IDENTITY_V encoding.
260+
*/
261+
@Deprecated
222262
public static String getUniMapFromOrdering(String ordering) {
223263
switch (ordering) {
224264
case "CNS1":
@@ -841,18 +881,18 @@ private PdfObject generateWidthsArray() {
841881
public PdfStream getToUnicode() {
842882
OutputStream<ByteArrayOutputStream> stream = new OutputStream<>(new ByteArrayOutputStream());
843883
stream.writeString("/CIDInit /ProcSet findresource begin\n" +
844-
"12 dict begin\n" +
845-
"begincmap\n" +
846-
"/CIDSystemInfo\n" +
847-
"<< /Registry (Adobe)\n" +
848-
"/Ordering (UCS)\n" +
849-
"/Supplement 0\n" +
850-
">> def\n" +
851-
"/CMapName /Adobe-Identity-UCS def\n" +
852-
"/CMapType 2 def\n" +
853-
"1 begincodespacerange\n" +
854-
"<0000><FFFF>\n" +
855-
"endcodespacerange\n");
884+
"12 dict begin\n" +
885+
"begincmap\n" +
886+
"/CIDSystemInfo\n" +
887+
"<< /Registry (Adobe)\n" +
888+
"/Ordering (UCS)\n" +
889+
"/Supplement 0\n" +
890+
">> def\n" +
891+
"/CMapName /Adobe-Identity-UCS def\n" +
892+
"/CMapType 2 def\n" +
893+
"1 begincodespacerange\n" +
894+
"<0000><FFFF>\n" +
895+
"endcodespacerange\n");
856896

857897
//accumulate long tag into a subset and write it.
858898
ArrayList<Glyph> glyphGroup = new ArrayList<>(100);
@@ -937,4 +977,4 @@ private static String normalizeEncoding(String encoding) {
937977
? PdfEncodings.IDENTITY_H
938978
: encoding;
939979
}
940-
}
980+
}

kernel/src/test/java/com/itextpdf/kernel/pdf/canvas/parser/TextRenderInfoTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public void testCharacterRenderInfos() throws Exception {
8585
* Japanese. TextRenderInfo threw an AIOOBE for some characters.
8686
*/
8787
@Test
88-
@LogMessages(messages = {@LogMessage(messageTemplate = IoLogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, count = 2)})
88+
//@LogMessages(messages = {@LogMessage(messageTemplate = IoLogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, count = 2)})
8989
public void testUnicodeEmptyString() throws Exception {
9090
StringBuilder sb = new StringBuilder();
9191
String inFile = "japanese_text.pdf";

0 commit comments

Comments
 (0)