Skip to content

Commit ded92fc

Browse files
yulian-gaponenko authored and iText-CI committed
Add PdfFont#appendDecodedCodesToGlyphsList methods to get success status of decoding to glyph line
DEVSIX-5102
1 parent 79e8eb9 commit ded92fc

File tree

4 files changed

+110
-37
lines changed

4 files changed

+110
-37
lines changed

kernel/src/main/java/com/itextpdf/kernel/font/PdfFont.java

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,30 @@ public boolean containsGlyph(int unicode) {
175175
public abstract String decode(PdfString content);
176176

177177
/**
178-
* Decodes a given {@link PdfString} containing encoded string (e.g. from content stream) into a {@link GlyphLine}
178+
* Decodes a sequence of character codes (e.g. from a content stream) into a {@link GlyphLine}
179+
*
180+
* @param characterCodes the string which is interpreted as a sequence of character codes. Note, that {@link
181+
* PdfString} acts as a storage for char code values specific to given font, therefore
182+
* individual character codes must not be interpreted as code units of the UTF-16 encoding
179183
*
180-
* @param content the encoded string
181184
* @return the {@link GlyphLine} containing the glyphs encoded by the passed string
182185
*/
183-
public abstract GlyphLine decodeIntoGlyphLine(PdfString content);
186+
public abstract GlyphLine decodeIntoGlyphLine(PdfString characterCodes);
187+
188+
/**
189+
* Decodes a sequence of character codes (e.g. from a content stream) into a sequence of glyphs
190+
* and appends them to the passed list.
191+
*
192+
* @param list the list to the end of which decoded glyphs are to be added
193+
* @param characterCodes the string which is interpreted as a sequence of character codes. Note, that {@link
194+
* PdfString} acts as a storage for char code values specific to given font, therefore
195+
* individual character codes must not be interpreted as code units of the UTF-16 encoding
196+
*
197+
* @return true if all codes were successfully decoded, false otherwise
198+
*/
199+
public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
200+
return false;
201+
}
184202

185203
public abstract float getContentWidth(PdfString content);
186204

kernel/src/main/java/com/itextpdf/kernel/font/PdfSimpleFont.java

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ This file is part of the iText (R) project.
4343
*/
4444
package com.itextpdf.kernel.font;
4545

46+
import com.itextpdf.io.LogMessageConstant;
4647
import com.itextpdf.io.font.FontEncoding;
4748
import com.itextpdf.io.font.FontMetrics;
4849
import com.itextpdf.io.font.FontNames;
@@ -53,6 +54,7 @@ This file is part of the iText (R) project.
5354
import com.itextpdf.io.font.otf.Glyph;
5455
import com.itextpdf.io.font.otf.GlyphLine;
5556
import com.itextpdf.io.util.ArrayUtil;
57+
import com.itextpdf.io.util.MessageFormatUtil;
5658
import com.itextpdf.io.util.StreamUtil;
5759
import com.itextpdf.io.util.TextUtil;
5860
import com.itextpdf.kernel.pdf.PdfArray;
@@ -65,6 +67,8 @@ This file is part of the iText (R) project.
6567
import java.util.ArrayList;
6668
import java.util.Arrays;
6769
import java.util.List;
70+
import org.slf4j.Logger;
71+
import org.slf4j.LoggerFactory;
6872

6973
public abstract class PdfSimpleFont<T extends FontProgram> extends PdfFont {
7074

@@ -189,6 +193,15 @@ public FontEncoding getFontEncoding() {
189193
return fontEncoding;
190194
}
191195

196+
/**
197+
* Gets the mapping of character codes to Unicode values based on the /ToUnicode entry of the font dictionary.
198+
*
199+
* @return the {@link CMapToUnicode} built based on /ToUnicode, or null if /ToUnicode is not available
200+
*/
201+
public CMapToUnicode getToUnicode() {
202+
return toUnicode;
203+
}
204+
192205
@Override
193206
public byte[] convertToBytes(String text) {
194207
byte[] bytes = fontEncoding.convertToBytes(text);
@@ -279,32 +292,53 @@ public String decode(PdfString content) {
279292
*/
280293
@Override
281294
public GlyphLine decodeIntoGlyphLine(PdfString content) {
282-
byte[] contentBytes = content.getValueBytes();
283-
List<Glyph> glyphs = new ArrayList<>(contentBytes.length);
295+
List<Glyph> glyphs = new ArrayList<>(content.getValue().length());
296+
appendDecodedCodesToGlyphsList(glyphs, content);
297+
return new GlyphLine(glyphs);
298+
}
299+
300+
/**
301+
* {@inheritDoc}
302+
*/
303+
@Override
304+
public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
305+
boolean allCodesDecoded = true;
306+
307+
FontEncoding enc = getFontEncoding();
308+
byte[] contentBytes = characterCodes.getValueBytes();
284309
for (byte b : contentBytes) {
285310
int code = b & 0xff;
286311
Glyph glyph = null;
287-
if (toUnicode != null && toUnicode.lookup(code) != null && (glyph = fontProgram.getGlyphByCode(code)) != null) {
288-
if (!Arrays.equals(toUnicode.lookup(code), glyph.getChars())) {
312+
CMapToUnicode toUnicodeCMap = getToUnicode();
313+
if (toUnicodeCMap != null && toUnicodeCMap.lookup(code) != null
314+
&& (glyph = getFontProgram().getGlyphByCode(code)) != null) {
315+
if (!Arrays.equals(toUnicodeCMap.lookup(code), glyph.getChars())) {
289316
// Copy the glyph because the original one may be reused (e.g. standard Helvetica font program)
290317
glyph = new Glyph(glyph);
291-
glyph.setChars(toUnicode.lookup(code));
318+
glyph.setChars(toUnicodeCMap.lookup(code));
292319
}
293320
} else {
294-
int uni = fontEncoding.getUnicode(code);
321+
int uni = enc.getUnicode(code);
295322
if (uni > -1) {
296323
glyph = getGlyph(uni);
297-
} else if (fontEncoding.getBaseEncoding() == null) {
298-
glyph = fontProgram.getGlyphByCode(code);
324+
} else if (enc.getBaseEncoding() == null) {
325+
glyph = getFontProgram().getGlyphByCode(code);
299326
}
300327
}
301328
if (glyph != null) {
302-
glyphs.add(glyph);
329+
list.add(glyph);
330+
} else {
331+
Logger logger = LoggerFactory.getLogger(this.getClass());
332+
if (logger.isWarnEnabled()) {
333+
logger.warn(MessageFormatUtil.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, code));
334+
}
335+
allCodesDecoded = false;
303336
}
304337
}
305-
return new GlyphLine(glyphs);
338+
return allCodesDecoded;
306339
}
307340

341+
308342
@Override
309343
public float getContentWidth(PdfString content) {
310344
float width = 0;

kernel/src/main/java/com/itextpdf/kernel/font/PdfType0Font.java

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ public class PdfType0Font extends PdfFont {
8787

8888
private static final long serialVersionUID = -8033620300884193397L;
8989

90+
/**
91+
* The code length shall not be greater than 4.
92+
*/
93+
private static final int MAX_CID_CODE_LENGTH = 4;
9094
private static final byte[] rotbits = {(byte) 0x80, (byte) 0x40, (byte) 0x20, (byte) 0x10, (byte) 0x08, (byte) 0x04, (byte) 0x02, (byte) 0x01};
9195

9296
/**
@@ -523,49 +527,66 @@ public String decode(PdfString content) {
523527
* {@inheritDoc}
524528
*/
525529
@Override
526-
public GlyphLine decodeIntoGlyphLine(PdfString content) {
527-
//A sequence of one or more bytes shall be extracted from the string and matched against the codespace
528-
//ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
529-
//found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
530-
//ranges. This process continues for successively longer codes until a match is found or all codespace ranges
531-
//have been tested. There will be at most one match because codespace ranges shall not overlap.
532-
String cids = content.getValue();
530+
public GlyphLine decodeIntoGlyphLine(PdfString characterCodes) {
533531
List<Glyph> glyphs = new ArrayList<>();
534-
for (int i = 0; i < cids.length(); i++) {
535-
//The code length shall not be greater than 4.
532+
appendDecodedCodesToGlyphsList(glyphs, characterCodes);
533+
return new GlyphLine(glyphs);
534+
}
535+
536+
/**
537+
* {@inheritDoc}
538+
*/
539+
@Override
540+
public boolean appendDecodedCodesToGlyphsList(List<Glyph> list, PdfString characterCodes) {
541+
boolean allCodesDecoded = true;
542+
543+
String charCodesSequence = characterCodes.getValue();
544+
// A sequence of one or more bytes shall be extracted from the string and matched against the codespace
545+
// ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
546+
// found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
547+
// ranges. This process continues for successively longer codes until a match is found or all codespace ranges
548+
// have been tested. There will be at most one match because codespace ranges shall not overlap.
549+
for (int i = 0; i < charCodesSequence.length(); i++) {
536550
int code = 0;
537551
Glyph glyph = null;
538552
int codeSpaceMatchedLength = 1;
539-
for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.length(); codeLength++) {
540-
code = (code << 8) + cids.charAt(i + codeLength - 1);
541-
if (!cmapEncoding.containsCodeInCodeSpaceRange(code, codeLength)) {
553+
for (int codeLength = 1; codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.length();
554+
codeLength++) {
555+
code = (code << 8) + charCodesSequence.charAt(i + codeLength - 1);
556+
if (!getCmap().containsCodeInCodeSpaceRange(code, codeLength)) {
542557
continue;
543558
} else {
544559
codeSpaceMatchedLength = codeLength;
545560
}
546-
int glyphCode = cmapEncoding.getCidCode(code);
547-
glyph = fontProgram.getGlyphByCode(glyphCode);
561+
int glyphCode = getCmap().getCidCode(code);
562+
glyph = getFontProgram().getGlyphByCode(glyphCode);
548563
if (glyph != null) {
549564
i += codeLength - 1;
550565
break;
551566
}
552567
}
553568
if (glyph == null) {
554-
StringBuilder failedCodes = new StringBuilder();
555-
for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.length(); codeLength++) {
556-
failedCodes.append((int) cids.charAt(i + codeLength - 1)).append(" ");
557-
}
558569
Logger logger = LoggerFactory.getLogger(PdfType0Font.class);
559-
logger.warn(MessageFormatUtil.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes.toString()));
570+
if (logger.isWarnEnabled()) {
571+
StringBuilder failedCodes = new StringBuilder();
572+
for (int codeLength = 1;
573+
codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.length();
574+
codeLength++) {
575+
failedCodes.append((int) charCodesSequence.charAt(i + codeLength - 1)).append(" ");
576+
}
577+
logger.warn(MessageFormatUtil
578+
.format(LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes.toString()));
579+
}
560580
i += codeSpaceMatchedLength - 1;
561581
}
562582
if (glyph != null && glyph.getChars() != null) {
563-
glyphs.add(glyph);
583+
list.add(glyph);
564584
} else {
565-
glyphs.add(new Glyph(0, fontProgram.getGlyphByCode(0).getWidth(), -1));
585+
list.add(new Glyph(0, getFontProgram().getGlyphByCode(0).getWidth(), -1));
586+
allCodesDecoded = false;
566587
}
567588
}
568-
return new GlyphLine(glyphs);
589+
return allCodesDecoded;
569590
}
570591

571592
@Override

kernel/src/main/java/com/itextpdf/kernel/font/PdfType3Font.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,8 @@ private void addGlyphsFromCharProcs(PdfDictionary charProcsDic, int[] widths) {
422422
return;
423423
}
424424
Map<Integer, Integer> unicodeToCode = null;
425-
if (toUnicode != null) {
426-
try { unicodeToCode = toUnicode.createReverseMapping(); } catch (Exception ignored) {}
425+
if (getToUnicode() != null) {
426+
try { unicodeToCode = getToUnicode().createReverseMapping(); } catch (Exception ignored) {}
427427
}
428428

429429
for (PdfName glyphName : charProcsDic.keySet()) {

0 commit comments

Comments
 (0)