Skip to content

Commit 81f2f78

Browse files
yulian-gaponenko authored and iText-CI committed
Add PdfFont#appendDecodedCodesToGlyphsList methods to get success status of decoding to glyph line
DEVSIX-5102 Autoported commit. Original commit hash: [ded92fc11]
1 parent 0289621 commit 81f2f78

File tree

5 files changed

+103
-42
lines changed

5 files changed

+103
-42
lines changed

itext/itext.kernel/itext/kernel/font/PdfFont.cs

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,18 +159,37 @@ public virtual bool ContainsGlyph(int unicode) {
159159
public abstract String Decode(PdfString content);
160160

161161
/// <summary>
162-
/// Decodes a given
163-
/// <see cref="iText.Kernel.Pdf.PdfString"/>
164-
/// containing encoded string (e.g. from content stream) into a
162+
/// Decodes a sequence of character codes (e.g. from a content stream) into a
165163
/// <see cref="iText.IO.Font.Otf.GlyphLine"/>
166164
/// </summary>
167-
/// <param name="content">the encoded string</param>
165+
/// <param name="characterCodes">
166+
/// the string which is interpreted as a sequence of character codes. Note that
167+
/// <see cref="iText.Kernel.Pdf.PdfString"/>
168+
/// acts as storage for character code values specific to the given font, therefore
169+
/// individual character codes must not be interpreted as code units of the UTF-16 encoding
170+
/// </param>
168171
/// <returns>
169172
/// the
170173
/// <see cref="iText.IO.Font.Otf.GlyphLine"/>
171174
/// containing the glyphs encoded by the passed string
172175
/// </returns>
173-
public abstract GlyphLine DecodeIntoGlyphLine(PdfString content);
176+
public abstract GlyphLine DecodeIntoGlyphLine(PdfString characterCodes);
177+
178+
/// <summary>
179+
/// Decodes a sequence of character codes (e.g. from a content stream) into a sequence of glyphs
180+
/// and appends them to the passed list.
181+
/// </summary>
182+
/// <param name="list">the list to the end of which decoded glyphs are to be added</param>
183+
/// <param name="characterCodes">
184+
/// the string which is interpreted as a sequence of character codes. Note that
185+
/// <see cref="iText.Kernel.Pdf.PdfString"/>
186+
/// acts as storage for character code values specific to the given font, therefore
187+
/// individual character codes must not be interpreted as code units of the UTF-16 encoding
188+
/// </param>
189+
/// <returns>true if all codes were successfully decoded, false otherwise</returns>
190+
public virtual bool AppendDecodedCodesToGlyphsList(IList<Glyph> list, PdfString characterCodes) {
191+
return false;
192+
}
174193

175194
public abstract float GetContentWidth(PdfString content);
176195

itext/itext.kernel/itext/kernel/font/PdfSimpleFont.cs

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ source product.
4343
*/
4444
using System;
4545
using System.Collections.Generic;
46+
using Common.Logging;
4647
using iText.IO.Font;
4748
using iText.IO.Font.Cmap;
4849
using iText.IO.Font.Constants;
@@ -171,6 +172,17 @@ public virtual FontEncoding GetFontEncoding() {
171172
return fontEncoding;
172173
}
173174

175+
/// <summary>Gets the mapping of character codes to Unicode values based on the /ToUnicode entry of the font dictionary.
176+
/// </summary>
177+
/// <returns>
178+
/// the
179+
/// <see cref="iText.IO.Font.Cmap.CMapToUnicode"/>
180+
/// built based on /ToUnicode, or null if /ToUnicode is not available
181+
/// </returns>
182+
public virtual CMapToUnicode GetToUnicode() {
183+
return toUnicode;
184+
}
185+
174186
public override byte[] ConvertToBytes(String text) {
175187
byte[] bytes = fontEncoding.ConvertToBytes(text);
176188
foreach (byte b in bytes) {
@@ -256,35 +268,51 @@ public override String Decode(PdfString content) {
256268

257269
/// <summary><inheritDoc/></summary>
258270
public override GlyphLine DecodeIntoGlyphLine(PdfString content) {
259-
byte[] contentBytes = content.GetValueBytes();
260-
IList<Glyph> glyphs = new List<Glyph>(contentBytes.Length);
271+
IList<Glyph> glyphs = new List<Glyph>(content.GetValue().Length);
272+
AppendDecodedCodesToGlyphsList(glyphs, content);
273+
return new GlyphLine(glyphs);
274+
}
275+
276+
/// <summary><inheritDoc/></summary>
277+
public override bool AppendDecodedCodesToGlyphsList(IList<Glyph> list, PdfString characterCodes) {
278+
bool allCodesDecoded = true;
279+
FontEncoding enc = GetFontEncoding();
280+
byte[] contentBytes = characterCodes.GetValueBytes();
261281
foreach (byte b in contentBytes) {
262282
int code = b & 0xff;
263283
Glyph glyph = null;
264-
if (toUnicode != null && toUnicode.Lookup(code) != null && (glyph = fontProgram.GetGlyphByCode(code)) != null
265-
) {
266-
if (!JavaUtil.ArraysEquals(toUnicode.Lookup(code), glyph.GetChars())) {
284+
CMapToUnicode toUnicodeCMap = GetToUnicode();
285+
if (toUnicodeCMap != null && toUnicodeCMap.Lookup(code) != null && (glyph = GetFontProgram().GetGlyphByCode
286+
(code)) != null) {
287+
if (!JavaUtil.ArraysEquals(toUnicodeCMap.Lookup(code), glyph.GetChars())) {
267288
// Copy the glyph because the original one may be reused (e.g. standard Helvetica font program)
268289
glyph = new Glyph(glyph);
269-
glyph.SetChars(toUnicode.Lookup(code));
290+
glyph.SetChars(toUnicodeCMap.Lookup(code));
270291
}
271292
}
272293
else {
273-
int uni = fontEncoding.GetUnicode(code);
294+
int uni = enc.GetUnicode(code);
274295
if (uni > -1) {
275296
glyph = GetGlyph(uni);
276297
}
277298
else {
278-
if (fontEncoding.GetBaseEncoding() == null) {
279-
glyph = fontProgram.GetGlyphByCode(code);
299+
if (enc.GetBaseEncoding() == null) {
300+
glyph = GetFontProgram().GetGlyphByCode(code);
280301
}
281302
}
282303
}
283304
if (glyph != null) {
284-
glyphs.Add(glyph);
305+
list.Add(glyph);
306+
}
307+
else {
308+
ILog logger = LogManager.GetLogger(this.GetType());
309+
if (logger.IsWarnEnabled) {
310+
logger.Warn(MessageFormatUtil.Format(iText.IO.LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, code));
311+
}
312+
allCodesDecoded = false;
285313
}
286314
}
287-
return new GlyphLine(glyphs);
315+
return allCodesDecoded;
288316
}
289317

290318
public override float GetContentWidth(PdfString content) {

itext/itext.kernel/itext/kernel/font/PdfType0Font.cs

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ source product.
5555

5656
namespace iText.Kernel.Font {
5757
public class PdfType0Font : PdfFont {
58+
/// <summary>The code length shall not be greater than 4.</summary>
59+
private const int MAX_CID_CODE_LENGTH = 4;
60+
5861
private static readonly byte[] rotbits = new byte[] { (byte)0x80, (byte)0x40, (byte)0x20, (byte)0x10, (byte
5962
)0x08, (byte)0x04, (byte)0x02, (byte)0x01 };
6063

@@ -533,52 +536,63 @@ public override String Decode(PdfString content) {
533536
}
534537

535538
/// <summary><inheritDoc/></summary>
536-
public override GlyphLine DecodeIntoGlyphLine(PdfString content) {
537-
//A sequence of one or more bytes shall be extracted from the string and matched against the codespace
538-
//ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
539-
//found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
540-
//ranges. This process continues for successively longer codes until a match is found or all codespace ranges
541-
//have been tested. There will be at most one match because codespace ranges shall not overlap.
542-
String cids = content.GetValue();
539+
public override GlyphLine DecodeIntoGlyphLine(PdfString characterCodes) {
543540
IList<Glyph> glyphs = new List<Glyph>();
544-
for (int i = 0; i < cids.Length; i++) {
545-
//The code length shall not be greater than 4.
541+
AppendDecodedCodesToGlyphsList(glyphs, characterCodes);
542+
return new GlyphLine(glyphs);
543+
}
544+
545+
/// <summary><inheritDoc/></summary>
546+
public override bool AppendDecodedCodesToGlyphsList(IList<Glyph> list, PdfString characterCodes) {
547+
bool allCodesDecoded = true;
548+
String charCodesSequence = characterCodes.GetValue();
549+
// A sequence of one or more bytes shall be extracted from the string and matched against the codespace
550+
// ranges in the CMap. That is, the first byte shall be matched against 1-byte codespace ranges; if no match is
551+
// found, a second byte shall be extracted, and the 2-byte code shall be matched against 2-byte codespace
552+
// ranges. This process continues for successively longer codes until a match is found or all codespace ranges
553+
// have been tested. There will be at most one match because codespace ranges shall not overlap.
554+
for (int i = 0; i < charCodesSequence.Length; i++) {
546555
int code = 0;
547556
Glyph glyph = null;
548557
int codeSpaceMatchedLength = 1;
549-
for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.Length; codeLength++) {
550-
code = (code << 8) + cids[i + codeLength - 1];
551-
if (!cmapEncoding.ContainsCodeInCodeSpaceRange(code, codeLength)) {
558+
for (int codeLength = 1; codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.Length;
559+
codeLength++) {
560+
code = (code << 8) + charCodesSequence[i + codeLength - 1];
561+
if (!GetCmap().ContainsCodeInCodeSpaceRange(code, codeLength)) {
552562
continue;
553563
}
554564
else {
555565
codeSpaceMatchedLength = codeLength;
556566
}
557-
int glyphCode = cmapEncoding.GetCidCode(code);
558-
glyph = fontProgram.GetGlyphByCode(glyphCode);
567+
int glyphCode = GetCmap().GetCidCode(code);
568+
glyph = GetFontProgram().GetGlyphByCode(glyphCode);
559569
if (glyph != null) {
560570
i += codeLength - 1;
561571
break;
562572
}
563573
}
564574
if (glyph == null) {
565-
StringBuilder failedCodes = new StringBuilder();
566-
for (int codeLength = 1; codeLength <= 4 && i + codeLength <= cids.Length; codeLength++) {
567-
failedCodes.Append((int)cids[i + codeLength - 1]).Append(" ");
568-
}
569575
ILog logger = LogManager.GetLogger(typeof(iText.Kernel.Font.PdfType0Font));
570-
logger.Warn(MessageFormatUtil.Format(iText.IO.LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes
571-
.ToString()));
576+
if (logger.IsWarnEnabled) {
577+
StringBuilder failedCodes = new StringBuilder();
578+
for (int codeLength = 1; codeLength <= MAX_CID_CODE_LENGTH && i + codeLength <= charCodesSequence.Length;
579+
codeLength++) {
580+
failedCodes.Append((int)charCodesSequence[i + codeLength - 1]).Append(" ");
581+
}
582+
logger.Warn(MessageFormatUtil.Format(iText.IO.LogMessageConstant.COULD_NOT_FIND_GLYPH_WITH_CODE, failedCodes
583+
.ToString()));
584+
}
572585
i += codeSpaceMatchedLength - 1;
573586
}
574587
if (glyph != null && glyph.GetChars() != null) {
575-
glyphs.Add(glyph);
588+
list.Add(glyph);
576589
}
577590
else {
578-
glyphs.Add(new Glyph(0, fontProgram.GetGlyphByCode(0).GetWidth(), -1));
591+
list.Add(new Glyph(0, GetFontProgram().GetGlyphByCode(0).GetWidth(), -1));
592+
allCodesDecoded = false;
579593
}
580594
}
581-
return new GlyphLine(glyphs);
595+
return allCodesDecoded;
582596
}
583597

584598
public override float GetContentWidth(PdfString content) {

itext/itext.kernel/itext/kernel/font/PdfType3Font.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,9 @@ private void AddGlyphsFromCharProcs(PdfDictionary charProcsDic, int[] widths) {
388388
return;
389389
}
390390
IDictionary<int, int?> unicodeToCode = null;
391-
if (toUnicode != null) {
391+
if (GetToUnicode() != null) {
392392
try {
393-
unicodeToCode = toUnicode.CreateReverseMapping();
393+
unicodeToCode = GetToUnicode().CreateReverseMapping();
394394
}
395395
catch (Exception) {
396396
}

port-hash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
79e8eb90e75984beab3aa95b2aebeb8bd30e54da
1+
ded92fc117a2041b90240ff26a2d1795c7763630

0 commit comments

Comments
 (0)