Skip to content

Commit 980f467

Browse files
Snipx authored and itext-teamcity committed
Merge branch 'feature/DEVSIX-1058' into develop
Autoported commit. Original commit hash: [4e4cfb451]
2 parents 1561fda + 515ef81 commit 980f467

File tree

16 files changed

+301
-91
lines changed

16 files changed

+301
-91
lines changed

itext.tests/itext.kernel.tests/itext/kernel/pdf/EncodingTest.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ source product.
4646
using iText.IO.Font.Constants;
4747
using iText.Kernel.Font;
4848
using iText.Kernel.Pdf.Canvas;
49+
using iText.Kernel.Pdf.Canvas.Parser;
4950
using iText.Kernel.Utils;
5051
using iText.Test;
5152

@@ -288,5 +289,23 @@ public virtual void SymbolTrueTypeFontSameCharsIdentityTest() {
288289
NUnit.Framework.Assert.IsNull(new CompareTool().CompareByContent(outputFolder + fileName, sourceFolder + "cmp_"
289290
+ fileName, outputFolder, "diff_"));
290291
}
292+
293+
/// <summary>
/// Extracts the text of the first page of encodingStream01.pdf and checks that it equals "abc".
/// </summary>
/// <exception cref="System.IO.IOException"/>
[NUnit.Framework.Test]
public virtual void EncodingStreamExtractionTest() {
    String fileName = sourceFolder + "encodingStream01.pdf";
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(fileName));
    String extractedText;
    try {
        extractedText = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(1));
    }
    finally {
        // Close the document (and the reader it wraps) even when extraction throws,
        // so the source file is not left open for the rest of the test run.
        pdfDocument.Close();
    }
    NUnit.Framework.Assert.AreEqual("abc", extractedText);
}
301+
302+
/// <summary>
/// Extracts the text of the first page of differentCodeSpaceRangeLengths01.pdf and checks that
/// both the Latin and the CJK characters are returned in order.
/// </summary>
/// <exception cref="System.IO.IOException"/>
[NUnit.Framework.Test]
public virtual void DifferentCodeSpaceRangeLengthsExtractionTest() {
    String fileName = sourceFolder + "differentCodeSpaceRangeLengths01.pdf";
    PdfDocument pdfDocument = new PdfDocument(new PdfReader(fileName));
    String extractedText;
    try {
        extractedText = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(1));
    }
    finally {
        // Close the document (and the reader it wraps) even when extraction throws,
        // so the source file is not left open for the rest of the test run.
        pdfDocument.Close();
    }
    NUnit.Framework.Assert.AreEqual("Hello\u7121\u540dworld\u6b98\u528d", extractedText);
}
291310
}
292311
}

itext.tests/itext.kernel.tests/itext/kernel/pdf/PdfFontTest.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,31 @@ public virtual void CreateDocumentWithKozmin() {
105105
"diff_"));
106106
}
107107

108+
/// <summary>
/// Writes a one-page, uncompressed document whose text is shown with the KozMinPro-Regular font
/// and the 83pv-RKSJ-H CMap, then compares the result with the reference PDF by content.
/// </summary>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception"/>
[NUnit.Framework.Test]
public virtual void CreateDocumentWithKozminAndDifferentCodespaceRanges() {
    String outPdf = destinationFolder + "DocumentWithKozminDifferentCodespaceRanges.pdf";
    String cmpPdf = sourceFolder + "cmp_DocumentWithKozminDifferentCodespaceRanges.pdf";

    PdfWriter pdfWriter = new PdfWriter(outPdf);
    pdfWriter.SetCompressionLevel(CompressionConstants.NO_COMPRESSION);
    PdfDocument document = new PdfDocument(pdfWriter);
    document.GetDocumentInfo().SetAuthor(author).SetCreator(creator).SetTitle("Type 0 test");

    PdfFont kozminFont = PdfFontFactory.CreateFont("KozMinPro-Regular", "83pv-RKSJ-H", true);
    NUnit.Framework.Assert.IsTrue(kozminFont is PdfType0Font, "Type0Font expected");
    NUnit.Framework.Assert.IsTrue(kozminFont.GetFontProgram() is CidFont, "CidFont expected");

    PdfPage firstPage = document.AddNewPage();
    PdfCanvas pageCanvas = new PdfCanvas(firstPage);
    // The text mixes one-byte (Latin) and two-byte (CJK) characters.
    pageCanvas
        .SaveState()
        .BeginText()
        .MoveText(36, 700)
        .SetFontAndSize(kozminFont, 50)
        .ShowText(kozminFont.CreateGlyphLine("Hello\u7121\u540dworld\u6b98\u528d"))
        .EndText()
        .RestoreState();
    pageCanvas.Release();
    firstPage.Flush();
    document.Close();

    String comparisonResult = new CompareTool().CompareByContent(outPdf, cmpPdf, destinationFolder, "diff_");
    NUnit.Framework.Assert.IsNull(comparisonResult);
}
132+
108133
/// <exception cref="System.IO.IOException"/>
109134
/// <exception cref="System.Exception"/>
110135
[NUnit.Framework.Test]
Binary file not shown.

itext/itext.io/itext/io/LogMessageConstant.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ public const String ENCRYPTED_PAYLOAD_FILE_SPEC_SHALL_HAVE_AFRELATIONSHIP_FILED_
117117

118118
public const String EXISTING_TAG_STRUCTURE_ROOT_IS_NOT_STANDARD = "Existing tag structure of the document has a root of \"{0}\" role in \"{1}\" namespace that is not mapped to the standard role.";
119119

120+
public const String FAILED_TO_DETERMINE_CID_FONT_SUBTYPE = "Failed to determine CIDFont subtype. The type of CIDFont shall be CIDFontType0 or CIDFontType2.";
121+
122+
public const String FAILED_TO_PARSE_ENCODING_STREAM = "Failed to parse encoding stream.";
123+
120124
public const String FLUSHED_OBJECT_CONTAINS_FREE_REFERENCE = "Flushed object contains indirect reference which is free. Null object will be written instead.";
121125

122126
public const String FLUSHED_OBJECT_CONTAINS_REFERENCE_WHICH_NOT_REFER_TO_ANY_OBJECT = "Flushed object contains indirect reference which doesn't refer to any other object. Null object will be written instead.";

itext/itext.io/itext/io/font/CMapEncoding.cs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,17 @@ source product.
4242
4343
*/
4444
using System;
45+
using System.Collections.Generic;
46+
using Common.Logging;
4547
using iText.IO.Font.Cmap;
48+
using iText.IO.Source;
4649
using iText.IO.Util;
4750

4851
namespace iText.IO.Font {
4952
public class CMapEncoding {
53+
private static readonly IList<byte[]> IDENTITY_H_V_CODESPACE_RANGES = iText.IO.Util.JavaUtil.ArraysAsList(
54+
new byte[] { 0, 0 }, new byte[] { (byte)0xff, (byte)0xff });
55+
5056
private String cmap;
5157

5258
private String uniMap;
@@ -59,13 +65,18 @@ public class CMapEncoding {
5965

6066
private IntHashtable code2Cid;
6167

68+
private IList<byte[]> codeSpaceRanges;
69+
6270
/// <summary>
/// Creates an encoding for the given CMap name, using the two-byte Identity-H/V code space ranges.
/// </summary>
/// <param name="cmap">CMap name.</param>
public CMapEncoding(String cmap) {
    this.cmap = cmap;
    // Actually this constructor is only called for Identity-H/V cmaps currently.
    // Even for the hypothetical case of non-Identity-H/V, use Identity-H/V ranges (two byte ranges)
    // for compatibility with previous behavior.
    this.codeSpaceRanges = IDENTITY_H_V_CODESPACE_RANGES;
    // isDirect becomes true if the CMap is Identity-H/V.
    bool identityCmap = cmap.Equals(PdfEncodings.IDENTITY_H) || cmap.Equals(PdfEncodings.IDENTITY_V);
    if (identityCmap) {
        isDirect = true;
    }
}
7081

7182
/// <param name="cmap">CMap name.</param>
@@ -76,10 +87,25 @@ public CMapEncoding(String cmap, String uniMap) {
7687
if (cmap.Equals(PdfEncodings.IDENTITY_H) || cmap.Equals(PdfEncodings.IDENTITY_V)) {
7788
cid2Uni = FontCache.GetCid2UniCmap(uniMap);
7889
isDirect = true;
90+
this.codeSpaceRanges = IDENTITY_H_V_CODESPACE_RANGES;
7991
}
8092
else {
8193
cid2Code = FontCache.GetCid2Byte(cmap);
8294
code2Cid = cid2Code.GetReversMap();
95+
this.codeSpaceRanges = cid2Code.GetCodeSpaceRanges();
96+
}
97+
}
98+
99+
/// <summary>
/// Creates an encoding from the raw bytes of a CMap, e.g. an embedded encoding stream.
/// </summary>
/// <remarks>
/// A parsing failure is logged and not rethrown. The lookup structures are then built from
/// whatever was parsed before the failure, so the instance never holds null tables.
/// </remarks>
/// <param name="cmap">CMap name.</param>
/// <param name="cmapBytes">Content of the CMap to parse.</param>
public CMapEncoding(String cmap, byte[] cmapBytes) {
    this.cmap = cmap;
    cid2Code = new CMapCidByte();
    try {
        CMapParser.ParseCid(cmap, cid2Code, new CMapLocationFromBytes(cmapBytes));
    }
    catch (System.IO.IOException e) {
        // Keep the cause in the log instead of dropping it.
        LogManager.GetLogger(GetType()).Error(iText.IO.LogMessageConstant.FAILED_TO_PARSE_ENCODING_STREAM, e);
    }
    // Assigned outside the try block so that a failed parse leaves (possibly empty) tables
    // rather than null references that would throw on the first lookup.
    code2Cid = cid2Code.GetReversMap();
    this.codeSpaceRanges = cid2Code.GetCodeSpaceRanges();
}
85111

@@ -126,6 +152,7 @@ public virtual String GetCmapName() {
126152
return cmap;
127153
}
128154

155+
[System.ObsoleteAttribute(@"Will be removed in 7.2. Use GetCmapBytes(int) instead.")]
129156
public virtual int GetCmapCode(int cid) {
130157
if (isDirect) {
131158
return cid;
@@ -135,6 +162,47 @@ public virtual int GetCmapCode(int cid) {
135162
}
136163
}
137164

165+
/// <summary>Returns the CMap code of the given CID as a newly allocated byte array.</summary>
/// <param name="cid">CID to encode.</param>
/// <returns>array sized by <c>GetCmapBytesLength(cid)</c> and filled by <c>FillCmapBytes</c>.</returns>
public virtual byte[] GetCmapBytes(int cid) {
    byte[] codeBytes = new byte[GetCmapBytesLength(cid)];
    FillCmapBytes(cid, codeBytes, 0);
    return codeBytes;
}
171+
172+
/// <summary>Writes the CMap code of the given CID into an array, starting at the given offset.</summary>
/// <param name="cid">CID to encode.</param>
/// <param name="array">destination array.</param>
/// <param name="offset">index of the first byte to write.</param>
/// <returns>the offset just past the last written byte.</returns>
public virtual int FillCmapBytes(int cid, byte[] array, int offset) {
    if (!isDirect) {
        // Non-direct CMap: copy the looked-up code bytes one by one.
        byte[] codeBytes = cid2Code.Lookup(cid);
        foreach (byte codeByte in codeBytes) {
            array[offset++] = codeByte;
        }
        return offset;
    }
    // Direct CMap: the code is the CID itself, written as two bytes, high byte first.
    array[offset] = (byte)((cid & 0xff00) >> 8);
    array[offset + 1] = (byte)(cid & 0xff);
    return offset + 2;
}
185+
186+
/// <summary>Appends the CMap code of the given CID to a buffer.</summary>
/// <param name="cid">CID to encode.</param>
/// <param name="buffer">buffer that receives the code bytes.</param>
public virtual void FillCmapBytes(int cid, ByteBuffer buffer) {
    if (!isDirect) {
        // Non-direct CMap: append the looked-up code bytes as a whole.
        byte[] codeBytes = cid2Code.Lookup(cid);
        buffer.Append(codeBytes);
        return;
    }
    // Direct CMap: the code is the CID itself, appended as two bytes, high byte first.
    buffer.Append((byte)((cid & 0xff00) >> 8));
    buffer.Append((byte)(cid & 0xff));
}
196+
197+
/// <summary>Returns the number of bytes in the CMap code of the given CID.</summary>
/// <param name="cid">CID to encode.</param>
/// <returns>2 for a direct CMap, otherwise the length of the looked-up code.</returns>
public virtual int GetCmapBytesLength(int cid) {
    return isDirect ? 2 : cid2Code.Lookup(cid).Length;
}
205+
138206
public virtual int GetCidCode(int cmapCode) {
139207
if (isDirect) {
140208
return cmapCode;
@@ -144,6 +212,28 @@ public virtual int GetCidCode(int cmapCode) {
144212
}
145213
}
146214

215+
/// <summary>
/// Checks whether a code of the given byte length falls into one of the code space ranges
/// of this encoding.
/// </summary>
/// <remarks>
/// The ranges are stored as consecutive (low, high) pairs. A code matches a pair when the pair
/// has the requested length and every byte of the code lies between the corresponding bytes
/// of the low and high bounds.
/// </remarks>
/// <param name="code">code value; its lowest byte is the last byte of the code.</param>
/// <param name="length">number of bytes the code occupies.</param>
/// <returns>true if a range of that length contains the code, false otherwise.</returns>
public virtual bool ContainsCodeInCodeSpaceRange(int code, int length) {
    if (codeSpaceRanges == null) {
        // No ranges available (e.g. the CMap could not be parsed): nothing can match.
        return false;
    }
    // "i + 1 < Count" ignores a dangling low bound that has no matching high bound.
    for (int i = 0; i + 1 < codeSpaceRanges.Count; i += 2) {
        byte[] low = codeSpaceRanges[i];
        byte[] high = codeSpaceRanges[i + 1];
        if (low.Length != length || high.Length != length) {
            continue;
        }
        bool fitsIntoRange = true;
        int totalShift = 0;
        // Walk from the last byte of the code to the first; stop at the first mismatch.
        for (int ind = length - 1; ind >= 0 && fitsIntoRange; ind--, totalShift += 8) {
            // Shift first, then mask: masking with 0xff000000 and shifting afterwards
            // sign-extends and gives a negative value for lead bytes >= 0x80 of 4-byte codes.
            // An int holds at most 4 bytes, so any byte beyond that is treated as 0.
            int actualByteValue = totalShift < 32 ? (code >> totalShift) & 0xff : 0;
            fitsIntoRange = actualByteValue >= low[ind] && actualByteValue <= high[ind];
        }
        if (fitsIntoRange) {
            return true;
        }
    }
    return false;
}
236+
147237
private static int ToInteger(byte[] bytes) {
148238
int result = 0;
149239
foreach (byte b in bytes) {

itext/itext.io/itext/io/font/cmap/AbstractCMap.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ internal virtual void SetSupplement(int supplement) {
9090

9191
internal abstract void AddChar(String mark, CMapObject code);
9292

93+
/// <summary>
/// Receives the low and high bounds of a code space range.
/// This base implementation does nothing.
/// </summary>
/// <param name="low">low bound of the range.</param>
/// <param name="high">high bound of the range.</param>
internal virtual void AddCodeSpaceRange(byte[] low, byte[] high) {
94+
}
95+
9396
internal virtual void AddRange(String from, String to, CMapObject code) {
9497
byte[] a1 = DecodeStringToByte(from);
9598
byte[] a2 = DecodeStringToByte(to);

itext/itext.io/itext/io/font/cmap/CMapCidByte.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ public class CMapCidByte : AbstractCMap {
5252

5353
private readonly byte[] EMPTY = new byte[] { };
5454

55+
private IList<byte[]> codeSpaceRanges = new List<byte[]>();
56+
5557
internal override void AddChar(String mark, CMapObject code) {
5658
if (code.IsNumber()) {
5759
byte[] ser = DecodeStringToByte(mark);
@@ -82,5 +84,18 @@ public virtual IntHashtable GetReversMap() {
8284
}
8385
return code2cid;
8486
}
87+
88+
/// <summary>
89+
/// Returns a list containing sequential pairs of code space beginnings and endings:
90+
/// (begincodespacerange1, endcodespacerange1, begincodespacerange2, endcodespacerange2, ...)
91+
/// </summary>
/// <returns>the list held by this CMap (the list itself, not a copy).</returns>
92+
public virtual IList<byte[]> GetCodeSpaceRanges() {
93+
return codeSpaceRanges;
94+
}
95+
96+
/// <summary>
/// Records a code space range by appending its bounds to the range list as two
/// consecutive entries: the low bound first, then the high bound.
/// </summary>
/// <param name="low">low bound of the range.</param>
/// <param name="high">high bound of the range.</param>
internal override void AddCodeSpaceRange(byte[] low, byte[] high) {
97+
codeSpaceRanges.Add(low);
98+
codeSpaceRanges.Add(high);
99+
}
85100
}
86101
}

itext/itext.io/itext/io/font/cmap/CMapObject.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,5 +131,14 @@ public override String ToString() {
131131
}
132132
return value.ToString();
133133
}
134+
135+
/// <summary>Returns the value of this object as a byte array when it is a hex string.</summary>
/// <returns>the stored value cast to a byte array for a hex string object, otherwise null.</returns>
public virtual byte[] ToHexByteArray() {
    return type == HEX_STRING ? (byte[])value : null;
}
134143
}
135144
}

0 commit comments

Comments
 (0)