Skip to content

Commit 21a8606

Browse files
dzmitry.kachkou authored and
iText-CI committed
Add check for String lengths in PDFA module
DEVSIX-2978
1 parent 66faa47 commit 21a8606

File tree

8 files changed

+609
-16
lines changed

8 files changed

+609
-16
lines changed

pdfa/src/main/java/com/itextpdf/pdfa/checker/PdfA1Checker.java

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,14 @@ This file is part of the iText (R) project.
4444
package com.itextpdf.pdfa.checker;
4545

4646
import com.itextpdf.io.font.PdfEncodings;
47+
import com.itextpdf.io.source.PdfTokenizer;
48+
import com.itextpdf.io.source.RandomAccessFileOrArray;
49+
import com.itextpdf.io.source.RandomAccessSourceFactory;
50+
import com.itextpdf.kernel.PdfException;
51+
import com.itextpdf.kernel.colors.PatternColor;
4752
import com.itextpdf.kernel.font.PdfFont;
4853
import com.itextpdf.kernel.font.PdfTrueTypeFont;
54+
import com.itextpdf.kernel.font.PdfType3Font;
4955
import com.itextpdf.kernel.pdf.canvas.CanvasGraphicsState;
5056
import com.itextpdf.kernel.colors.Color;
5157
import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
@@ -58,13 +64,18 @@ This file is part of the iText (R) project.
5864
import com.itextpdf.kernel.pdf.PdfStream;
5965
import com.itextpdf.kernel.pdf.PdfString;
6066
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
67+
import com.itextpdf.kernel.pdf.canvas.parser.util.PdfCanvasParser;
6168
import com.itextpdf.kernel.pdf.colorspace.PdfColorSpace;
6269
import com.itextpdf.kernel.pdf.colorspace.PdfDeviceCs;
70+
import com.itextpdf.kernel.pdf.colorspace.PdfPattern;
6371
import com.itextpdf.kernel.pdf.colorspace.PdfSpecialCs;
6472
import com.itextpdf.pdfa.PdfAConformanceException;
6573

74+
import java.io.IOException;
75+
import java.util.ArrayList;
6676
import java.util.Arrays;
6777
import java.util.HashSet;
78+
import java.util.List;
6879
import java.util.Set;
6980

7081
import com.itextpdf.pdfa.PdfAConformanceLogMessageConstant;
@@ -137,6 +148,12 @@ public void checkColor(Color color, PdfDictionary currentColorSpaces, Boolean fi
137148
@Override
138149
public void checkColor(Color color, PdfDictionary currentColorSpaces, Boolean fill, PdfStream stream) {
139150
checkColorSpace(color.getColorSpace(), currentColorSpaces, true, fill);
151+
if (color instanceof PatternColor) {
152+
PdfPattern pattern = ((PatternColor) color).getPattern();
153+
if (pattern instanceof PdfPattern.Tiling) {
154+
checkContentStream((PdfStream) pattern.getPdfObject());
155+
}
156+
}
140157
}
141158

142159
@Override
@@ -254,6 +271,55 @@ public void checkFont(PdfFont pdfFont) {
254271
checkNonSymbolicTrueTypeFont(trueTypeFont);
255272
}
256273
}
274+
275+
if (pdfFont instanceof PdfType3Font) {
276+
PdfDictionary charProcs = pdfFont.getPdfObject().getAsDictionary(PdfName.CharProcs);
277+
for (PdfName charName : charProcs.keySet()) {
278+
checkContentStream(charProcs.getAsStream(charName));
279+
}
280+
}
281+
}
282+
283+
@Override
284+
protected void checkContentStream(PdfStream contentStream) {
285+
if (isFullCheckMode() || contentStream.isModified()) {
286+
byte[] contentBytes = contentStream.getBytes();
287+
PdfTokenizer tokenizer = new PdfTokenizer(
288+
new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(contentBytes)));
289+
290+
PdfCanvasParser parser = new PdfCanvasParser(tokenizer);
291+
List<PdfObject> operands = new ArrayList<>();
292+
try {
293+
while (parser.parse(operands).size() > 0) {
294+
for (PdfObject operand : operands) {
295+
checkContentStreamObject(operand);
296+
}
297+
}
298+
} catch (IOException e) {
299+
throw new PdfException(PdfException.CannotParseContentStream, e);
300+
}
301+
}
302+
}
303+
304+
@Override
305+
protected void checkContentStreamObject(PdfObject object) {
306+
byte type = object.getType();
307+
switch (type) {
308+
case PdfObject.STRING:
309+
checkPdfString((PdfString) object);
310+
break;
311+
case PdfObject.ARRAY:
312+
for (PdfObject obj : (PdfArray) object) {
313+
checkContentStreamObject(obj);
314+
}
315+
break;
316+
case PdfObject.DICTIONARY:
317+
PdfDictionary dictionary = (PdfDictionary) object;
318+
for (PdfObject obj : dictionary.values()) {
319+
checkContentStreamObject(obj);
320+
}
321+
break;
322+
}
257323
}
258324

259325
@Override
@@ -330,6 +396,7 @@ protected void checkFormXObject(PdfStream form) {
330396
}
331397

332398
checkResources(form.getAsDictionary(PdfName.Resources));
399+
checkContentStream(form);
333400
}
334401

335402
@Override

pdfa/src/main/java/com/itextpdf/pdfa/checker/PdfA2Checker.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,12 @@ public void checkColor(Color color, PdfDictionary currentColorSpaces, Boolean fi
167167
}
168168
};
169169
checkExtGState(gState, contentStream);
170+
} else if (pattern instanceof PdfPattern.Tiling) {
171+
checkContentStream((PdfStream) pattern.getPdfObject());
170172
}
171173
}
172174

173-
checkColorSpace(color.getColorSpace(), currentColorSpaces, true, fill);
175+
super.checkColor(color, currentColorSpaces, fill, contentStream);
174176
}
175177

176178
@Override
@@ -823,6 +825,7 @@ protected void checkFormXObject(PdfStream form, PdfStream contentStream) {
823825
}
824826

825827
checkResources(form.getAsDictionary(PdfName.Resources));
828+
checkContentStream(form);
826829
}
827830

828831
private void checkContentsForTransparency(PdfDictionary pageDict) {

pdfa/src/main/java/com/itextpdf/pdfa/checker/PdfAChecker.java

Lines changed: 72 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ public abstract class PdfAChecker implements Serializable {
148148
protected Set<PdfObject> checkedObjects = new HashSet<>();
149149
protected Map<PdfObject, PdfColorSpace> checkedObjectsColorspace = new HashMap<>();
150150

151+
private boolean fullCheckMode = false;
152+
151153
protected PdfAChecker(PdfAConformanceLevel conformanceLevel) {
152154
this.conformanceLevel = conformanceLevel;
153155
}
@@ -196,18 +198,24 @@ public void checkPdfObject(PdfObject obj) {
196198
case PdfObject.NUMBER:
197199
checkPdfNumber((PdfNumber) obj);
198200
break;
199-
case PdfObject.STREAM:
200-
checkPdfStream((PdfStream) obj);
201-
break;
202201
case PdfObject.STRING:
203202
checkPdfString((PdfString) obj);
204203
break;
204+
case PdfObject.ARRAY:
205+
checkArrayRecursively((PdfArray) obj);
206+
break;
205207
case PdfObject.DICTIONARY:
206208
PdfDictionary dict = (PdfDictionary) obj;
207209
PdfName type = dict.getAsName(PdfName.Type);
208210
if (PdfName.Filespec.equals(type)) {
209211
checkFileSpec(dict);
210212
}
213+
checkDictionaryRecursively(dict);
214+
break;
215+
case PdfObject.STREAM:
216+
PdfStream stream = (PdfStream) obj;
217+
checkPdfStream(stream);
218+
checkDictionaryRecursively(stream);
211219
break;
212220
}
213221
}
@@ -221,6 +229,28 @@ public PdfAConformanceLevel getConformanceLevel() {
221229
return conformanceLevel;
222230
}
223231

232+
/**
233+
* In full check mode all objects will be tested for ISO conformance. If full check mode is
234+
* switched off objects which were not modified might be skipped to speed up the validation
235+
* of the document
236+
* @return true if full check mode is switched on
237+
* @see PdfObject#isModified()
238+
*/
239+
public boolean isFullCheckMode() {
240+
return fullCheckMode;
241+
}
242+
243+
/**
244+
* In full check mode all objects will be tested for ISO conformance. If full check mode is
245+
* switched off objects which were not modified might be skipped to speed up the validation
246+
* of the document
247+
* @param fullCheckMode is a new value for full check mode switcher
248+
* @see PdfObject#isModified()
249+
*/
250+
public void setFullCheckMode(boolean fullCheckMode) {
251+
this.fullCheckMode = fullCheckMode;
252+
}
253+
224254
/**
225255
* Remembers which objects have already been checked, in order to avoid
226256
* redundant checks.
@@ -351,6 +381,24 @@ public void checkFontGlyphs(PdfFont font, PdfStream contentStream) {
351381
protected void checkPageTransparency(PdfDictionary pageDict, PdfDictionary pageResources) {
352382
}
353383

384+
/**
385+
* Attest content stream conformance with appropriate specification.
386+
* Throws PdfAConformanceException if any discrepancy was found
387+
*
388+
* @param contentStream is a content stream to validate
389+
*/
390+
protected void checkContentStream(PdfStream contentStream) {
391+
}
392+
393+
/**
394+
* Verify the conformity of the operand of content stream with appropriate
395+
* specification. Throws PdfAConformanceException if any discrepancy was found
396+
*
397+
* @param object is an operand of content stream to validate
398+
*/
399+
protected void checkContentStreamObject(PdfObject object) {
400+
}
401+
354402
protected abstract Set<PdfName> getForbiddenActions();
355403
protected abstract Set<PdfName> getAllowedNamedActions();
356404
protected abstract void checkAction(PdfDictionary action);
@@ -454,6 +502,24 @@ protected void checkResourcesOfAppearanceStreams(PdfDictionary appearanceStreams
454502
}
455503
}
456504

505+
private void checkArrayRecursively(PdfArray array) {
506+
for (int i = 0; i < array.size(); i++) {
507+
PdfObject object = array.get(i, false);
508+
if (object != null && ! object.isIndirect()) {
509+
checkPdfObject(object);
510+
}
511+
}
512+
}
513+
514+
private void checkDictionaryRecursively(PdfDictionary dictionary) {
515+
for (PdfName name: dictionary.keySet()) {
516+
PdfObject object = dictionary.get(name, false);
517+
if (object != null && ! object.isIndirect()) {
518+
checkPdfObject(object);
519+
}
520+
}
521+
}
522+
457523
private void checkPages(PdfDocument document) {
458524
for (int i = 1; i <= document.getNumberOfPages(); i++) {
459525
checkPage(document.getPage(i));
@@ -474,7 +540,9 @@ private void checkPage(PdfPage page) {
474540

475541
int contentStreamCount = page.getContentStreamCount();
476542
for (int j = 0; j < contentStreamCount; ++j) {
477-
checkedObjects.add(page.getContentStream(j));
543+
PdfStream contentStream = page.getContentStream(j);
544+
checkContentStream(contentStream);
545+
checkedObjects.add(contentStream);
478546
}
479547
}
480548

pdfa/src/test/java/com/itextpdf/pdfa/PdfALongStringTest.java

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,19 @@ This file is part of the iText (R) project.
4747
import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
4848
import com.itextpdf.kernel.pdf.PdfOutputIntent;
4949
import com.itextpdf.kernel.pdf.PdfWriter;
50-
import com.itextpdf.kernel.utils.CompareTool;
5150
import com.itextpdf.layout.Document;
5251
import com.itextpdf.layout.element.Paragraph;
5352
import com.itextpdf.test.ExtendedITextTest;
5453
import com.itextpdf.test.annotations.type.IntegrationTest;
55-
import org.junit.Assert;
56-
import org.junit.BeforeClass;
57-
import org.junit.Test;
58-
import org.junit.experimental.categories.Category;
5954

6055
import java.io.FileInputStream;
6156
import java.io.FileOutputStream;
6257
import java.io.InputStream;
58+
import org.junit.BeforeClass;
59+
import org.junit.Rule;
60+
import org.junit.Test;
61+
import org.junit.experimental.categories.Category;
62+
import org.junit.rules.ExpectedException;
6363

6464
@Category(IntegrationTest.class)
6565
public class PdfALongStringTest extends ExtendedITextTest {
@@ -68,21 +68,27 @@ public class PdfALongStringTest extends ExtendedITextTest {
6868
private static final String LOREM_IPSUM = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis condimentum, tortor sit amet fermentum pharetra, sem felis finibus enim, vel consectetur nunc justo at nisi. In hac habitasse platea dictumst. Donec quis suscipit eros. Nam urna purus, scelerisque in placerat in, convallis vel sapien. Suspendisse sed lacus sit amet orci ornare vulputate. In hac habitasse platea dictumst. Ut eu aliquet felis, at consectetur neque.";
6969
private static final int STRING_LENGTH_LIMIT = 32767;
7070

71+
@Rule
72+
public ExpectedException junitExpectedException = ExpectedException.none();
73+
7174
@BeforeClass
7275
public static void beforeClass() {
7376
createDestinationFolder(destinationFolder);
7477
}
7578

7679
@Test
77-
//TODO(DEVSIX-2978): Produces non-conforming PDF/A document
7880
public void runTest() throws Exception {
81+
junitExpectedException.expect(PdfAConformanceException.class);
82+
junitExpectedException.expectMessage(PdfAConformanceException.PDF_STRING_IS_TOO_LONG);
7983
String file = "pdfALongString.pdf";
8084
String filename = destinationFolder + file;
8185
try (InputStream icm = new FileInputStream(sourceFolder + "sRGB Color Space Profile.icm");
82-
PdfADocument pdf = new PdfADocument(new PdfWriter(new FileOutputStream(filename)),
83-
PdfAConformanceLevel.PDF_A_3U,
84-
new PdfOutputIntent("Custom", "", "http://www.color.org", "sRGB ICC preference", icm));
85-
Document document = new Document(pdf)) {
86+
Document document = new Document(
87+
new PdfADocument(new PdfWriter(new FileOutputStream(filename)),
88+
PdfAConformanceLevel.PDF_A_3U,
89+
new PdfOutputIntent("Custom", "",
90+
"http://www.color.org", "sRGB ICC preference", icm))
91+
)) {
8692
StringBuilder stringBuilder = new StringBuilder(LOREM_IPSUM);
8793
while (stringBuilder.length() < STRING_LENGTH_LIMIT) {
8894
stringBuilder.append(stringBuilder.toString());
@@ -93,7 +99,10 @@ public void runTest() throws Exception {
9399
p.setMinWidth(1e6f);
94100
p.setFont(font);
95101
document.add(p);
102+
103+
// when the document is auto-closed, the ISO conformance check is performed
104+
// this document contains a string which is longer than is allowed
105+
// per the specification. That is why a conformance exception should be thrown
96106
}
97-
Assert.assertNull(new CompareTool().compareByContent(filename, sourceFolder + "cmp/PdfALongStringTest/cmp_" + file, destinationFolder, "diff_"));
98107
}
99108
}

0 commit comments

Comments
 (0)