Skip to content

Commit 49206c2

Browse files
author
Eugene Bochilo
committed
Support Text string objects related UA-2 rules
DEVSIX-9005
1 parent 9dfc503 commit 49206c2

File tree

5 files changed

+237
-0
lines changed

5 files changed

+237
-0
lines changed

pdfua/src/main/java/com/itextpdf/pdfua/checkers/PdfUA2Checker.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.pdfua.checkers;
2424

2525
import com.itextpdf.commons.utils.MessageFormatUtil;
26+
import com.itextpdf.kernel.pdf.PdfArray;
2627
import com.itextpdf.kernel.pdf.PdfCatalog;
2728
import com.itextpdf.kernel.pdf.PdfConformance;
2829
import com.itextpdf.kernel.pdf.PdfDictionary;
2930
import com.itextpdf.kernel.pdf.PdfDocument;
3031
import com.itextpdf.kernel.pdf.PdfName;
3132
import com.itextpdf.kernel.pdf.PdfObject;
33+
import com.itextpdf.kernel.pdf.PdfString;
3234
import com.itextpdf.kernel.pdf.tagging.PdfNamespace;
3335
import com.itextpdf.kernel.pdf.tagging.PdfStructTreeRoot;
3436
import com.itextpdf.kernel.pdf.tagutils.IRoleMappingResolver;
@@ -40,6 +42,7 @@ This file is part of the iText (R) project.
4042
import com.itextpdf.kernel.validation.context.FontValidationContext;
4143
import com.itextpdf.kernel.validation.context.PdfDestinationAdditionContext;
4244
import com.itextpdf.kernel.validation.context.PdfDocumentValidationContext;
45+
import com.itextpdf.kernel.validation.context.PdfObjectValidationContext;
4346
import com.itextpdf.kernel.xmp.XMPConst;
4447
import com.itextpdf.kernel.xmp.XMPException;
4548
import com.itextpdf.kernel.xmp.XMPMeta;
@@ -57,6 +60,7 @@ This file is part of the iText (R) project.
5760
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2LinkChecker;
5861
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2ListChecker;
5962
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2NotesChecker;
63+
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2StringChecker;
6064
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2TableOfContentsChecker;
6165
import com.itextpdf.pdfua.checkers.utils.ua2.PdfUA2XfaChecker;
6266
import com.itextpdf.pdfua.exceptions.PdfUAConformanceException;
@@ -124,6 +128,10 @@ public void validate(IValidationContext context) {
124128
PdfDestinationAdditionContext destinationAdditionContext = (PdfDestinationAdditionContext) context;
125129
new PdfUA2DestinationsChecker(destinationAdditionContext, pdfDocument).checkDestinationsOnCreation();
126130
break;
131+
case PDF_OBJECT:
132+
PdfObjectValidationContext validationContext = (PdfObjectValidationContext) context;
133+
checkPdfObject(validationContext.getObject());
134+
break;
127135
}
128136
}
129137

@@ -156,6 +164,39 @@ protected void checkMetadata(PdfCatalog catalog) {
156164
}
157165
}
158166

167+
private void checkPdfObject(PdfObject obj) {
168+
switch (obj.getType()) {
169+
case PdfObject.STRING:
170+
PdfUA2StringChecker.checkPdfString((PdfString) obj);
171+
break;
172+
case PdfObject.ARRAY:
173+
checkArrayRecursively((PdfArray) obj);
174+
break;
175+
case PdfObject.DICTIONARY:
176+
case PdfObject.STREAM:
177+
checkDictionaryRecursively((PdfDictionary) obj);
178+
break;
179+
}
180+
}
181+
182+
private void checkArrayRecursively(PdfArray array) {
183+
for (int i = 0; i < array.size(); i++) {
184+
PdfObject object = array.get(i, false);
185+
if (object != null && !object.isIndirect()) {
186+
checkPdfObject(object);
187+
}
188+
}
189+
}
190+
191+
private void checkDictionaryRecursively(PdfDictionary dictionary) {
192+
for (PdfName name : dictionary.keySet()) {
193+
PdfObject object = dictionary.get(name, false);
194+
if (object != null && !object.isIndirect()) {
195+
checkPdfObject(object);
196+
}
197+
}
198+
}
199+
159200
/**
160201
* Validates document catalog dictionary against PDF/UA-2 standard.
161202
*
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package com.itextpdf.pdfua.checkers.utils.ua2;
2+
3+
import com.itextpdf.io.font.PdfEncodings;
4+
import com.itextpdf.kernel.pdf.PdfString;
5+
import com.itextpdf.pdfua.exceptions.PdfUAConformanceException;
6+
import com.itextpdf.pdfua.exceptions.PdfUAExceptionMessageConstants;
7+
8+
/**
9+
* Utility class which performs UA-2 checks related to PdfString objects.
10+
*/
11+
public final class PdfUA2StringChecker {
12+
13+
private PdfUA2StringChecker() {
14+
// Private constructor will prevent the instantiation of this class directly.
15+
}
16+
17+
/**
18+
* Checks PdfString object to be UA-2 compatible.
19+
*
20+
* @param string {@link PdfString} to be checked
21+
*/
22+
public static void checkPdfString(PdfString string) {
23+
// Only perform this check if PdfString is text string (intended to be human-readable).
24+
if (PdfEncodings.PDF_DOC_ENCODING.equals(string.getEncoding()) ||
25+
PdfEncodings.UTF8.equals(string.getEncoding()) ||
26+
PdfEncodings.UNICODE_BIG.equals(string.getEncoding())) {
27+
for (int i = 0; i < string.getValue().length(); ++i) {
28+
int code = string.getValue().codePointAt(i);
29+
boolean isPrivateArea = code >= 0xE000 && code <= 0xF8FF;
30+
boolean isSupplementaryPrivateAreaA = code >= 0xF0000 && code <= 0xFFFFD;
31+
boolean isSupplementaryPrivateAreaB = code >= 0x100000 && code <= 0x10FFFD;
32+
if (isPrivateArea || isSupplementaryPrivateAreaA || isSupplementaryPrivateAreaB) {
33+
throw new PdfUAConformanceException(PdfUAExceptionMessageConstants.TEXT_STRING_USES_UNICODE_PUA);
34+
}
35+
}
36+
}
37+
}
38+
}

pdfua/src/main/java/com/itextpdf/pdfua/exceptions/PdfUAExceptionMessageConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ public final class PdfUAExceptionMessageConstants {
169169
"Encryption dictionary should be 1 if the document is tagged.";
170170
public static final String TEXT_FIELD_V_AND_RV_SHALL_BE_TEXTUALLY_EQUIVALENT = "For text fields, when RV entry " +
171171
"is present, a V entry shall also be present, and they shall be textually equivalent.";
172+
public static final String TEXT_STRING_USES_UNICODE_PUA =
173+
"Text strings intended to be human readable shall not use the Unicode PUA.";
172174
public static final String TOCI_SHALL_IDENTIFY_REF =
173175
"Each TOCI structure element shall contain the Ref entry, either directly on the TOCI structure element" +
174176
" itself or on at least one of its descendant structure elements.";
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package com.itextpdf.pdfua.checkers;
2+
3+
import com.itextpdf.io.font.PdfEncodings;
4+
import com.itextpdf.kernel.geom.Rectangle;
5+
import com.itextpdf.kernel.pdf.PdfName;
6+
import com.itextpdf.kernel.pdf.PdfPage;
7+
import com.itextpdf.kernel.pdf.PdfString;
8+
import com.itextpdf.kernel.pdf.PdfUAConformance;
9+
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
10+
import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
11+
import com.itextpdf.pdfua.UaValidationTestFramework;
12+
import com.itextpdf.pdfua.exceptions.PdfUAExceptionMessageConstants;
13+
import com.itextpdf.test.ExtendedITextTest;
14+
import com.itextpdf.test.TestUtil;
15+
16+
import java.io.IOException;
17+
import java.util.Arrays;
18+
import java.util.List;
19+
import org.junit.jupiter.api.BeforeAll;
20+
import org.junit.jupiter.api.BeforeEach;
21+
import org.junit.jupiter.api.Tag;
22+
import org.junit.jupiter.api.Test;
23+
import org.junit.jupiter.params.ParameterizedTest;
24+
import org.junit.jupiter.params.provider.MethodSource;
25+
26+
@Tag("IntegrationTest")
27+
public class PdfUAStringTest extends ExtendedITextTest {
28+
private static final String DESTINATION_FOLDER = TestUtil.getOutputPath() + "/pdfua/PdfUAStringTest/";
29+
private static final Rectangle RECTANGLE = new Rectangle(100, 100, 100, 100);
30+
31+
private UaValidationTestFramework framework;
32+
33+
@BeforeAll
34+
public static void before() {
35+
createOrClearDestinationFolder(DESTINATION_FOLDER);
36+
}
37+
38+
@BeforeEach
39+
public void setUp() {
40+
framework = new UaValidationTestFramework(DESTINATION_FOLDER, false);
41+
}
42+
43+
public static List<Integer> privateUseAreaSymbols() {
44+
return Arrays.asList(0xE004, 0xF0009, 0x10FFFA);
45+
}
46+
47+
@Test
48+
public void validValueWithDocEncodingTest() throws IOException {
49+
framework.addBeforeGenerationHook(document -> {
50+
document.addNewPage();
51+
PdfString pdfString = new PdfString("value", PdfEncodings.PDF_DOC_ENCODING);
52+
document.getCatalog().put(PdfName.Lang, pdfString);
53+
});
54+
framework.assertBothValid("validValueWithDocEncoding", PdfUAConformance.PDF_UA_2);
55+
}
56+
57+
@ParameterizedTest
58+
@MethodSource("privateUseAreaSymbols")
59+
public void puaValueWithDocEncodingTest(Integer puaSymbol) throws IOException {
60+
String filename = "puaValueWithDocEncoding_" + getPuaValueName(puaSymbol);
61+
framework.addBeforeGenerationHook(document -> {
62+
PdfString pdfString = new PdfString("hello_" + new String(Character.toChars((int) puaSymbol)), PdfEncodings.PDF_DOC_ENCODING);
63+
PdfPage page = document.addNewPage();
64+
PdfAnnotation textAnnotation = new PdfTextAnnotation(RECTANGLE).setContents(pdfString);
65+
page.addAnnotation(textAnnotation);
66+
});
67+
framework.assertITextFail(filename, PdfUAExceptionMessageConstants.TEXT_STRING_USES_UNICODE_PUA, PdfUAConformance.PDF_UA_2);
68+
// In this particular case validators which reopen the document cannot identify the problem, and strictly speaking PDF document is valid.
69+
// Since PDFDocEncoding doesn't have enough space to allocate this Unicode PUA symbol, it is simply not present in the resulting file.
70+
// Even though the file is valid, there was clearly an attempt to create human-readable PdfString with Unicode PUA, that's why we fail.
71+
framework.assertVeraPdfValid(filename, PdfUAConformance.PDF_UA_2);
72+
}
73+
74+
@ParameterizedTest
75+
@MethodSource("privateUseAreaSymbols")
76+
public void puaValueWithUTF8Test(Integer puaSymbol) throws IOException {
77+
String filename = "puaValueWithUTF8_" + getPuaValueName(puaSymbol);
78+
framework.addBeforeGenerationHook(document -> {
79+
PdfString pdfString = new PdfString("hello_" + new String(Character.toChars((int) puaSymbol)), PdfEncodings.UTF8);
80+
PdfPage page = document.addNewPage();
81+
PdfAnnotation textAnnotation = new PdfTextAnnotation(RECTANGLE).setSubject(pdfString);
82+
page.addAnnotation(textAnnotation);
83+
});
84+
framework.assertITextFail(filename, PdfUAExceptionMessageConstants.TEXT_STRING_USES_UNICODE_PUA, PdfUAConformance.PDF_UA_2);
85+
// VeraPdf doesn't fail because they mistakenly don't check all the PdfString entries in the document.
86+
framework.assertVeraPdfValid(filename, PdfUAConformance.PDF_UA_2);
87+
}
88+
89+
@ParameterizedTest
90+
@MethodSource("privateUseAreaSymbols")
91+
public void puaValueWithUTF16Test(Integer puaSymbol) throws IOException {
92+
String filename = "puaValueWithUTF16_" + getPuaValueName(puaSymbol);
93+
framework.addBeforeGenerationHook(document -> {
94+
PdfString pdfString = new PdfString("hello_" + new String(Character.toChars((int) puaSymbol)), PdfEncodings.UNICODE_BIG);
95+
PdfPage page = document.addNewPage();
96+
PdfAnnotation textAnnotation = new PdfTextAnnotation(RECTANGLE).setSubject(pdfString);
97+
page.addAnnotation(textAnnotation);
98+
});
99+
framework.assertITextFail(filename, PdfUAExceptionMessageConstants.TEXT_STRING_USES_UNICODE_PUA, PdfUAConformance.PDF_UA_2);
100+
// VeraPdf doesn't fail because they mistakenly don't check all the PdfString entries in the document.
101+
framework.assertVeraPdfValid(filename, PdfUAConformance.PDF_UA_2);
102+
}
103+
104+
@ParameterizedTest
105+
@MethodSource("privateUseAreaSymbols")
106+
public void puaValueWithUTF16UnmarkedTest(Integer puaSymbol) throws IOException {
107+
String filename = "puaValueWithUTF16Unmarked_" + getPuaValueName(puaSymbol);
108+
framework.addBeforeGenerationHook(document -> {
109+
PdfString pdfString = new PdfString("hello_" + new String(Character.toChars((int) puaSymbol)), PdfEncodings.UNICODE_BIG_UNMARKED);
110+
PdfPage page = document.addNewPage();
111+
PdfAnnotation textAnnotation = new PdfTextAnnotation(RECTANGLE).setSubject(pdfString);
112+
page.addAnnotation(textAnnotation);
113+
});
114+
framework.assertBothValid(filename, PdfUAConformance.PDF_UA_2);
115+
}
116+
117+
@ParameterizedTest
118+
@MethodSource("privateUseAreaSymbols")
119+
public void puaValueInLangTest(Integer puaSymbol) throws IOException {
120+
String filename = "puaValueInLang_" + getPuaValueName(puaSymbol);
121+
framework.addBeforeGenerationHook(document -> {
122+
PdfString pdfString = new PdfString("hello_" + new String(Character.toChars((int) puaSymbol)), PdfEncodings.UTF8);
123+
document.addNewPage();
124+
document.getCatalog().setLang(pdfString);
125+
});
126+
// This test is only needed to reproduce veraPdf failure.
127+
// For now, we only were able to reproduce it when lang entry in catalog dictionary contains PUA.
128+
// However, iText logic fails earlier, because Lang entry must contain valid language identifier.
129+
framework.assertBothFail(filename, PdfUAExceptionMessageConstants.DOCUMENT_SHALL_CONTAIN_VALID_LANG_ENTRY, PdfUAConformance.PDF_UA_2);
130+
}
131+
132+
@Test
133+
public void puaValueWithTest() throws IOException {
134+
framework.addBeforeGenerationHook(document -> {
135+
document.addNewPage();
136+
PdfString pdfString = new PdfString(new String(Character.toChars(0xE005)), PdfEncodings.WINANSI);
137+
document.getCatalog().put(PdfName.Lang, pdfString);
138+
});
139+
framework.assertBothFail("puaValueWithUTF16", PdfUAConformance.PDF_UA_2);
140+
}
141+
142+
private static String getPuaValueName(Integer puaSymbol) {
143+
switch (puaSymbol) {
144+
case 0xE004:
145+
return "PrivateArea";
146+
case 0xF0009:
147+
return "SupplementaryPrivateAreaA";
148+
case 0x10FFFA:
149+
return "SupplementaryPrivateAreaB";
150+
}
151+
return null;
152+
}
153+
}

sharpenConfiguration.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@
9090
<file path="com/itextpdf/io/source/FileChannelRandomAccessSource.java"/>
9191
<file path="com/itextpdf/io/source/PagedChannelRandomAccessSourceTest.java"/>
9292
</fileset>
93+
<fileset reason="String#codePointAt needs manual implementation in .Net">
94+
<file path="com/itextpdf/pdfua/checkers/utils/ua2/PdfUA2StringChecker.java"/>
95+
</fileset>
9396
<fileset reason="Read method with parameters is mapped to JRead which cannot be overriden (should be mapped to the Read with parameters).
9497
Read method without parameters cannot be overriden and not necessary in dotnet.
9598
There are additional inherited abstract methods that should be implemented (but in java version there is no such implementations)">

0 commit comments

Comments
 (0)