Skip to content

Commit 180fb5f

Browse files
committed
Add pdf/a-4 checks for metadata
DEVSIX-7747
1 parent 4c2a9b5 commit 180fb5f

File tree

13 files changed

+985
-16
lines changed

13 files changed

+985
-16
lines changed

kernel/src/main/java/com/itextpdf/kernel/xmp/XMPConst.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,4 +221,8 @@ public interface XMPConst
221221
* Conformance, A, B, or U.
222222
*/
223223
String CONFORMANCE = "conformance";
224+
/**
225+
* Name of the {@code History} property; the PDF/A-4 checker looks it up in the {@link #NS_XMP_MM} namespace.
226+
*/
227+
String HISTORY = "History";
224228
}

pdfa/src/main/java/com/itextpdf/pdfa/checker/PdfA4Checker.java

Lines changed: 161 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ This file is part of the iText (R) project.
2323
package com.itextpdf.pdfa.checker;
2424

2525
import com.itextpdf.commons.utils.MessageFormatUtil;
26+
import com.itextpdf.kernel.exceptions.PdfException;
2627
import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
2728
import com.itextpdf.kernel.pdf.PdfArray;
2829
import com.itextpdf.kernel.pdf.PdfCatalog;
@@ -34,15 +35,22 @@ This file is part of the iText (R) project.
3435
import com.itextpdf.kernel.pdf.PdfString;
3536
import com.itextpdf.kernel.pdf.canvas.CanvasGraphicsState;
3637
import com.itextpdf.kernel.pdf.colorspace.PdfSpecialCs;
38+
import com.itextpdf.kernel.xmp.XMPConst;
39+
import com.itextpdf.kernel.xmp.XMPException;
40+
import com.itextpdf.kernel.xmp.XMPMeta;
41+
import com.itextpdf.kernel.xmp.XMPMetaFactory;
42+
import com.itextpdf.kernel.xmp.properties.XMPProperty;
3743
import com.itextpdf.pdfa.exceptions.PdfAConformanceException;
3844
import com.itextpdf.pdfa.exceptions.PdfaExceptionMessageConstant;
3945
import com.itextpdf.pdfa.logs.PdfAConformanceLogMessageConstant;
4046

47+
import java.io.ByteArrayInputStream;
4148
import java.util.Arrays;
4249
import java.util.Collections;
4350
import java.util.HashSet;
4451
import java.util.Map;
4552
import java.util.Set;
53+
import java.util.regex.Pattern;
4654
import org.slf4j.Logger;
4755
import org.slf4j.LoggerFactory;
4856

@@ -129,6 +137,7 @@ public class PdfA4Checker extends PdfA3Checker {
129137
PdfName.Bl
130138
)));
131139

140+
132141
/**
133142
* Creates a PdfA4Checker with the required conformance level
134143
*
@@ -148,7 +157,8 @@ protected void checkTrailer(PdfDictionary trailer) {
148157
if (trailer.get(PdfName.Info) != null) {
149158
PdfDictionary info = trailer.getAsDictionary(PdfName.Info);
150159
if (info.size() != 1 || info.get(PdfName.ModDate) == null) {
151-
throw new PdfAConformanceException(PdfaExceptionMessageConstant.DOCUMENT_INFO_DICTIONARY_SHALL_ONLY_CONTAIN_MOD_DATE);
160+
throw new PdfAConformanceException(
161+
PdfaExceptionMessageConstant.DOCUMENT_INFO_DICTIONARY_SHALL_ONLY_CONTAIN_MOD_DATE);
152162
}
153163
}
154164
}
@@ -160,18 +170,21 @@ protected void checkTrailer(PdfDictionary trailer) {
160170
protected void checkCatalog(PdfCatalog catalog) {
161171
if ('2' != catalog.getDocument().getPdfVersion().toString().charAt(4)) {
162172
throw new PdfAConformanceException(
163-
MessageFormatUtil.format(PdfaExceptionMessageConstant.THE_FILE_HEADER_SHALL_CONTAIN_RIGHT_PDF_VERSION, "2"));
173+
MessageFormatUtil.format(
174+
PdfaExceptionMessageConstant.THE_FILE_HEADER_SHALL_CONTAIN_RIGHT_PDF_VERSION, "2"));
164175
}
165176
PdfDictionary trailer = catalog.getDocument().getTrailer();
166177
if (trailer.get(PdfName.Info) != null) {
167178
if (catalog.getPdfObject().get(PdfName.PieceInfo) == null) {
168-
throw new PdfAConformanceException(PdfaExceptionMessageConstant.DOCUMENT_SHALL_NOT_CONTAIN_INFO_UNLESS_THERE_IS_PIECE_INFO);
179+
throw new PdfAConformanceException(
180+
PdfaExceptionMessageConstant.DOCUMENT_SHALL_NOT_CONTAIN_INFO_UNLESS_THERE_IS_PIECE_INFO);
169181
}
170182
}
171183

172184
if ("F".equals(conformanceLevel.getConformance())) {
173185
if (!catalog.nameTreeContainsKey(PdfName.EmbeddedFiles)) {
174-
throw new PdfAConformanceException(PdfaExceptionMessageConstant.NAME_DICTIONARY_SHALL_CONTAIN_EMBEDDED_FILES_KEY);
186+
throw new PdfAConformanceException(
187+
PdfaExceptionMessageConstant.NAME_DICTIONARY_SHALL_CONTAIN_EMBEDDED_FILES_KEY);
175188
}
176189
}
177190
}
@@ -186,7 +199,8 @@ protected void checkCatalogValidEntries(PdfDictionary catalogDict) {
186199
if (version != null && (version.toString().charAt(0) != '2'
187200
|| version.toString().charAt(1) != '.' || !Character.isDigit(version.toString().charAt(2)))) {
188201
throw new PdfAConformanceException(
189-
MessageFormatUtil.format(PdfaExceptionMessageConstant.THE_CATALOG_VERSION_SHALL_CONTAIN_RIGHT_PDF_VERSION, "2"));
202+
MessageFormatUtil.format(
203+
PdfaExceptionMessageConstant.THE_CATALOG_VERSION_SHALL_CONTAIN_RIGHT_PDF_VERSION, "2"));
190204
}
191205
}
192206

@@ -196,10 +210,12 @@ protected void checkCatalogValidEntries(PdfDictionary catalogDict) {
196210
@Override
197211
protected void checkFileSpec(PdfDictionary fileSpec) {
198212
if (fileSpec.getAsName(PdfName.AFRelationship) == null) {
199-
throw new PdfAConformanceException(PdfaExceptionMessageConstant.FILE_SPECIFICATION_DICTIONARY_SHALL_CONTAIN_AFRELATIONSHIP_KEY);
213+
throw new PdfAConformanceException(
214+
PdfaExceptionMessageConstant.FILE_SPECIFICATION_DICTIONARY_SHALL_CONTAIN_AFRELATIONSHIP_KEY);
200215
}
201216
if (!fileSpec.containsKey(PdfName.F) || !fileSpec.containsKey(PdfName.UF)) {
202-
throw new PdfAConformanceException(PdfAConformanceException.FILE_SPECIFICATION_DICTIONARY_SHALL_CONTAIN_F_KEY_AND_UF_KEY);
217+
throw new PdfAConformanceException(
218+
PdfAConformanceException.FILE_SPECIFICATION_DICTIONARY_SHALL_CONTAIN_F_KEY_AND_UF_KEY);
203219
}
204220
if (!fileSpec.containsKey(PdfName.Desc)) {
205221
LOGGER.warn(PdfAConformanceLogMessageConstant.FILE_SPECIFICATION_DICTIONARY_SHOULD_CONTAIN_DESC_KEY);
@@ -219,7 +235,8 @@ protected void checkPageTransparency(PdfDictionary pageDict, PdfDictionary pageR
219235
}
220236
if (pdfAOutputIntentColorSpace == null && pdfAPageOutputIntent == null
221237
&& transparencyObjects.size() > 0
222-
&& (pageDict.getAsDictionary(PdfName.Group) == null || pageDict.getAsDictionary(PdfName.Group).get(PdfName.CS) == null)) {
238+
&& (pageDict.getAsDictionary(PdfName.Group) == null
239+
|| pageDict.getAsDictionary(PdfName.Group).get(PdfName.CS) == null)) {
223240
checkContentsForTransparency(pageDict);
224241
checkAnnotationsForTransparency(pageDict.getAsArray(PdfName.Annots));
225242
checkResourcesForTransparency(pageResources, new HashSet<PdfObject>());
@@ -257,6 +274,7 @@ protected void checkPageAAConformance(PdfDictionary dict) {
257274
}
258275

259276
//There are no limits for numbers in pdf-a/4
277+
260278
/**
261279
* {@inheritDoc}
262280
*/
@@ -266,6 +284,7 @@ protected void checkPdfNumber(PdfNumber number) {
266284
}
267285

268286
//There is no limit for canvas stack in pdf-a/4
287+
269288
/**
270289
* {@inheritDoc}
271290
*/
@@ -284,6 +303,7 @@ protected int getMaxStringLength() {
284303
}
285304

286305
//There is no limit for DeviceN components count in pdf-a/4
306+
287307
/**
288308
* {@inheritDoc}
289309
*/
@@ -340,7 +360,8 @@ protected void checkAnnotation(PdfDictionary annotDic) {
340360
// Extra check for blending mode
341361
PdfName blendMode = annotDic.getAsName(PdfName.BM);
342362
if (blendMode != null && !allowedBlendModes4.contains(blendMode)) {
343-
throw new PdfAConformanceException(PdfaExceptionMessageConstant.ONLY_STANDARD_BLEND_MODES_SHALL_BE_USED_FOR_THE_VALUE_OF_THE_BM_KEY_IN_A_GRAPHIC_STATE_AND_ANNOTATION_DICTIONARY);
363+
throw new PdfAConformanceException(
364+
PdfaExceptionMessageConstant.ONLY_STANDARD_BLEND_MODES_SHALL_BE_USED_FOR_THE_VALUE_OF_THE_BM_KEY_IN_A_GRAPHIC_STATE_AND_ANNOTATION_DICTIONARY);
344365
}
345366

346367
// And then treat the annotation as an object with transparency
@@ -386,6 +407,24 @@ protected void checkWidgetAAConformance(PdfDictionary dict) {
386407
}
387408
}
388409

410+
/**
411+
* @param catalog the catalog {@link PdfDictionary} to check
412+
*/
413+
@Override
414+
protected void checkMetaData(PdfDictionary catalog) {
415+
super.checkMetaData(catalog);
416+
try {
417+
final PdfStream xmpMetadata = catalog.getAsStream(PdfName.Metadata);
418+
byte[] bytes = xmpMetadata.getBytes();
419+
checkPacketHeader(bytes);
420+
final XMPMeta meta = XMPMetaFactory.parse(new ByteArrayInputStream(bytes));
421+
checkVersionIdentification(meta);
422+
checkFileProvenanceSpec(meta);
423+
} catch (XMPException ex) {
424+
throw new PdfException(ex);
425+
}
426+
}
427+
389428
/**
390429
* {@inheritDoc}
391430
*/
@@ -440,7 +479,119 @@ protected String getTransparencyErrorMessage() {
440479
@Override
441480
protected void checkBlendMode(PdfName blendMode) {
442481
if (!allowedBlendModes4.contains(blendMode)) {
443-
throw new PdfAConformanceException(PdfAConformanceException.ONLY_STANDARD_BLEND_MODES_SHALL_BE_USED_FOR_THE_VALUE_OF_THE_BM_KEY_IN_AN_EXTENDED_GRAPHIC_STATE_DICTIONARY);
482+
throw new PdfAConformanceException(
483+
PdfAConformanceException.ONLY_STANDARD_BLEND_MODES_SHALL_BE_USED_FOR_THE_VALUE_OF_THE_BM_KEY_IN_AN_EXTENDED_GRAPHIC_STATE_DICTIONARY);
484+
}
485+
}
486+
487+
488+
private static boolean isValidXmpConformance(String value) {
489+
if (value == null) {
490+
return false;
491+
}
492+
if (value.length() != 1) {
493+
return false;
494+
}
495+
return "F".equals(value) || "E".equals(value);
496+
}
497+
498+
private static boolean isValidXmpRevision(String value) {
499+
if (value == null) {
500+
return false;
501+
}
502+
if (value.length() != 4) {
503+
return false;
504+
}
505+
for (final char c : value.toCharArray()) {
506+
if (!Character.isDigit(c)) {
507+
return false;
508+
}
509+
}
510+
return true;
511+
}
512+
513+
514+
private void checkPacketHeader(byte[] meta) {
515+
if (meta == null) {
516+
return;
517+
}
518+
final String metAsStr = new String(meta);
519+
final String regex = "<\\?xpacket.*encoding|bytes.*\\?>";
520+
final Pattern pattern = Pattern.compile(regex);
521+
if (pattern.matcher(metAsStr).find()) {
522+
throw new PdfAConformanceException(
523+
PdfaExceptionMessageConstant
524+
.XMP_METADATA_HEADER_PACKET_MAY_NOT_CONTAIN_BYTES_OR_ENCODING_ATTRIBUTE);
525+
}
526+
}
527+
528+
529+
private void checkFileProvenanceSpec(XMPMeta meta) {
530+
try {
531+
XMPProperty history = meta.getProperty(XMPConst.NS_XMP_MM, XMPConst.HISTORY);
532+
if (history == null) {
533+
return;
534+
}
535+
if (!history.getOptions().isArray()) {
536+
return;
537+
}
538+
final int amountOfEntries = meta.countArrayItems(XMPConst.NS_XMP_MM, XMPConst.HISTORY);
539+
for (int i = 0; i < amountOfEntries; i++) {
540+
int nameSpaceIndex = i + 1;
541+
if (!meta.doesPropertyExist(XMPConst.NS_XMP_MM,
542+
XMPConst.HISTORY + "[" + nameSpaceIndex + "]/stEvt:action")) {
543+
throw new PdfAConformanceException(MessageFormatUtil.format(
544+
PdfaExceptionMessageConstant.XMP_METADATA_HISTORY_ENTRY_SHALL_CONTAIN_KEY,
545+
"stEvt:action"));
546+
}
547+
if (!meta.doesPropertyExist(XMPConst.NS_XMP_MM,
548+
XMPConst.HISTORY + "[" + nameSpaceIndex + "]/stEvt:when")) {
549+
throw new PdfAConformanceException(MessageFormatUtil.format(
550+
PdfaExceptionMessageConstant.XMP_METADATA_HISTORY_ENTRY_SHALL_CONTAIN_KEY,
551+
"stEvt:when"));
552+
}
553+
}
554+
555+
556+
} catch (XMPException e) {
557+
throw new PdfException(e);
558+
}
559+
}
560+
561+
562+
private void checkVersionIdentification(XMPMeta meta) {
563+
try {
564+
XMPProperty prop = meta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.PART);
565+
if (prop == null || !getConformanceLevel().getPart().equals(prop.getValue())) {
566+
throw new PdfAConformanceException(MessageFormatUtil.format(
567+
PdfaExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_PART,
568+
getConformanceLevel().getPart()));
569+
}
570+
} catch (XMPException e) {
571+
throw new PdfAConformanceException(MessageFormatUtil.format(
572+
PdfaExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_PART,
573+
getConformanceLevel().getPart()));
574+
}
575+
576+
try {
577+
XMPProperty prop = meta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.REV);
578+
if (prop == null || !isValidXmpRevision(prop.getValue())) {
579+
throw new PdfAConformanceException(
580+
PdfaExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_REV);
581+
}
582+
} catch (XMPException e) {
583+
throw new PdfAConformanceException(
584+
PdfaExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_REV);
585+
}
586+
587+
try {
588+
XMPProperty prop = meta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.CONFORMANCE);
589+
if (prop != null && !isValidXmpConformance(prop.getValue())) {
590+
throw new PdfAConformanceException(
591+
PdfaExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_CONFORMANCE);
592+
}
593+
} catch (XMPException e) {
594+
// ignored because it is not required
444595
}
445596
}
446597

pdfa/src/main/java/com/itextpdf/pdfa/exceptions/PdfaExceptionMessageConstant.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,19 @@ public final class PdfaExceptionMessageConstant {
166166
public static final String ONLY_STANDARD_BLEND_MODES_SHALL_BE_USED_FOR_THE_VALUE_OF_THE_BM_KEY_IN_A_GRAPHIC_STATE_AND_ANNOTATION_DICTIONARY =
167167
"Only blend modes that are specified in ISO 32000-2:2020 shall be used for the value of the BM key in a"
168168
+ " graphic state dictionary or an annotation dictionary.";
169+
public static final String XMP_METADATA_HEADER_PACKET_MAY_NOT_CONTAIN_BYTES_OR_ENCODING_ATTRIBUTE = "XMP metadata"
170+
+ " header packet may not contain bytes or encoding attribute.";
171+
public static final String XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_PART = "XMP metadata header shall"
172+
+ " contain version identifier pdfaid:part with value {0}";
173+
174+
public static final String XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_REV = "XMP metadata header shall"
175+
+ " contain version identifier pdfaid:rev with four digit integer value";
176+
177+
public static final String XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_CONFORMANCE =
178+
"XMP metadata header shall"
179+
+ " contain version identifier pdfaid:rev F or E or absent if no conformance level is specified";
180+
public static final String XMP_METADATA_HISTORY_ENTRY_SHALL_CONTAIN_KEY = "XMP metadata history entry shall"
181+
+ " contain key {0}";
169182

170183
private PdfaExceptionMessageConstant(){}
171184
}

pdfa/src/test/java/com/itextpdf/pdfa/PdfA4ActionCheckTest.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,6 @@ private void generatePdfADocument(PdfAConformanceLevel conformanceLevel, String
466466
doc.addNewPage();
467467
consumer.accept(doc);
468468
doc.close();
469-
470469
}
471470

472471
}

0 commit comments

Comments
 (0)