Skip to content

Commit 045c81f

Browse files
committed
PDFBOX-6129: pass strict mode to PDFA Helper; be lenient when missing property; add test
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930753 13f79535-47bb-0310-9956-ffa450edef68
1 parent 44dd424 commit 045c81f

File tree

3 files changed

+98
-5
lines changed

3 files changed

+98
-5
lines changed

xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException
217217
}
218218

219219
// find schema description
220-
PdfaExtensionHelper.populateSchemaMapping(xmp);
220+
PdfaExtensionHelper.populateSchemaMapping(xmp, strictParsing);
221221

222222
// parse data description
223223
for (Element description : descriptions)

xmpbox/src/main/java/org/apache/xmpbox/xml/PdfaExtensionHelper.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ private static void checkNamespaceDeclaration(Attr attr, Class<? extends Abstrac
9595
}
9696
}
9797

98-
public static void populateSchemaMapping(XMPMetadata meta) throws XmpParsingException
98+
public static void populateSchemaMapping(XMPMetadata meta, boolean strictParsing) throws XmpParsingException
9999
{
100100
List<XMPSchema> schems = meta.getAllSchemas();
101101
TypeMapping tm = meta.getTypeMapping();
@@ -118,14 +118,14 @@ public static void populateSchemaMapping(XMPMetadata meta) throws XmpParsingExce
118118
{
119119
if (af instanceof PDFASchemaType)
120120
{
121-
populatePDFASchemaType(meta, (PDFASchemaType) af, tm);
121+
populatePDFASchemaType(meta, (PDFASchemaType) af, tm, strictParsing);
122122
} // TODO unmanaged ?
123123
}
124124
}
125125
}
126126
}
127127

128-
private static void populatePDFASchemaType(XMPMetadata meta, PDFASchemaType st, TypeMapping tm)
128+
private static void populatePDFASchemaType(XMPMetadata meta, PDFASchemaType st, TypeMapping tm, boolean strictParsing)
129129
throws XmpParsingException
130130
{
131131
String namespaceUri = st.getNamespaceURI();
@@ -155,6 +155,10 @@ private static void populatePDFASchemaType(XMPMetadata meta, PDFASchemaType st,
155155
}
156156
}
157157
// populate properties
158+
if (properties == null && !strictParsing)
159+
{
160+
return;
161+
}
158162
requireNonNull(properties, () -> "Missing pdfaSchema:property in type definition");
159163
for (AbstractField af2 : properties.getAllProperties())
160164
{

xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import org.apache.xmpbox.type.TextType;
5151

5252
import static org.junit.jupiter.api.Assertions.assertEquals;
53+
import static org.junit.jupiter.api.Assertions.assertFalse;
5354
import static org.junit.jupiter.api.Assertions.assertNotNull;
5455
import static org.junit.jupiter.api.Assertions.assertNull;
5556
import static org.junit.jupiter.api.Assertions.assertThrows;
@@ -1171,4 +1172,92 @@ void testTypeInLiResourceElement() throws XmpParsingException
11711172
assertEquals("created", firstHistoryEntry.getAction());
11721173
assertEquals("original PDF file", firstHistoryEntry.getParameters());
11731174
}
1174-
}
1175+
1176+
@Test
1177+
void testLenientPdfaExtension() throws XmpParsingException
1178+
{
1179+
// First bag in pdfaExtension is incomplete.
1180+
final String s =
1181+
"<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
1182+
"<x:xmpmeta xmlns:x=\"adobe:ns:meta/\"\n" +
1183+
" x:xmptk=\"Adobe XMP Core 4.2.1-c043 52.372728, 2009/01/18-15:08:04\">\n" +
1184+
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
1185+
" <rdf:Description rdf:about=\"\"\n" +
1186+
" xmlns:xmpMM=\"http://ns.adobe.com/xap/1.0/mm/\">\n" +
1187+
" <xmpMM:DocumentID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:DocumentID>\n" +
1188+
" <xmpMM:InstanceID>uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d</xmpMM:InstanceID>\n" +
1189+
" </rdf:Description>\n" +
1190+
" <rdf:Description rdf:about=\"\"\n" +
1191+
" xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
1192+
" xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
1193+
" xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\">\n" +
1194+
" <pdfaExtension:schemas>\n" +
1195+
" <rdf:Bag>\n" +
1196+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1197+
" <pdfaSchema:namespaceURI>http://ns.adobe.com/pdf/1.3/</pdfaSchema:namespaceURI>\n" +
1198+
" <pdfaSchema:prefix>pdf</pdfaSchema:prefix>\n" +
1199+
" <pdfaSchema:schema>Adobe PDF Schema</pdfaSchema:schema>\n" +
1200+
" </rdf:li>\n" +
1201+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1202+
" <pdfaSchema:namespaceURI>http://ns.adobe.com/xap/1.0/mm/</pdfaSchema:namespaceURI>\n" +
1203+
" <pdfaSchema:prefix>xmpMM</pdfaSchema:prefix>\n" +
1204+
" <pdfaSchema:schema>XMP Media Management Schema</pdfaSchema:schema>\n" +
1205+
" <pdfaSchema:property>\n" +
1206+
" <rdf:Seq>\n" +
1207+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1208+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
1209+
" <pdfaProperty:description>UUID based identifier for specific incarnation of a document</pdfaProperty:description>\n" +
1210+
" <pdfaProperty:name>InstanceID</pdfaProperty:name>\n" +
1211+
" <pdfaProperty:valueType>URI</pdfaProperty:valueType>\n" +
1212+
" </rdf:li>\n" +
1213+
" </rdf:Seq>\n" +
1214+
" </pdfaSchema:property>\n" +
1215+
" </rdf:li>\n" +
1216+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1217+
" <pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n" +
1218+
" <pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
1219+
" <pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>\n" +
1220+
" <pdfaSchema:property>\n" +
1221+
" <rdf:Seq>\n" +
1222+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1223+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
1224+
" <pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
1225+
" <pdfaProperty:name>part</pdfaProperty:name>\n" +
1226+
" <pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
1227+
" </rdf:li>\n" +
1228+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1229+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
1230+
" <pdfaProperty:description>Amendment of PDF/A standard</pdfaProperty:description>\n" +
1231+
" <pdfaProperty:name>amd</pdfaProperty:name>\n" +
1232+
" <pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
1233+
" </rdf:li>\n" +
1234+
" <rdf:li rdf:parseType=\"Resource\">\n" +
1235+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
1236+
" <pdfaProperty:description>Conformance level of PDF/A standard</pdfaProperty:description>\n" +
1237+
" <pdfaProperty:name>conformance</pdfaProperty:name>\n" +
1238+
" <pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
1239+
" </rdf:li>\n" +
1240+
" </rdf:Seq>\n" +
1241+
" </pdfaSchema:property>\n" +
1242+
" </rdf:li>\n" +
1243+
" </rdf:Bag>\n" +
1244+
" </pdfaExtension:schemas>\n" +
1245+
" </rdf:Description>\n" +
1246+
" </rdf:RDF>\n" +
1247+
"</x:xmpmeta>\n" +
1248+
"<?xpacket end=\"w\"?>";
1249+
final DomXmpParser xmpParser1 = new DomXmpParser();
1250+
xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8));
1251+
XmpParsingException ex = assertThrows(XmpParsingException.class,
1252+
() -> xmpParser1.parse(s.getBytes(StandardCharsets.UTF_8)));
1253+
assertEquals("Missing pdfaSchema:property in type definition", ex.getMessage());
1254+
DomXmpParser xmpParser2 = new DomXmpParser();
1255+
assertTrue(xmpParser2.isStrictParsing());
1256+
xmpParser2.setStrictParsing(false);
1257+
assertFalse(xmpParser2.isStrictParsing());
1258+
XMPMetadata xmp2 = xmpParser2.parse(s.getBytes(StandardCharsets.UTF_8));
1259+
XMPMediaManagementSchema xmpMediaManagementSchema = xmp2.getXMPMediaManagementSchema();
1260+
assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d", xmpMediaManagementSchema.getInstanceID());
1261+
assertEquals("uuid:0b306144-6a43-dcbd-6b3e-c6b6b1df873d", xmpMediaManagementSchema.getDocumentID());
1262+
}
1263+
}

0 commit comments

Comments
 (0)