Skip to content

Commit 0569dae

Browse files
committed
PDFBOX-5292: allow xmp extension schemata to be defined and used within the same description, by David Sommer; closes #132
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1930356 13f79535-47bb-0310-9956-ffa450edef68
1 parent 2917dd8 commit 0569dae

File tree

2 files changed

+148
-13
lines changed

2 files changed

+148
-13
lines changed

xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import java.util.Map;
3232
import java.util.Deque;
3333
import java.util.StringTokenizer;
34+
import java.util.stream.Collectors;
3435

3536
import javax.xml.XMLConstants;
3637
import javax.xml.namespace.QName;
@@ -180,25 +181,17 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException
180181
Element rdfRdf = findDescriptionsParent(root);
181182
nsFinder.push(rdfRdf); // PDFBOX-6099: push namespaces in rdf:RDF
182183
List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
183-
List<Element> dataDescriptions = new ArrayList<>(descriptions.size());
184184
for (Element description : descriptions)
185185
{
186-
Element first = DomHelper.getFirstChildElement(description);
187-
if (first != null && "pdfaExtension".equals(first.getPrefix()))
188-
{
189-
PdfaExtensionHelper.validateNaming(xmp, description);
190-
parseDescriptionRoot(xmp, description);
191-
}
192-
else
193-
{
194-
dataDescriptions.add(description);
195-
}
186+
PdfaExtensionHelper.validateNaming(xmp, description);
187+
parseSchemaExtensions(xmp, description);
196188
}
197189

198190
// find schema description
199191
PdfaExtensionHelper.populateSchemaMapping(xmp);
192+
200193
// parse data description
201-
for (Element description : dataDescriptions)
194+
for (Element description : descriptions)
202195
{
203196
parseDescriptionRoot(xmp, description);
204197
}
@@ -208,6 +201,46 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException
208201
return xmp;
209202
}
210203

204+
/**
 * Tells whether an element is a PDF/A extension schema property, recognised by its
 * namespace prefix being exactly "pdfaExtension".
 *
 * @param element the element to test, may be null.
 * @return true if the element is not null and its prefix is "pdfaExtension".
 */
private boolean isSchemaExtensionProperty(final Element element)
205+
{
206+
// the decision is made on the prefix string, not on the namespace URI
return element != null && "pdfaExtension".equals(element.getPrefix());
207+
}
208+
209+
/**
 * Parses only the extension schema properties (the children for which
 * {@link #isSchemaExtensionProperty(Element)} is true) of one description element.
 * All other children of the description are ignored here.
 *
 * @param xmp the metadata object that supplies the type mapping and receives the parsed properties.
 * @param description the description element whose children are scanned.
 * @throws XmpParsingException with ErrorType.NoSchema if the namespace of an extension
 * element is not a defined schema in the type mapping, or with ErrorType.Undefined if an
 * XmpSchemaException is raised while processing.
 */
private void parseSchemaExtensions(final XMPMetadata xmp, final Element description) throws XmpParsingException
210+
{
211+
final TypeMapping tm = xmp.getTypeMapping();
212+
// make the namespaces of this description available; undone in the finally block
nsFinder.push(description);
213+
try
214+
{
215+
// keep only the children with the "pdfaExtension" prefix
final List<Element> schemaExtensions = DomHelper.getElementChildren(description)
216+
.stream()
217+
.filter(this::isSchemaExtensionProperty)
218+
.collect(Collectors.toList());
219+
for (final Element schemaExtension : schemaExtensions)
220+
{
221+
final String namespace = schemaExtension.getNamespaceURI();
222+
// NOTE(review): this repeats the getNamespaceURI() call; the value is already held in "namespace"
if (!tm.isDefinedSchema(schemaExtension.getNamespaceURI()))
223+
{
224+
throw new XmpParsingException(ErrorType.NoSchema,
225+
"This namespace is not a schema or a structured type : " + namespace);
226+
}
227+
PropertyType type = checkPropertyDefinition(xmp, DomHelper.getQName(schemaExtension));
228+
// the schema object is created with the prefix used by the element itself
final XMPSchema schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, schemaExtension.getPrefix());
229+
// attributes are read from the enclosing description, not from the extension element
loadAttributes(schema, description);
230+
ComplexPropertyContainer container = schema.getContainer();
231+
createProperty(xmp, schemaExtension, type, container);
232+
}
233+
}
234+
catch (XmpSchemaException e)
235+
{
236+
// wrap so that callers only have to deal with XmpParsingException; the cause is preserved
throw new XmpParsingException(ErrorType.Undefined, "Parsing failed", e);
237+
}
238+
finally
239+
{
240+
// always balance the push above, also when an exception is thrown
nsFinder.pop();
241+
}
242+
}
243+
211244
private void parseDescriptionRoot(XMPMetadata xmp, Element description) throws XmpParsingException
212245
{
213246
nsFinder.push(description);
@@ -318,6 +351,10 @@ private void parseChildrenAsProperties(XMPMetadata xmp, List<Element> properties
318351
throw new XmpParsingException(ErrorType.NoSchema,
319352
"This namespace is not a schema or a structured type : " + namespace);
320353
}
354+
if (isSchemaExtensionProperty(property))
355+
{
356+
continue;
357+
}
321358
XMPSchema schema = xmp.getSchema(namespace);
322359
if (schema == null)
323360
{
@@ -888,7 +925,7 @@ private void removeComments(Node root)
888925
return;
889926
}
890927

891-
for (int i = 0; i < nl.getLength(); i++)
928+
for (int i = 0; i < nl.getLength(); i++)
892929
{
893930
Node node = nl.item(i);
894931
if (node instanceof Comment)

xmpbox/src/test/java/org/apache/xmpbox/xml/DomXmpParserTest.java

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,14 @@
2828
import java.util.List;
2929

3030
import org.apache.xmpbox.XMPMetadata;
31+
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
3132
import org.apache.xmpbox.schema.PhotoshopSchema;
3233
import org.apache.xmpbox.schema.XMPMediaManagementSchema;
3334
import org.apache.xmpbox.schema.XMPPageTextSchema;
3435
import org.apache.xmpbox.schema.XMPSchema;
3536
import org.apache.xmpbox.type.AbstractField;
3637
import org.apache.xmpbox.type.ArrayProperty;
38+
import org.apache.xmpbox.type.BadFieldValueException;
3739
import org.apache.xmpbox.type.DefinedStructuredType;
3840
import org.apache.xmpbox.type.DimensionsType;
3941
import org.apache.xmpbox.type.PDFASchemaType;
@@ -405,4 +407,100 @@ void testPDFBox3882_2() throws XmpParsingException
405407
// xmpMediaManagementSchema.getDerivedFromProperty() doesn't work.
406408
// However the PDFLib XMP validator considers this file to be invalid, so lets not bother more
407409
}
410+
411+
/**
412+
* PDFBOX-5292: Test whether an inline extension schema is detected, i.e. one whose
* pdfaExtension:schemas definition is in the same rdf:Description as the properties
* (here example:Data and pdfaid:part) that it describes.
413+
*
414+
* @throws XmpParsingException if the XMP packet cannot be parsed.
* @throws BadFieldValueException declared by this test; presumably raised by the schema
* accessors used in the assertions - TODO confirm.
415+
*/
416+
@Test
417+
void testPDFBox5292() throws XmpParsingException, BadFieldValueException
418+
{
419+
// a single rdf:Description holding both the data properties and the extension schema definitions
String s = "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" +
420+
"<x:xmpmeta xmlns:x=\"adobe:ns:meta/\" x:xmptk=\"Adobe XMP Core 5.6-c015 84.159810, 2016/09/10-02:41:30 \">\n" +
421+
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
422+
" <rdf:Description rdf:about=\"\"\n" +
423+
" xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\"\n" +
424+
" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n" +
425+
" xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\"\n" +
426+
" xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\"\n" +
427+
" xmlns:pdfaExtension=\"http://www.aiim.org/pdfa/ns/extension/\"\n" +
428+
" xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\"\n" +
429+
" xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\"\n" +
430+
" xmlns:example=\"http://ns.example.org/default/1.0/\">\n" +
431+
" <xmp:CreateDate>2021-05-21T11:42:49+01:00</xmp:CreateDate>\n" +
432+
" <xmp:ModifyDate>2021-05-21T11:47:16+02:00</xmp:ModifyDate>\n" +
433+
" <xmp:MetadataDate>2021-05-21T11:47:16+02:00</xmp:MetadataDate>\n" +
434+
" <dc:format>application/pdf</dc:format>\n" +
435+
" <dc:title>\n" +
436+
" <rdf:Alt>\n" +
437+
" <rdf:li xml:lang=\"x-default\">Inline XMP Extension PoC</rdf:li>\n" +
438+
" </rdf:Alt>\n" +
439+
" </dc:title>\n" +
440+
" <dc:creator>\n" +
441+
" <rdf:Seq>\n" +
442+
" <rdf:li>DSO</rdf:li>\n" +
443+
" </rdf:Seq>\n" +
444+
" </dc:creator>\n" +
445+
" <dc:description>\n" +
446+
" <rdf:Alt>\n" +
447+
" <rdf:li xml:lang=\"x-default\">Inline XMP Extension PoC</rdf:li>\n" +
448+
" </rdf:Alt>\n" +
449+
" </dc:description>\n" +
450+
" <pdf:Keywords/>\n" +
451+
" <pdfaid:part>2</pdfaid:part>\n" +
452+
" <pdfaid:conformance>A</pdfaid:conformance>\n" +
453+
" <example:Data>Example</example:Data>\n" +
454+
" <pdfaExtension:schemas>\n" +
455+
" <rdf:Bag>\n" +
456+
" <rdf:li rdf:parseType=\"Resource\">\n" +
457+
" <pdfaSchema:schema>Simple Schema</pdfaSchema:schema>\n" +
458+
" <pdfaSchema:namespaceURI>http://ns.example.org/default/1.0/</pdfaSchema:namespaceURI>\n" +
459+
" <pdfaSchema:prefix>example</pdfaSchema:prefix>\n" +
460+
" <pdfaSchema:property>\n" +
461+
" <rdf:Seq>\n" +
462+
" <rdf:li rdf:parseType=\"Resource\">\n" +
463+
" <pdfaProperty:name>Data</pdfaProperty:name>\n" +
464+
" <pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
465+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
466+
" <pdfaProperty:description>Example Data</pdfaProperty:description>\n" +
467+
" </rdf:li>\n" +
468+
" </rdf:Seq>\n" +
469+
" </pdfaSchema:property>\n" +
470+
" </rdf:li>\n" +
471+
" <rdf:li rdf:parseType=\"Resource\">\n" +
472+
" <pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>\n" +
473+
" <pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>\n" +
474+
" <pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>\n" +
475+
" <pdfaSchema:property>\n" +
476+
" <rdf:Seq>\n" +
477+
" <rdf:li rdf:parseType=\"Resource\">\n" +
478+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
479+
" <pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>\n" +
480+
" <pdfaProperty:name>part</pdfaProperty:name>\n" +
481+
" <pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
482+
" </rdf:li>\n" +
483+
" <rdf:li rdf:parseType=\"Resource\">\n" +
484+
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
485+
" <pdfaProperty:description>Conformance level of PDF/A standard</pdfaProperty:description>\n" +
486+
" <pdfaProperty:name>conformance</pdfaProperty:name>\n" +
487+
" <pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
488+
" </rdf:li>\n" +
489+
" </rdf:Seq>\n" +
490+
" </pdfaSchema:property>\n" +
491+
" </rdf:li>\n" +
492+
" </rdf:Bag>\n" +
493+
" </pdfaExtension:schemas>\n" +
494+
" </rdf:Description>\n" +
495+
" </rdf:RDF>\n" +
496+
"</x:xmpmeta>\n" +
497+
"\n" +
498+
"<?xpacket end=\"w\"?>";
499+
DomXmpParser xmpParser = new DomXmpParser();
500+
XMPMetadata xmp = xmpParser.parse(s.getBytes(StandardCharsets.UTF_8));
501+
// the pdfaid properties in the same description must still be readable
PDFAIdentificationSchema pdfaIdSchema = xmp.getPDFAIdentificationSchema();
502+
assertEquals(2, pdfaIdSchema.getPart());
503+
// the "Data" property of the inline-defined example schema must be readable as text
String dataValue = xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data");
504+
assertEquals("Example", dataValue);
505+
}
408506
}

0 commit comments

Comments
 (0)